Author: dchapyshev Date: Sat Aug 27 20:18:18 2016 New Revision: 72476
URL: http://svn.reactos.org/svn/reactos?rev=72476&view=rev Log: [RTL] - Implement IS_TEXT_UNICODE_REVERSE_ASCII16, IS_TEXT_UNICODE_REVERSE_STATISTICS flags - Fix IS_TEXT_UNICODE_STATISTICS
* It fixes 2 tests for the RtlIsTextUnicode function. All rtlstr tests in winetest now pass successfully.
Modified: trunk/reactos/sdk/lib/rtl/unicode.c
Modified: trunk/reactos/sdk/lib/rtl/unicode.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/sdk/lib/rtl/unicode.c?rev=7... ============================================================================== --- trunk/reactos/sdk/lib/rtl/unicode.c [iso-8859-1] (original) +++ trunk/reactos/sdk/lib/rtl/unicode.c [iso-8859-1] Sat Aug 27 20:18:18 2016 @@ -1245,6 +1245,12 @@ const WCHAR *s = buf; int i; unsigned int flags = MAXULONG, out_flags = 0; + UCHAR last_lo_byte = 0; + UCHAR last_hi_byte = 0; + ULONG hi_byte_diff = 0; + ULONG lo_byte_diff = 0; + ULONG weight = 3; + ULONG lead_byte = 0;
if (len < sizeof(WCHAR)) { @@ -1279,19 +1285,47 @@ if (*s == 0xFEFF) out_flags |= IS_TEXT_UNICODE_SIGNATURE; if (*s == 0xFFFE) out_flags |= IS_TEXT_UNICODE_REVERSE_SIGNATURE;
+ for (i = 0; i < len; i++) + { + UCHAR lo_byte = LOBYTE(s[i]); + UCHAR hi_byte = HIBYTE(s[i]); + + lo_byte_diff += max(lo_byte, last_lo_byte) - min(lo_byte, last_lo_byte); + hi_byte_diff += max(hi_byte, last_hi_byte) - min(hi_byte, last_hi_byte); + + last_lo_byte = lo_byte; + last_hi_byte = hi_byte; + } + + if (NlsMbCodePageTag) + { + for (i = 0; i < len; i++) + { + if (NlsLeadByteInfo[s[i]]) + { + ++lead_byte; + ++i; + } + } + + if (lead_byte) + { + weight = (len / 2) - 1; + + if (lead_byte < (weight / 3)) + weight = 3; + else if (lead_byte < ((weight * 2) / 3)) + weight = 2; + else + weight = 1; + } + } + /* apply some statistical analysis */ - if (flags & IS_TEXT_UNICODE_STATISTICS) - { - int stats = 0; - - /* FIXME: checks only for ASCII characters in the unicode stream */ - for (i = 0; i < len; i++) - { - if (s[i] <= 255) stats++; - } - - if (stats > len / 2) - out_flags |= IS_TEXT_UNICODE_STATISTICS; + if ((flags & IS_TEXT_UNICODE_STATISTICS) && + ((weight * hi_byte_diff) < lo_byte_diff)) + { + out_flags |= IS_TEXT_UNICODE_STATISTICS; }
/* Check for unicode NULL chars */ @@ -1328,6 +1362,16 @@ out_flags |= IS_TEXT_UNICODE_REVERSE_CONTROLS; break; } + } + + if (hi_byte_diff && !lo_byte_diff) + { + out_flags |= IS_TEXT_UNICODE_REVERSE_ASCII16; + } + + if ((weight * lo_byte_diff) < hi_byte_diff) + { + out_flags |= IS_TEXT_UNICODE_REVERSE_STATISTICS; } }