Author: dchapyshev
Date: Sat Aug 27 20:18:18 2016
New Revision: 72476
URL:
http://svn.reactos.org/svn/reactos?rev=72476&view=rev
Log:
[RTL]
- Implement IS_TEXT_UNICODE_REVERSE_ASCII16, IS_TEXT_UNICODE_REVERSE_STATISTICS flags
- Fix IS_TEXT_UNICODE_STATISTICS
* This fixes 2 tests for the RtlIsTextUnicode function. All rtlstr tests in winetest now
pass successfully
Modified:
trunk/reactos/sdk/lib/rtl/unicode.c
Modified: trunk/reactos/sdk/lib/rtl/unicode.c
URL:
http://svn.reactos.org/svn/reactos/trunk/reactos/sdk/lib/rtl/unicode.c?rev=…
==============================================================================
--- trunk/reactos/sdk/lib/rtl/unicode.c [iso-8859-1] (original)
+++ trunk/reactos/sdk/lib/rtl/unicode.c [iso-8859-1] Sat Aug 27 20:18:18 2016
@@ -1245,6 +1245,12 @@
const WCHAR *s = buf;
int i;
unsigned int flags = MAXULONG, out_flags = 0;
+ UCHAR last_lo_byte = 0;
+ UCHAR last_hi_byte = 0;
+ ULONG hi_byte_diff = 0;
+ ULONG lo_byte_diff = 0;
+ ULONG weight = 3;
+ ULONG lead_byte = 0;
if (len < sizeof(WCHAR))
{
@@ -1279,19 +1285,47 @@
if (*s == 0xFEFF) out_flags |= IS_TEXT_UNICODE_SIGNATURE;
if (*s == 0xFFFE) out_flags |= IS_TEXT_UNICODE_REVERSE_SIGNATURE;
+ for (i = 0; i < len; i++)
+ {
+ UCHAR lo_byte = LOBYTE(s[i]);
+ UCHAR hi_byte = HIBYTE(s[i]);
+
+ lo_byte_diff += max(lo_byte, last_lo_byte) - min(lo_byte, last_lo_byte);
+ hi_byte_diff += max(hi_byte, last_hi_byte) - min(hi_byte, last_hi_byte);
+
+ last_lo_byte = lo_byte;
+ last_hi_byte = hi_byte;
+ }
+
+ if (NlsMbCodePageTag)
+ {
+ for (i = 0; i < len; i++)
+ {
+ if (NlsLeadByteInfo[s[i]])
+ {
+ ++lead_byte;
+ ++i;
+ }
+ }
+
+ if (lead_byte)
+ {
+ weight = (len / 2) - 1;
+
+ if (lead_byte < (weight / 3))
+ weight = 3;
+ else if (lead_byte < ((weight * 2) / 3))
+ weight = 2;
+ else
+ weight = 1;
+ }
+ }
+
/* apply some statistical analysis */
- if (flags & IS_TEXT_UNICODE_STATISTICS)
- {
- int stats = 0;
-
- /* FIXME: checks only for ASCII characters in the unicode stream */
- for (i = 0; i < len; i++)
- {
- if (s[i] <= 255) stats++;
- }
-
- if (stats > len / 2)
- out_flags |= IS_TEXT_UNICODE_STATISTICS;
+ if ((flags & IS_TEXT_UNICODE_STATISTICS) &&
+ ((weight * hi_byte_diff) < lo_byte_diff))
+ {
+ out_flags |= IS_TEXT_UNICODE_STATISTICS;
}
/* Check for unicode NULL chars */
@@ -1328,6 +1362,16 @@
out_flags |= IS_TEXT_UNICODE_REVERSE_CONTROLS;
break;
}
+ }
+
+ if (hi_byte_diff && !lo_byte_diff)
+ {
+ out_flags |= IS_TEXT_UNICODE_REVERSE_ASCII16;
+ }
+
+ if ((weight * lo_byte_diff) < hi_byte_diff)
+ {
+ out_flags |= IS_TEXT_UNICODE_REVERSE_STATISTICS;
}
}