https://git.reactos.org/?p=reactos.git;a=commitdiff;h=c7bebe40cbaf33906dbc2217c15b6e56f5278b6c
commit c7bebe40cbaf33906dbc2217c15b6e56f5278b6c
Author: Katayama Hirofumi MZ <katayama.hirofumi.mz(a)gmail.com>
AuthorDate: Sat Apr 6 20:11:07 2019 +0900
Commit: GitHub <noreply(a)github.com>
CommitDate: Sat Apr 6 20:11:07 2019 +0900
[KERNEL32] Improve MultiByteToWideChar (#1477)
Reduce MultiByteToWideChar failures. CORE-13349
---
dll/win32/kernel32/winnls/string/nls.c | 111 ++++++++++++++++++++++++++++++---
1 file changed, 101 insertions(+), 10 deletions(-)
diff --git a/dll/win32/kernel32/winnls/string/nls.c b/dll/win32/kernel32/winnls/string/nls.c
index 391eca8fae..78574c6b43 100644
--- a/dll/win32/kernel32/winnls/string/nls.c
+++ b/dll/win32/kernel32/winnls/string/nls.c
@@ -7,6 +7,7 @@
* Hartmut Birr
* Gunnar Andre Dalsnes
* Thomas Weidenmueller
+ * Katayama Hirofumi MZ
* UPDATE HISTORY:
* Created 24/08/2004
*/
@@ -36,6 +37,10 @@ static const char UTF8Length[128] =
/* First byte mask depending on UTF-8 sequence length. */
static const unsigned char UTF8Mask[6] = {0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01};
+/* UTF-8 length to lower bound */
+static const unsigned long UTF8LBound[] =
+ {0, 0x80, 0x800, 0x10000, 0x200000, 0x2000000, 0xFFFFFFFF};
+
/* FIXME: Change to HASH table or linear array. */
static LIST_ENTRY CodePageListHead;
static CODEPAGE_ENTRY AnsiCodePage;
@@ -352,7 +357,6 @@ IntGetCodePageEntry(UINT CodePage)
* Internal version of MultiByteToWideChar for UTF8.
*
* @see MultiByteToWideChar
- * @todo Add UTF8 validity checks.
*/
static
@@ -364,10 +368,12 @@ IntMultiByteToWideCharUTF8(DWORD Flags,
LPWSTR WideCharString,
INT WideCharCount)
{
- LPCSTR MbsEnd;
- UCHAR Char, Length;
+ LPCSTR MbsEnd, MbsPtrSave;
+ UCHAR Char, TrailLength;
WCHAR WideChar;
LONG Count;
+ BOOL CharIsValid, StringIsValid = TRUE;
+ const WCHAR InvalidChar = 0xFFFD;
if (Flags != 0 && Flags != MB_ERR_INVALID_CHARS)
{
@@ -378,17 +384,61 @@ IntMultiByteToWideCharUTF8(DWORD Flags,
/* Does caller query for output buffer size? */
if (WideCharCount == 0)
{
+ /* validate and count the wide characters */
MbsEnd = MultiByteString + MultiByteCount;
for (; MultiByteString < MbsEnd; WideCharCount++)
{
Char = *MultiByteString++;
if (Char < 0xC0)
+ {
+ TrailLength = 0;
continue;
- MultiByteString += UTF8Length[Char - 0x80];
+ }
+ if (Char >= 0xF8 || (Char & 0xC0) == 0x80)
+ {
+ TrailLength = 0;
+ StringIsValid = FALSE;
+ continue;
+ }
+
+ CharIsValid = TRUE;
+ MbsPtrSave = MultiByteString;
+ TrailLength = UTF8Length[Char - 0x80];
+ WideChar = Char & UTF8Mask[TrailLength];
+
+ while (TrailLength && MultiByteString < MbsEnd)
+ {
+ if ((*MultiByteString & 0xC0) != 0x80)
+ {
+ CharIsValid = StringIsValid = FALSE;
+ break;
+ }
+
+ WideChar = (WideChar << 6) | (*MultiByteString++ & 0x7f);
+ TrailLength--;
+ }
+
+ if (!CharIsValid || WideChar < UTF8LBound[UTF8Length[Char - 0x80]])
+ {
+ MultiByteString = MbsPtrSave;
+ }
+ }
+
+ if (TrailLength)
+ {
+ WideCharCount++;
}
+
+ if (Flags == MB_ERR_INVALID_CHARS && (!StringIsValid || TrailLength))
+ {
+ SetLastError(ERROR_NO_UNICODE_TRANSLATION);
+ return 0;
+ }
+
return WideCharCount;
}
+ /* convert */
MbsEnd = MultiByteString + MultiByteCount;
for (Count = 0; Count < WideCharCount && MultiByteString < MbsEnd;
Count++)
{
@@ -396,20 +446,61 @@ IntMultiByteToWideCharUTF8(DWORD Flags,
if (Char < 0x80)
{
*WideCharString++ = Char;
+ TrailLength = 0;
+ continue;
+ }
+ if (Char >= 0xF8 || Char == 0x80 || (Char & 0xC0) == 0x80)
+ {
+ *WideCharString++ = InvalidChar;
+ TrailLength = 0;
continue;
}
- Length = UTF8Length[Char - 0x80];
- WideChar = Char & UTF8Mask[Length];
- while (Length && MultiByteString < MbsEnd)
+
+ CharIsValid = TRUE;
+ MbsPtrSave = MultiByteString;
+ TrailLength = UTF8Length[Char - 0x80];
+ WideChar = Char & UTF8Mask[TrailLength];
+
+ while (TrailLength && MultiByteString < MbsEnd)
{
+ if ((*MultiByteString & 0xC0) != 0x80)
+ {
+ CharIsValid = StringIsValid = FALSE;
+ break;
+ }
+
WideChar = (WideChar << 6) | (*MultiByteString++ & 0x7f);
- Length--;
+ TrailLength--;
+ }
+
+ if (CharIsValid && UTF8LBound[UTF8Length[Char - 0x80]] <= WideChar)
+ {
+ *WideCharString++ = WideChar;
+ }
+ else
+ {
+ *WideCharString++ = InvalidChar;
+ MultiByteString = MbsPtrSave;
}
- *WideCharString++ = WideChar;
+ }
+
+ if (TrailLength && Count < WideCharCount && MultiByteString < MbsEnd)
+ {
+ *WideCharString = InvalidChar;
+ WideCharCount++;
}
if (MultiByteString < MbsEnd)
+ {
SetLastError(ERROR_INSUFFICIENT_BUFFER);
+ return 0;
+ }
+
+ if (Flags == MB_ERR_INVALID_CHARS && (!StringIsValid || TrailLength))
+ {
+ SetLastError(ERROR_NO_UNICODE_TRANSLATION);
+ return 0;
+ }
return Count;
}
@@ -549,7 +640,7 @@ IntMultiByteToWideCharCP(UINT CodePage,
if (MultiByteString == MbsEnd)
{
- *WideCharString++ = UNICODE_NULL;
+ *WideCharString++ = MultiByteTable[Char];
}
else if (*MultiByteString == 0)
{