https://git.reactos.org/?p=reactos.git;a=commitdiff;h=c7bebe40cbaf33906dbc2217c15b6e56f5278b6c
commit c7bebe40cbaf33906dbc2217c15b6e56f5278b6c
Author:     Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
AuthorDate: Sat Apr 6 20:11:07 2019 +0900
Commit:     GitHub <noreply@github.com>
CommitDate: Sat Apr 6 20:11:07 2019 +0900
[KERNEL32] Improve MultiByteToWideChar (#1477)
Reduce MultiByteToWideChar failures. CORE-13349
---
 dll/win32/kernel32/winnls/string/nls.c | 111 ++++++++++++++++++++++++++++++---
 1 file changed, 101 insertions(+), 10 deletions(-)
diff --git a/dll/win32/kernel32/winnls/string/nls.c b/dll/win32/kernel32/winnls/string/nls.c index 391eca8fae..78574c6b43 100644 --- a/dll/win32/kernel32/winnls/string/nls.c +++ b/dll/win32/kernel32/winnls/string/nls.c @@ -7,6 +7,7 @@ * Hartmut Birr * Gunnar Andre Dalsnes * Thomas Weidenmueller + * Katayama Hirofumi MZ * UPDATE HISTORY: * Created 24/08/2004 */ @@ -36,6 +37,10 @@ static const char UTF8Length[128] = /* First byte mask depending on UTF-8 sequence length. */ static const unsigned char UTF8Mask[6] = {0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01};
+/* UTF-8 length to lower bound */ +static const unsigned long UTF8LBound[] = + {0, 0x80, 0x800, 0x10000, 0x200000, 0x2000000, 0xFFFFFFFF}; + /* FIXME: Change to HASH table or linear array. */ static LIST_ENTRY CodePageListHead; static CODEPAGE_ENTRY AnsiCodePage; @@ -352,7 +357,6 @@ IntGetCodePageEntry(UINT CodePage) * Internal version of MultiByteToWideChar for UTF8. * * @see MultiByteToWideChar - * @todo Add UTF8 validity checks. */
static @@ -364,10 +368,12 @@ IntMultiByteToWideCharUTF8(DWORD Flags, LPWSTR WideCharString, INT WideCharCount) { - LPCSTR MbsEnd; - UCHAR Char, Length; + LPCSTR MbsEnd, MbsPtrSave; + UCHAR Char, TrailLength; WCHAR WideChar; LONG Count; + BOOL CharIsValid, StringIsValid = TRUE; + const WCHAR InvalidChar = 0xFFFD;
if (Flags != 0 && Flags != MB_ERR_INVALID_CHARS) { @@ -378,17 +384,61 @@ IntMultiByteToWideCharUTF8(DWORD Flags, /* Does caller query for output buffer size? */ if (WideCharCount == 0) { + /* validate and count the wide characters */ MbsEnd = MultiByteString + MultiByteCount; for (; MultiByteString < MbsEnd; WideCharCount++) { Char = *MultiByteString++; if (Char < 0xC0) + { + TrailLength = 0; continue; - MultiByteString += UTF8Length[Char - 0x80]; + } + if (Char >= 0xF8 || (Char & 0xC0) == 0x80) + { + TrailLength = 0; + StringIsValid = FALSE; + continue; + } + + CharIsValid = TRUE; + MbsPtrSave = MultiByteString; + TrailLength = UTF8Length[Char - 0x80]; + WideChar = Char & UTF8Mask[TrailLength]; + + while (TrailLength && MultiByteString < MbsEnd) + { + if ((*MultiByteString & 0xC0) != 0x80) + { + CharIsValid = StringIsValid = FALSE; + break; + } + + WideChar = (WideChar << 6) | (*MultiByteString++ & 0x7f); + TrailLength--; + } + + if (!CharIsValid || WideChar < UTF8LBound[UTF8Length[Char - 0x80]]) + { + MultiByteString = MbsPtrSave; + } + } + + if (TrailLength) + { + WideCharCount++; } + + if (Flags == MB_ERR_INVALID_CHARS && (!StringIsValid || TrailLength)) + { + SetLastError(ERROR_NO_UNICODE_TRANSLATION); + return 0; + } + return WideCharCount; }
+ /* convert */ MbsEnd = MultiByteString + MultiByteCount; for (Count = 0; Count < WideCharCount && MultiByteString < MbsEnd; Count++) { @@ -396,20 +446,61 @@ IntMultiByteToWideCharUTF8(DWORD Flags, if (Char < 0x80) { *WideCharString++ = Char; + TrailLength = 0; + continue; + } + if (Char >= 0xF8 || Char == 0x80 || (Char & 0xC0) == 0x80) + { + *WideCharString++ = InvalidChar; + TrailLength = 0; continue; } - Length = UTF8Length[Char - 0x80]; - WideChar = Char & UTF8Mask[Length]; - while (Length && MultiByteString < MbsEnd) + + CharIsValid = TRUE; + MbsPtrSave = MultiByteString; + TrailLength = UTF8Length[Char - 0x80]; + WideChar = Char & UTF8Mask[TrailLength]; + + while (TrailLength && MultiByteString < MbsEnd) { + if ((*MultiByteString & 0xC0) != 0x80) + { + CharIsValid = StringIsValid = FALSE; + break; + } + WideChar = (WideChar << 6) | (*MultiByteString++ & 0x7f); - Length--; + TrailLength--; + } + + if (CharIsValid && UTF8LBound[UTF8Length[Char - 0x80]] <= WideChar) + { + *WideCharString++ = WideChar; + } + else + { + *WideCharString++ = InvalidChar; + MultiByteString = MbsPtrSave; } - *WideCharString++ = WideChar; + } + + if (TrailLength && Count < WideCharCount && MultiByteString < MbsEnd) + { + *WideCharString = InvalidChar; + WideCharCount++; }
if (MultiByteString < MbsEnd) + { SetLastError(ERROR_INSUFFICIENT_BUFFER); + return 0; + } + + if (Flags == MB_ERR_INVALID_CHARS && (!StringIsValid || TrailLength)) + { + SetLastError(ERROR_NO_UNICODE_TRANSLATION); + return 0; + }
return Count; } @@ -549,7 +640,7 @@ IntMultiByteToWideCharCP(UINT CodePage,
if (MultiByteString == MbsEnd) { - *WideCharString++ = UNICODE_NULL; + *WideCharString++ = MultiByteTable[Char]; } else if (*MultiByteString == 0) {