Author: cfinck Date: Fri Jul 11 14:11:06 2008 New Revision: 34426
URL: http://svn.reactos.org/svn/reactos?rev=34426&view=rev Log: - Port Wine's WideCharToMultiByte implementation for conversion to a codepage to ReactOS. (with comments :-)) It adds support for DefaultChar, UsedDefaultChar and the flag WC_NO_BEST_FIT_CHARS. WC_COMPOSITECHECK is also supported by the Wine implementation, but I have no idea how to port it to ReactOS, as we don't seem to have composition tables. I left FIXMEs for this flag in the appropriate blocks, which is why some of the code might look badly structured/unoptimized at the moment. As we completely rely on the NLS tables for the conversion now, this commit might trigger some bugs there. I already found out that the CP950 table doesn't map Unicode 0 back to MultiByte 0 (but to 254); using Windows' c_950.nls it works correctly. Other tables could be buggy as well, though c_1252.nls worked flawlessly for me. - Added comments to the CPTABLEINFO structure based on documentation from http://www.ping.uio.no/~ovehk/nls/
Modified: trunk/reactos/dll/win32/kernel32/misc/nls.c trunk/reactos/include/ddk/ntnls.h
Modified: trunk/reactos/dll/win32/kernel32/misc/nls.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/dll/win32/kernel32/misc/nls... ============================================================================== --- trunk/reactos/dll/win32/kernel32/misc/nls.c [iso-8859-1] (original) +++ trunk/reactos/dll/win32/kernel32/misc/nls.c [iso-8859-1] Fri Jul 11 14:11:06 2008 @@ -688,14 +688,65 @@ }
/** + * @name IsValidSBCSMapping + * + * Checks if ch (single-byte character) is a valid mapping for wch + * + * @see IntWideCharToMultiByteCP + */ +static inline BOOL +IntIsValidSBCSMapping(PCPTABLEINFO CodePageTable, DWORD Flags, WCHAR wch, UCHAR ch) +{ + /* If the WC_NO_BEST_FIT_CHARS flag has been specified, the characters need to match exactly. */ + if(Flags & WC_NO_BEST_FIT_CHARS) + return (CodePageTable->MultiByteTable[ch] != wch); + + /* By default, all characters except TransDefaultChar apply as a valid mapping for ch (so also "nearest" characters) */ + if(ch != CodePageTable->TransDefaultChar) + return TRUE; + + /* The only possible left valid mapping is the default character itself */ + return (wch == CodePageTable->TransUniDefaultChar); +} + +/** + * @name IsValidDBCSMapping + * + * Checks if ch (double-byte character) is a valid mapping for wch + * + * @see IntWideCharToMultiByteCP + */ +static inline BOOL +IntIsValidDBCSMapping(PCPTABLEINFO CodePageTable, DWORD Flags, WCHAR wch, USHORT ch) +{ + /* If ch is the default character, but the wch is not, it can't be a valid mapping */ + if(ch == CodePageTable->TransDefaultChar && wch != CodePageTable->TransUniDefaultChar) + return FALSE; + + /* If the WC_NO_BEST_FIT_CHARS flag has been specified, the characters need to match exactly. */ + if(Flags & WC_NO_BEST_FIT_CHARS) + { + if(ch & 0xff00) + { + UCHAR uOffset = CodePageTable->DBCSOffsets[ch >> 8]; + return (CodePageTable->MultiByteTable[(uOffset << 8) + (ch & 0xff)] == wch); + } + + return (CodePageTable->MultiByteTable[ch] == wch); + } + + /* If we're still here, we have a valid mapping */ + return TRUE; +} + +/** * @name IntWideCharToMultiByteCP * * Internal version of WideCharToMultiByte for code page tables. * * @see WideCharToMultiByte - * @todo Handle default characters and flags. 
- */ - + * @todo Handle WC_COMPOSITECHECK + */ static INT STDCALL IntWideCharToMultiByteCP(UINT CodePage, DWORD Flags, LPCWSTR WideCharString, INT WideCharCount, @@ -715,108 +766,233 @@ } CodePageTable = &CodePageEntry->CodePageTable;
+ /* Different handling for DBCS code pages. */ if (CodePageTable->MaximumCharacterSize > 1) { - /* FIXME */ - - USHORT WideChar; - USHORT MbChar; + /* If Flags, DefaultChar or UsedDefaultChar were given, we have to do some more work */ + if(Flags || DefaultChar || UsedDefaultChar) + { + BOOL TempUsedDefaultChar; + USHORT DefChar; + + /* If UsedDefaultChar is not set, set it to a temporary value, so we don't have to check on every character */ + if(!UsedDefaultChar) + UsedDefaultChar = &TempUsedDefaultChar; + + *UsedDefaultChar = FALSE; + + /* Use the CodePage's TransDefaultChar if none was given. Don't modify the DefaultChar pointer here. */ + if(DefaultChar) + DefChar = DefaultChar[1] ? ((DefaultChar[0] << 8) | DefaultChar[1]) : DefaultChar[0]; + else + DefChar = CodePageTable->TransDefaultChar; + + /* Does caller query for output buffer size? */ + if(!MultiByteCount) + { + for(TempLength = 0; WideCharCount; WideCharCount--, WideCharString++, TempLength++) + { + USHORT uChar; + + if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1) + { + /* FIXME: Handle WC_COMPOSITECHECK */ + } + + uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString]; + + /* Verify if the mapping is valid for handling DefaultChar and UsedDefaultChar */ + if(!IntIsValidDBCSMapping(CodePageTable, Flags, *WideCharString, uChar)) + { + uChar = DefChar; + *UsedDefaultChar = TRUE; + } + + /* Increment TempLength again if this is a double-byte character */ + if(uChar & 0xff00) + TempLength++; + } + + return TempLength; + } + + /* Convert the WideCharString to the MultiByteString and verify if the mapping is valid */ + for(TempLength = MultiByteCount; WideCharCount && TempLength; TempLength--, WideCharString++, WideCharCount--) + { + USHORT uChar; + + if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1) + { + /* FIXME: Handle WC_COMPOSITECHECK */ + } + + uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString]; + + /* Verify if the mapping is valid for handling DefaultChar and 
UsedDefaultChar */ + if(!IntIsValidDBCSMapping(CodePageTable, Flags, *WideCharString, uChar)) + { + uChar = DefChar; + *UsedDefaultChar = TRUE; + } + + /* Handle double-byte characters */ + if(uChar & 0xff00) + { + /* Don't output a partial character */ + if(TempLength == 1) + break; + + TempLength--; + *MultiByteString++ = uChar >> 8; + } + + *MultiByteString++ = (char)uChar; + } + + /* WideCharCount should be 0 if all characters were converted */ + if(WideCharCount) + { + SetLastError(ERROR_INSUFFICIENT_BUFFER); + return 0; + } + + return MultiByteCount - TempLength; + }
/* Does caller query for output buffer size? */ - if (MultiByteCount == 0) - { - for (TempLength = 0; WideCharCount; WideCharCount--, TempLength++) - { - WideChar = *WideCharString++; - - if (WideChar < 0x80) - continue; - - MbChar = ((PWCHAR)CodePageTable->WideCharTable)[WideChar]; - - if (!(MbChar & 0xff00)) - continue; - - TempLength++; + if(!MultiByteCount) + { + for(TempLength = 0; WideCharCount; WideCharCount--, WideCharString++, TempLength++) + { + /* Increment TempLength again if this is a double-byte character */ + if (((PWCHAR)CodePageTable->WideCharTable)[*WideCharString] & 0xff00) + TempLength++; }
return TempLength; }
- for (TempLength = MultiByteCount; WideCharCount; WideCharCount--) - { - WideChar = *WideCharString++; - - if (WideChar < 0x80) - { - if (!TempLength) + /* Convert the WideCharString to the MultiByteString */ + for(TempLength = MultiByteCount; WideCharCount && TempLength; TempLength--, WideCharString++, WideCharCount--) + { + USHORT uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString]; + + /* Is this a double-byte character? */ + if(uChar & 0xff00) + { + /* Don't output a partial character */ + if(TempLength == 1) + break; + + TempLength--; + *MultiByteString++ = uChar >> 8; + } + + *MultiByteString++ = (char)uChar; + } + + /* WideCharCount should be 0 if all characters were converted */ + if(WideCharCount) + { + SetLastError(ERROR_INSUFFICIENT_BUFFER); + return 0; + } + + return MultiByteCount - TempLength; + } + else /* Not DBCS code page */ + { + INT nReturn; + + /* If Flags, DefaultChar or UsedDefaultChar were given, we have to do some more work */ + if(Flags || DefaultChar || UsedDefaultChar) + { + BOOL TempUsedDefaultChar; + CHAR DefChar; + + /* If UsedDefaultChar is not set, set it to a temporary value, so we don't have to check on every character */ + if(!UsedDefaultChar) + UsedDefaultChar = &TempUsedDefaultChar; + + *UsedDefaultChar = FALSE; + + /* Does caller query for output buffer size? 
*/ + if(!MultiByteCount) + { + /* Loop through the whole WideCharString and check if we can get a valid mapping for each character */ + for(TempLength = 0; WideCharCount; TempLength++, WideCharString++, WideCharCount--) { - SetLastError(ERROR_INSUFFICIENT_BUFFER); - break; + if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1) + { + /* FIXME: Handle WC_COMPOSITECHECK */ + } + + if(!*UsedDefaultChar) + *UsedDefaultChar = !IntIsValidSBCSMapping(CodePageTable, Flags, *WideCharString, ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]); } - TempLength--; - - *MultiByteString++ = (CHAR)WideChar; - continue; - } - - MbChar = ((PWCHAR)CodePageTable->WideCharTable)[WideChar]; - - if (!(MbChar & 0xff00)) - { - if (!TempLength) + + return TempLength; + } + + /* Use the CodePage's TransDefaultChar if none was given. Don't modify the DefaultChar pointer here. */ + if(DefaultChar) + DefChar = *DefaultChar; + else + DefChar = CodePageTable->TransDefaultChar; + + /* Convert the WideCharString to the MultiByteString and verify if the mapping is valid */ + for(TempLength = MultiByteCount; WideCharCount && TempLength; MultiByteString++, TempLength--, WideCharString++, WideCharCount--) + { + if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1) { - SetLastError(ERROR_INSUFFICIENT_BUFFER); - break; + /* FIXME: Handle WC_COMPOSITECHECK */ } - TempLength--; - - *MultiByteString++ = (CHAR)MbChar; - continue;; - } - - if (TempLength >= 2) - { - MultiByteString[1] = (CHAR)MbChar; MbChar >>= 8; - MultiByteString[0] = (CHAR)MbChar; - MultiByteString += 2; - TempLength -= 2; - } - else + + *MultiByteString = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]; + + if(!IntIsValidSBCSMapping(CodePageTable, Flags, *WideCharString, *MultiByteString)) + { + *MultiByteString = DefChar; + *UsedDefaultChar = TRUE; + } + } + + /* WideCharCount should be 0 if all characters were converted */ + if(WideCharCount) { SetLastError(ERROR_INSUFFICIENT_BUFFER); - break; - } - } - - return MultiByteCount 
- TempLength; - } - else /* Not DBCS code page */ - { + return 0; + } + + return MultiByteCount - TempLength; + } + /* Does caller query for output buffer size? */ - if (MultiByteCount == 0) + if(!MultiByteCount) return WideCharCount;
- /* Adjust buffer size. Wine trick ;-) */ - if (MultiByteCount < WideCharCount) - { + /* Is the buffer large enough? */ + if(MultiByteCount < WideCharCount) + { + /* Convert the string up to MultiByteCount and return 0 */ WideCharCount = MultiByteCount; SetLastError(ERROR_INSUFFICIENT_BUFFER); - } - - for (TempLength = WideCharCount; - TempLength > 0; - WideCharString++, TempLength--) - { - *MultiByteString++ = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]; - } - - /* FIXME */ - if (UsedDefaultChar != NULL) - *UsedDefaultChar = FALSE; - - return WideCharCount; + nReturn = 0; + } + else + { + /* Otherwise WideCharCount will be the number of converted characters */ + nReturn = WideCharCount; + } + + /* Convert the WideCharString to the MultiByteString */ + for(TempLength = WideCharCount; --TempLength >= 0; WideCharString++, MultiByteString++) + { + *MultiByteString = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]; + } + + return nReturn; } }
Modified: trunk/reactos/include/ddk/ntnls.h URL: http://svn.reactos.org/svn/reactos/trunk/reactos/include/ddk/ntnls.h?rev=344... ============================================================================== --- trunk/reactos/include/ddk/ntnls.h [iso-8859-1] (original) +++ trunk/reactos/include/ddk/ntnls.h [iso-8859-1] Fri Jul 11 14:11:06 2008 @@ -7,18 +7,20 @@ extern "C" { #endif #define MAXIMUM_LEADBYTES 12 + +/* Some documentation can be found here: http://www.ping.uio.no/~ovehk/nls/ */ typedef struct _CPTABLEINFO { USHORT CodePage; - USHORT MaximumCharacterSize; - USHORT DefaultChar; - USHORT UniDefaultChar; - USHORT TransDefaultChar; - USHORT TransUniDefaultChar; + USHORT MaximumCharacterSize; /* 1 = SBCS, 2 = DBCS */ + USHORT DefaultChar; /* Default MultiByte Character for the CP->Unicode conversion */ + USHORT UniDefaultChar; /* Default Unicode Character for the CP->Unicode conversion */ + USHORT TransDefaultChar; /* Default MultiByte Character for the Unicode->CP conversion */ + USHORT TransUniDefaultChar; /* Default Unicode Character for the Unicode->CP conversion */ USHORT DBCSCodePage; UCHAR LeadByte[MAXIMUM_LEADBYTES]; - PUSHORT MultiByteTable; - PVOID WideCharTable; + PUSHORT MultiByteTable; /* Table for CP->Unicode conversion */ + PVOID WideCharTable; /* Table for Unicode->CP conversion */ PUSHORT DBCSRanges; PUSHORT DBCSOffsets; } CPTABLEINFO, *PCPTABLEINFO;