Author: cfinck
Date: Fri Jul 11 14:11:06 2008
New Revision: 34426
URL:
http://svn.reactos.org/svn/reactos?rev=34426&view=rev
Log:
- Port Wine's WideCharToMultiByte implementation for conversion to a codepage to
ReactOS. (with comments :-))
It adds support for DefaultChar, UsedDefaultChar and the flag WC_NO_BEST_FIT_CHARS.
WC_COMPOSITECHECK is also supported by the Wine implementation, but I have no
idea how to port it to ReactOS, as we don't seem to have composition tables. I left
FIXMEs for this flag in the appropriate blocks, which is why some of the code might
look badly structured/unoptimized at the moment.
As we completely rely on the NLS tables for the conversion now, this commit might
trigger some bugs there. I already found out that the CP950 table doesn't map Unicode
0 back to MultiByte 0 (but to 254); using Windows' c_950.nls it works correctly. Other
tables could be buggy as well, though c_1252.nls worked flawlessly for me.
- Added comments to the CPTABLEINFO structure based on documentation from
http://www.ping.uio.no/~ovehk/nls/
Modified:
trunk/reactos/dll/win32/kernel32/misc/nls.c
trunk/reactos/include/ddk/ntnls.h
Modified: trunk/reactos/dll/win32/kernel32/misc/nls.c
URL:
http://svn.reactos.org/svn/reactos/trunk/reactos/dll/win32/kernel32/misc/nl…
==============================================================================
--- trunk/reactos/dll/win32/kernel32/misc/nls.c [iso-8859-1] (original)
+++ trunk/reactos/dll/win32/kernel32/misc/nls.c [iso-8859-1] Fri Jul 11 14:11:06 2008
@@ -688,14 +688,65 @@
}
/**
+ * @name IntIsValidSBCSMapping
+ *
+ * Checks if ch (single-byte character) is a valid mapping for wch.
+ *
+ * @param CodePageTable
+ *        Code page table; MultiByteTable, TransDefaultChar and
+ *        TransUniDefaultChar are read from it.
+ * @param Flags
+ *        Conversion flags; only WC_NO_BEST_FIT_CHARS is examined here.
+ * @param wch
+ *        The wide character that was converted.
+ * @param ch
+ *        The single-byte character wch was converted to.
+ *
+ * @return TRUE if ch is an acceptable mapping for wch, FALSE otherwise.
+ *
+ * @see IntWideCharToMultiByteCP
+ */
+static inline BOOL
+IntIsValidSBCSMapping(PCPTABLEINFO CodePageTable, DWORD Flags, WCHAR wch, UCHAR ch)
+{
+    /* If the WC_NO_BEST_FIT_CHARS flag has been specified, the characters need to match exactly:
+       ch is only valid if it maps back to wch through the MultiByte table. */
+    if(Flags & WC_NO_BEST_FIT_CHARS)
+        return (CodePageTable->MultiByteTable[ch] == wch);
+
+    /* By default, all characters except TransDefaultChar apply as a valid mapping for ch (so also "nearest" characters) */
+    if(ch != CodePageTable->TransDefaultChar)
+        return TRUE;
+
+    /* The only possible left valid mapping is the default character itself */
+    return (wch == CodePageTable->TransUniDefaultChar);
+}
+
+/**
+ * @name IntIsValidDBCSMapping
+ *
+ * Decides whether ch (a single- or double-byte character packed into a
+ * USHORT, high byte first) may be used as the conversion result for wch.
+ *
+ * @param CodePageTable
+ *        Code page table; TransDefaultChar, TransUniDefaultChar,
+ *        DBCSOffsets and MultiByteTable are read from it.
+ * @param Flags
+ *        Conversion flags; only WC_NO_BEST_FIT_CHARS is examined here.
+ * @param wch
+ *        The wide character that was converted.
+ * @param ch
+ *        The multibyte character wch was converted to.
+ *
+ * @return TRUE if ch is an acceptable mapping for wch, FALSE otherwise.
+ *
+ * @see IntWideCharToMultiByteCP
+ */
+static inline BOOL
+IntIsValidDBCSMapping(PCPTABLEINFO CodePageTable, DWORD Flags, WCHAR wch, USHORT ch)
+{
+    BOOL IsDefaultChar = (ch == CodePageTable->TransDefaultChar);
+    UCHAR HighByteOffset;
+
+    /* The default character is only acceptable as the result for the default Unicode character */
+    if(IsDefaultChar && wch != CodePageTable->TransUniDefaultChar)
+        return FALSE;
+
+    /* Without WC_NO_BEST_FIT_CHARS every remaining mapping is accepted */
+    if(!(Flags & WC_NO_BEST_FIT_CHARS))
+        return TRUE;
+
+    /* Exact match required: a character without a high byte is looked up directly */
+    if(!(ch & 0xff00))
+        return (CodePageTable->MultiByteTable[ch] == wch);
+
+    /* Double-byte character: the high byte selects an offset, the low byte the entry.
+       NOTE(review): DBCSOffsets holds USHORT entries but the value is narrowed to UCHAR here - confirm this is intended. */
+    HighByteOffset = CodePageTable->DBCSOffsets[ch >> 8];
+    return (CodePageTable->MultiByteTable[(HighByteOffset << 8) + (ch & 0xff)] == wch);
+}
+
+/**
* @name IntWideCharToMultiByteCP
*
* Internal version of WideCharToMultiByte for code page tables.
*
* @see WideCharToMultiByte
- * @todo Handle default characters and flags.
- */
-
+ * @todo Handle WC_COMPOSITECHECK
+ */
static INT STDCALL
IntWideCharToMultiByteCP(UINT CodePage, DWORD Flags,
LPCWSTR WideCharString, INT WideCharCount,
@@ -715,108 +766,233 @@
}
CodePageTable = &CodePageEntry->CodePageTable;
+
/* Different handling for DBCS code pages. */
if (CodePageTable->MaximumCharacterSize > 1)
{
- /* FIXME */
-
- USHORT WideChar;
- USHORT MbChar;
+ /* If Flags, DefaultChar or UsedDefaultChar were given, we have to do some more
work */
+ if(Flags || DefaultChar || UsedDefaultChar)
+ {
+ BOOL TempUsedDefaultChar;
+ USHORT DefChar;
+
+ /* If UsedDefaultChar is not set, set it to a temporary value, so we don't
have to check on every character */
+ if(!UsedDefaultChar)
+ UsedDefaultChar = &TempUsedDefaultChar;
+
+ *UsedDefaultChar = FALSE;
+
+ /* Use the CodePage's TransDefaultChar if none was given. Don't modify
the DefaultChar pointer here. */
+ if(DefaultChar)
+ DefChar = DefaultChar[1] ? ((DefaultChar[0] << 8) | DefaultChar[1]) :
DefaultChar[0];
+ else
+ DefChar = CodePageTable->TransDefaultChar;
+
+ /* Does caller query for output buffer size? */
+ if(!MultiByteCount)
+ {
+ for(TempLength = 0; WideCharCount; WideCharCount--, WideCharString++,
TempLength++)
+ {
+ USHORT uChar;
+
+ if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
+ {
+ /* FIXME: Handle WC_COMPOSITECHECK */
+ }
+
+ uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString];
+
+ /* Verify if the mapping is valid for handling DefaultChar and
UsedDefaultChar */
+ if(!IntIsValidDBCSMapping(CodePageTable, Flags, *WideCharString, uChar))
+ {
+ uChar = DefChar;
+ *UsedDefaultChar = TRUE;
+ }
+
+ /* Increment TempLength again if this is a double-byte character */
+ if(uChar & 0xff00)
+ TempLength++;
+ }
+
+ return TempLength;
+ }
+
+ /* Convert the WideCharString to the MultiByteString and verify if the mapping
is valid */
+ for(TempLength = MultiByteCount; WideCharCount && TempLength;
TempLength--, WideCharString++, WideCharCount--)
+ {
+ USHORT uChar;
+
+ if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
+ {
+ /* FIXME: Handle WC_COMPOSITECHECK */
+ }
+
+ uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString];
+
+ /* Verify if the mapping is valid for handling DefaultChar and
UsedDefaultChar */
+ if(!IntIsValidDBCSMapping(CodePageTable, Flags, *WideCharString, uChar))
+ {
+ uChar = DefChar;
+ *UsedDefaultChar = TRUE;
+ }
+
+ /* Handle double-byte characters */
+ if(uChar & 0xff00)
+ {
+ /* Don't output a partial character */
+ if(TempLength == 1)
+ break;
+
+ TempLength--;
+ *MultiByteString++ = uChar >> 8;
+ }
+
+ *MultiByteString++ = (char)uChar;
+ }
+
+ /* WideCharCount should be 0 if all characters were converted */
+ if(WideCharCount)
+ {
+ SetLastError(ERROR_INSUFFICIENT_BUFFER);
+ return 0;
+ }
+
+ return MultiByteCount - TempLength;
+ }
/* Does caller query for output buffer size? */
- if (MultiByteCount == 0)
- {
- for (TempLength = 0; WideCharCount; WideCharCount--, TempLength++)
- {
- WideChar = *WideCharString++;
-
- if (WideChar < 0x80)
- continue;
-
- MbChar = ((PWCHAR)CodePageTable->WideCharTable)[WideChar];
-
- if (!(MbChar & 0xff00))
- continue;
-
- TempLength++;
+ if(!MultiByteCount)
+ {
+ for(TempLength = 0; WideCharCount; WideCharCount--, WideCharString++,
TempLength++)
+ {
+ /* Increment TempLength again if this is a double-byte character */
+ if (((PWCHAR)CodePageTable->WideCharTable)[*WideCharString] & 0xff00)
+ TempLength++;
}
return TempLength;
}
- for (TempLength = MultiByteCount; WideCharCount; WideCharCount--)
- {
- WideChar = *WideCharString++;
-
- if (WideChar < 0x80)
- {
- if (!TempLength)
+ /* Convert the WideCharString to the MultiByteString */
+ for(TempLength = MultiByteCount; WideCharCount && TempLength; TempLength--,
WideCharString++, WideCharCount--)
+ {
+ USHORT uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString];
+
+ /* Is this a double-byte character? */
+ if(uChar & 0xff00)
+ {
+ /* Don't output a partial character */
+ if(TempLength == 1)
+ break;
+
+ TempLength--;
+ *MultiByteString++ = uChar >> 8;
+ }
+
+ *MultiByteString++ = (char)uChar;
+ }
+
+ /* WideCharCount should be 0 if all characters were converted */
+ if(WideCharCount)
+ {
+ SetLastError(ERROR_INSUFFICIENT_BUFFER);
+ return 0;
+ }
+
+ return MultiByteCount - TempLength;
+ }
+ else /* Not DBCS code page */
+ {
+ INT nReturn;
+
+ /* If Flags, DefaultChar or UsedDefaultChar were given, we have to do some more
work */
+ if(Flags || DefaultChar || UsedDefaultChar)
+ {
+ BOOL TempUsedDefaultChar;
+ CHAR DefChar;
+
+ /* If UsedDefaultChar is not set, set it to a temporary value, so we don't
have to check on every character */
+ if(!UsedDefaultChar)
+ UsedDefaultChar = &TempUsedDefaultChar;
+
+ *UsedDefaultChar = FALSE;
+
+ /* Does caller query for output buffer size? */
+ if(!MultiByteCount)
+ {
+ /* Loop through the whole WideCharString and check if we can get a valid
mapping for each character */
+ for(TempLength = 0; WideCharCount; TempLength++, WideCharString++,
WideCharCount--)
{
- SetLastError(ERROR_INSUFFICIENT_BUFFER);
- break;
+ if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
+ {
+ /* FIXME: Handle WC_COMPOSITECHECK */
+ }
+
+ if(!*UsedDefaultChar)
+ *UsedDefaultChar = !IntIsValidSBCSMapping(CodePageTable, Flags,
*WideCharString, ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]);
}
- TempLength--;
-
- *MultiByteString++ = (CHAR)WideChar;
- continue;
- }
-
- MbChar = ((PWCHAR)CodePageTable->WideCharTable)[WideChar];
-
- if (!(MbChar & 0xff00))
- {
- if (!TempLength)
+
+ return TempLength;
+ }
+
+ /* Use the CodePage's TransDefaultChar if none was given. Don't modify
the DefaultChar pointer here. */
+ if(DefaultChar)
+ DefChar = *DefaultChar;
+ else
+ DefChar = CodePageTable->TransDefaultChar;
+
+ /* Convert the WideCharString to the MultiByteString and verify if the mapping
is valid */
+ for(TempLength = MultiByteCount; WideCharCount && TempLength;
MultiByteString++, TempLength--, WideCharString++, WideCharCount--)
+ {
+ if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
{
- SetLastError(ERROR_INSUFFICIENT_BUFFER);
- break;
+ /* FIXME: Handle WC_COMPOSITECHECK */
}
- TempLength--;
-
- *MultiByteString++ = (CHAR)MbChar;
- continue;;
- }
-
- if (TempLength >= 2)
- {
- MultiByteString[1] = (CHAR)MbChar; MbChar >>= 8;
- MultiByteString[0] = (CHAR)MbChar;
- MultiByteString += 2;
- TempLength -= 2;
- }
- else
+
+ *MultiByteString =
((PCHAR)CodePageTable->WideCharTable)[*WideCharString];
+
+ if(!IntIsValidSBCSMapping(CodePageTable, Flags, *WideCharString,
*MultiByteString))
+ {
+ *MultiByteString = DefChar;
+ *UsedDefaultChar = TRUE;
+ }
+ }
+
+ /* WideCharCount should be 0 if all characters were converted */
+ if(WideCharCount)
{
SetLastError(ERROR_INSUFFICIENT_BUFFER);
- break;
- }
- }
-
- return MultiByteCount - TempLength;
- }
- else /* Not DBCS code page */
- {
+ return 0;
+ }
+
+ return MultiByteCount - TempLength;
+ }
+
/* Does caller query for output buffer size? */
- if (MultiByteCount == 0)
+ if(!MultiByteCount)
return WideCharCount;
- /* Adjust buffer size. Wine trick ;-) */
- if (MultiByteCount < WideCharCount)
- {
+ /* Is the buffer large enough? */
+ if(MultiByteCount < WideCharCount)
+ {
+ /* Convert the string up to MultiByteCount and return 0 */
WideCharCount = MultiByteCount;
SetLastError(ERROR_INSUFFICIENT_BUFFER);
- }
-
- for (TempLength = WideCharCount;
- TempLength > 0;
- WideCharString++, TempLength--)
- {
- *MultiByteString++ = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString];
- }
-
- /* FIXME */
- if (UsedDefaultChar != NULL)
- *UsedDefaultChar = FALSE;
-
- return WideCharCount;
+ nReturn = 0;
+ }
+ else
+ {
+ /* Otherwise WideCharCount will be the number of converted characters */
+ nReturn = WideCharCount;
+ }
+
+ /* Convert the WideCharString to the MultiByteString */
+ for(TempLength = WideCharCount; --TempLength >= 0; WideCharString++,
MultiByteString++)
+ {
+ *MultiByteString = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString];
+ }
+
+ return nReturn;
}
}
Modified: trunk/reactos/include/ddk/ntnls.h
URL:
http://svn.reactos.org/svn/reactos/trunk/reactos/include/ddk/ntnls.h?rev=34…
==============================================================================
--- trunk/reactos/include/ddk/ntnls.h [iso-8859-1] (original)
+++ trunk/reactos/include/ddk/ntnls.h [iso-8859-1] Fri Jul 11 14:11:06 2008
@@ -7,18 +7,20 @@
extern "C" {
#endif
#define MAXIMUM_LEADBYTES 12
+
+/* Some documentation can be found here:
http://www.ping.uio.no/~ovehk/nls/ */
typedef struct _CPTABLEINFO
{
USHORT CodePage;
- USHORT MaximumCharacterSize;
- USHORT DefaultChar;
- USHORT UniDefaultChar;
- USHORT TransDefaultChar;
- USHORT TransUniDefaultChar;
+ USHORT MaximumCharacterSize; /* 1 = SBCS, 2 = DBCS */
+ USHORT DefaultChar; /* Default MultiByte Character for the CP->Unicode conversion */
+ USHORT UniDefaultChar; /* Default Unicode Character for the CP->Unicode conversion */
+ USHORT TransDefaultChar; /* Default MultiByte Character for the Unicode->CP conversion */
+ USHORT TransUniDefaultChar; /* Default Unicode Character for the Unicode->CP conversion */
USHORT DBCSCodePage;
UCHAR LeadByte[MAXIMUM_LEADBYTES];
- PUSHORT MultiByteTable;
- PVOID WideCharTable;
+ PUSHORT MultiByteTable; /* Table for CP->Unicode conversion, indexed by the multibyte character */
+ PVOID WideCharTable; /* Table for Unicode->CP conversion, indexed by the wide character; nls.c reads it as CHAR entries for SBCS and as USHORT entries for DBCS code pages */
PUSHORT DBCSRanges;
PUSHORT DBCSOffsets;
} CPTABLEINFO, *PCPTABLEINFO;