Author: cfinck Date: Wed Jul 16 16:40:09 2008 New Revision: 34557
URL: http://svn.reactos.org/svn/reactos?rev=34557&view=rev Log: - Remove 2 wrong versions of wctomb and 2 wrong versions of wcstombs - Implement new versions of wctomb and wcstombs, which honour the locale set by setlocale() and match all the behaviours I could observe when testing under WinXP SP2. This was tested with my own test suite (which I can commit as well if anyone is interested) - Do a real conversion to multibyte characters using wctomb in fputwc and vfwprintf (verified under WinXP SP2) - Set 'MSVCRT___lc_codepage' and 'MSVCRT___lc_collate_cp' to 1252 by default ("C" locale) instead of the currently active codepage (which might not work with e.g. Eastern codepages) - Add a new check for 'MultiByteCount < 0' to WideCharToMultiByte (also verified under WinXP SP2) - Change MB_LEN_MAX back to 2; the value 5 only applies to newer CRTs (msvcrt only handles single-byte and double-byte characters) - Don't compile the Wine-imported 'wcscpy_s', as it isn't available in msvcrt
Removed: trunk/reactos/lib/sdk/crt/string/wcstom.c trunk/reactos/lib/sdk/crt/string/wcstomb.c trunk/reactos/lib/sdk/crt/string/wcstombs.c trunk/reactos/lib/sdk/crt/string/wctomb.c Modified: trunk/reactos/dll/win32/kernel32/misc/nls.c trunk/reactos/include/crt/limits.h trunk/reactos/lib/sdk/crt/crt.rbuild trunk/reactos/lib/sdk/crt/include/internal/mbstring.h trunk/reactos/lib/sdk/crt/locale/locale.c trunk/reactos/lib/sdk/crt/stdio/file.c trunk/reactos/lib/sdk/crt/string/wcs.c
Modified: trunk/reactos/dll/win32/kernel32/misc/nls.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/dll/win32/kernel32/misc/nls... ============================================================================== --- trunk/reactos/dll/win32/kernel32/misc/nls.c [iso-8859-1] (original) +++ trunk/reactos/dll/win32/kernel32/misc/nls.c [iso-8859-1] Wed Jul 16 16:40:09 2008 @@ -1295,7 +1295,8 @@ /* Check the parameters. */ if (WideCharString == NULL || (MultiByteString == NULL && MultiByteCount > 0) || - (PVOID)WideCharString == (PVOID)MultiByteString) + (PVOID)WideCharString == (PVOID)MultiByteString || + MultiByteCount < 0) { SetLastError(ERROR_INVALID_PARAMETER); return 0;
Modified: trunk/reactos/include/crt/limits.h URL: http://svn.reactos.org/svn/reactos/trunk/reactos/include/crt/limits.h?rev=34... ============================================================================== --- trunk/reactos/include/crt/limits.h [iso-8859-1] (original) +++ trunk/reactos/include/crt/limits.h [iso-8859-1] Wed Jul 16 16:40:09 2008 @@ -34,7 +34,7 @@ * Characteristics of the char data type. */ #define CHAR_BIT 8 -#define MB_LEN_MAX 5 +#define MB_LEN_MAX 2
#define SCHAR_MIN (-128) #define SCHAR_MAX 127
Modified: trunk/reactos/lib/sdk/crt/crt.rbuild URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/crt.rbuild?rev=... ============================================================================== --- trunk/reactos/lib/sdk/crt/crt.rbuild [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/crt.rbuild [iso-8859-1] Wed Jul 16 16:40:09 2008 @@ -355,9 +355,7 @@ <file>strxfrm.c</file> <file>wcs.c</file> <file>wcstol.c</file> - <file>wcstombs.c</file> <file>wcstoul.c</file> - <file>wctomb.c</file> <file>wsplitp.c</file> <file>wtoi.c</file> <file>wtoi64.c</file>
Modified: trunk/reactos/lib/sdk/crt/include/internal/mbstring.h URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/include/interna... ============================================================================== --- trunk/reactos/lib/sdk/crt/include/internal/mbstring.h [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/include/internal/mbstring.h [iso-8859-1] Wed Jul 16 16:40:09 2008 @@ -37,7 +37,10 @@ #define LT (_MLEAD | _MTRAIL) #define PT (_MPUNCT | _MTRAIL)
+#define MAX_LOCALE_LENGTH 256 extern unsigned char _mbctype[257]; +extern int MSVCRT___lc_codepage; +extern char MSVCRT_current_lc_all[MAX_LOCALE_LENGTH];
#if defined (_MSC_VER)
Modified: trunk/reactos/lib/sdk/crt/locale/locale.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/locale/locale.c... ============================================================================== --- trunk/reactos/lib/sdk/crt/locale/locale.c [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/locale/locale.c [iso-8859-1] Wed Jul 16 16:40:09 2008 @@ -26,7 +26,6 @@ * string to produce lc_all. */ #define MAX_ELEM_LEN 64 /* Max length of country/language/CP string */ -#define MAX_LOCALE_LENGTH 256
unsigned char MSVCRT_mbctype[257]; static int g_mbcp_is_multibyte = 0; @@ -388,8 +387,8 @@ { MSVCRT_current_lc_all[0] = 'C'; MSVCRT_current_lc_all[1] = '\0'; - MSVCRT___lc_codepage = GetACP(); - MSVCRT___lc_collate_cp = GetACP(); + MSVCRT___lc_codepage = 1252; + MSVCRT___lc_collate_cp = 1252;
switch (category) { case MSVCRT_LC_ALL:
Modified: trunk/reactos/lib/sdk/crt/stdio/file.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/stdio/file.c?re... ============================================================================== --- trunk/reactos/lib/sdk/crt/stdio/file.c [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/stdio/file.c [iso-8859-1] Wed Jul 16 16:40:09 2008 @@ -2475,23 +2475,28 @@ */ wint_t CDECL fputwc(wint_t wc, FILE* file) { - if (file->_flag & _IOBINARY) - { - wchar_t mwc = wc; - - if (fwrite( &mwc, sizeof(mwc), 1, file) != 1) - return WEOF; - } - else - { - /* Convert the character to ANSI */ - char c = (unsigned char)wc; - - if (fwrite( &c, sizeof(c), 1, file) != 1) - return WEOF; - } - - return wc; + if (file->_flag & _IOBINARY) + { + if (fwrite(&wc, sizeof(wc), 1, file) != 1) + return WEOF; + } + else + { + /* Convert to multibyte in text mode */ + char mbc[MB_LEN_MAX]; + int mb_return; + + mb_return = wctomb(mbc, wc); + + if(mb_return == -1) + return WEOF; + + /* Output all characters */ + if (fwrite(mbc, mb_return, 1, file) != 1) + return WEOF; + } + + return wc; }
/********************************************************************* @@ -3121,7 +3126,6 @@ int CDECL vfwprintf(FILE* file, const wchar_t *format, va_list valist) { wchar_t buf[2048], *mem = buf; - char mbbuf[2048], *mbmem = mbbuf; int written, resize = sizeof(buf) / sizeof(wchar_t), retval; /* See vfprintf comments */ while ((written = _vsnwprintf(mem, resize, format, valist)) == -1 || @@ -3137,17 +3141,22 @@ /* Check if outputting to a text-file */ if (fdesc[file->_file].wxflag & WX_TEXT) { - /* Convert to multibyte then */ - written = wcstombs(NULL, mem, 0); - - if (written >= sizeof(mbbuf) && (written != (int)-1)) - mbmem = malloc(written + 1); - - wcstombs(mbmem, mem, written); - retval = fwrite(mbmem, 1, written, file); - - if (mbmem != mbbuf) - free(mbmem); + /* Convert each character and stop at the first invalid character. Behavior verified by tests under WinXP SP2 */ + char chMultiByte[MB_LEN_MAX]; + int nReturn; + + retval = 0; + + while(*mem) + { + nReturn = wctomb(chMultiByte, *mem); + + if(nReturn == -1) + break; + + retval += fwrite(chMultiByte, 1, nReturn, file); + mem++; + } } else {
Modified: trunk/reactos/lib/sdk/crt/string/wcs.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/string/wcs.c?re... ============================================================================== --- trunk/reactos/lib/sdk/crt/string/wcs.c [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/string/wcs.c [iso-8859-1] Wed Jul 16 16:40:09 2008 @@ -40,12 +40,6 @@
//WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
-// HACK for LIBCNT -#ifndef debugstr_w -#define debugstr_w -#endif - - #undef sprintf #undef wsprintf #undef snprintf @@ -77,6 +71,7 @@ return strcmpiW( str1, str2 ); } #endif + /********************************************************************* * _wcsnset (MSVCRT.@) */ @@ -102,6 +97,7 @@ } return ret; } + #ifndef _LIBCNT_ /********************************************************************* * _wcsset (MSVCRT.@) @@ -854,6 +850,7 @@ return _vsnwprintf( str, INT_MAX, format, args ); } #endif + /********************************************************************* * wcscoll (MSVCRT.@) */ @@ -876,6 +873,7 @@ } return NULL; } + #ifndef _LIBCNT_ /********************************************************************* * wcstok (MSVCRT.@) @@ -896,17 +894,131 @@ data->wcstok_next = str; return ret; } + +/********************************************************************* + * wctomb (MSVCRT.@) + */ +INT CDECL wctomb(char *mbchar, wchar_t wchar) +{ + BOOL bUsedDefaultChar; + char chMultiByte[MB_LEN_MAX]; + int nBytes; + + /* At least one parameter needs to be given, the length of a null character cannot be queried (verified by tests under WinXP SP2) */ + if(!mbchar && !wchar) + return 0; + + /* Use WideCharToMultiByte for doing the conversion using the codepage currently set with setlocale() */ + nBytes = WideCharToMultiByte(MSVCRT___lc_codepage, 0, &wchar, 1, chMultiByte, MB_LEN_MAX, NULL, &bUsedDefaultChar); + + /* Only copy the character if an 'mbchar' pointer was given. + + The "C" locale is emulated with codepage 1252 here. This codepage has a default character "?", but the "C" locale doesn't have one. + Therefore don't copy the character in this case. */ + if(mbchar && !(MSVCRT_current_lc_all[0] == 'C' && !MSVCRT_current_lc_all[1] && bUsedDefaultChar)) + memcpy(mbchar, chMultiByte, nBytes); + + /* If the default character was used, set errno to EILSEQ and return -1. 
*/ + if(bUsedDefaultChar) + { + __set_errno(EILSEQ); + return -1; + } + + /* Otherwise return the number of bytes this character occupies. */ + return nBytes; +} + +size_t CDECL wcstombs(char *mbstr, const wchar_t *wcstr, size_t count) +{ + BOOL bUsedDefaultChar; + char* p = mbstr; + int nResult; + + /* Does the caller query for output buffer size? */ + if(!mbstr) + { + int nLength; + + /* If we currently use the "C" locale, the length of the input string is returned (verified by tests under WinXP SP2) */ + if(MSVCRT_current_lc_all[0] == 'C' && !MSVCRT_current_lc_all[1]) + return wcslen(wcstr); + + /* Otherwise check the length each character needs and build a final return value out of this */ + count = wcslen(wcstr); + nLength = 0; + + while((int)(--count) >= 0 && *wcstr) + { + /* Get the length of this character */ + nResult = wctomb(NULL, *wcstr++); + + /* If this character is not convertible in the current locale, the end result will be -1 */ + if(nResult == -1) + return -1; + + nLength += nResult; + } + + /* Return the final length */ + return nLength; + } + + /* Convert the string then */ + bUsedDefaultChar = FALSE; + + for(;;) + { + char chMultiByte[MB_LEN_MAX]; + UINT uLength; + + /* Are we at the terminating null character? 
*/ + if(!*wcstr) + { + /* Set the null character, but don't increment the pointer as the returned length never includes the terminating null character */ + *p = 0; + break; + } + + /* Convert this character into the temporary chMultiByte variable */ + ZeroMemory(chMultiByte, MB_LEN_MAX); + nResult = wctomb(chMultiByte, *wcstr++); + + /* Check if this was an invalid character */ + if(nResult == -1) + bUsedDefaultChar = TRUE; + + /* If we got no character, stop the conversion process here */ + if(!chMultiByte[0]) + break; + + /* Determine whether this is a double-byte or a single-byte character */ + if(chMultiByte[1]) + uLength = 2; + else + uLength = 1; + + /* Decrease 'count' by the character length and check if the buffer can still hold the full character */ + count -= uLength; + + if((int)count < 0) + break; + + /* It can, so copy it and move the pointer forward */ + memcpy(p, chMultiByte, uLength); + p += uLength; + } + + if(bUsedDefaultChar) + return -1; + + /* Return the length in bytes of the copied characters (without the terminating null character) */ + return p - mbstr; +} #endif + #ifndef __REACTOS__ /********************************************************************* - * wctomb (MSVCRT.@) - */ -INT CDECL wctomb( char *dst, wchar_t ch ) -{ - return WideCharToMultiByte( CP_ACP, 0, &ch, 1, dst, 6, NULL, NULL ); -} - -/********************************************************************* * iswalnum (MSVCRT.@) */ INT CDECL iswalnum( wchar_t wc ) @@ -993,8 +1105,7 @@ { return isxdigitW( wc ); } -#endif -#ifndef _LIBCNT_ + /********************************************************************* * wcscpy_s (MSVCRT.@) */
Removed: trunk/reactos/lib/sdk/crt/string/wcstom.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/string/wcstom.c... ============================================================================== --- trunk/reactos/lib/sdk/crt/string/wcstom.c [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/string/wcstom.c (removed) @@ -1,39 +1,0 @@ -/* - * COPYRIGHT: See COPYING in the top level directory - * PROJECT: ReactOS system libraries - * FILE: lib/msvcrt/mbstring/wcstom.c - * PURPOSE: - * PROGRAMER: - * UPDATE HISTORY: - * 05/30/08: Samuel Serapion adapted from PROJECT C Library - * - */ - -#include <precomp.h> -#include <mbctype.h> - -/* - * @implemented - */ -size_t wcstombs (char *string, const wchar_t *widechar, size_t count) -{ - int n, bytes; - int cnt = 0; - - for (n = 0; n < count; n++) { - - if ((bytes = wctomb (string, *widechar)) < 0) - return -1; - - if (*string == 0) - return cnt; - - widechar++; - string += bytes; - cnt += bytes; - } - - return cnt; -} - -
Removed: trunk/reactos/lib/sdk/crt/string/wcstomb.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/string/wcstomb.... ============================================================================== --- trunk/reactos/lib/sdk/crt/string/wcstomb.c [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/string/wcstomb.c (removed) @@ -1,114 +1,0 @@ -/* Copyright (C) 1991, 1992, 1995, 1996, 1997 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -#include <precomp.h> -#include <wchar.h> - -#ifndef EILSEQ -#define EILSEQ EINVAL -#endif - -static const wchar_t encoding_mask[] = -{ - (wchar_t)~0x7ff, (wchar_t)~0xffff, (wchar_t)~0x1fffff, (wchar_t)~0x3ffffff -}; - -static const unsigned char encoding_byte[] = -{ - 0xc0, 0xe0, 0xf0, 0xf8, 0xfc -}; - -/* The state is for this UTF8 encoding not used. */ -//static mbstate_t internal; - - -//extern mbstate_t __no_r_state; /* Defined in mbtowc.c. */ - -size_t -__wcrtomb (char *s, wchar_t wc); - -/* - * Convert WCHAR into its multibyte character representation, - * putting this in S and returning its length. - * - * Attention: this function should NEVER be intentionally used. - * The interface is completely stupid. 
The state is shared between - * all conversion functions. You should use instead the restartable - * version `wcrtomb'. - * - * @implemented - */ -int -wctomb (char *s, wchar_t wchar) -{ - /* If S is NULL the function has to return null or not null - depending on the encoding having a state depending encoding or - not. This is nonsense because any multibyte encoding has a - state. The ISO C amendment 1 corrects this while introducing the - restartable functions. We simply say here all encodings have a - state. */ - if (s == NULL) - return 1; - - return __wcrtomb (s, wchar); -} - - -size_t -__wcrtomb (char *s, wchar_t wc) -{ - char fake[1]; - size_t written = 0; - - - - if (s == NULL) - { - s = fake; - wc = L'\0'; - } - - if (wc < 0x80) - { - /* It's a one byte sequence. */ - if (s != NULL) - *s = (char) wc; - return 1; - } - - for (written = 2; written < 6; ++written) - if ((wc & encoding_mask[written - 2]) == 0) - break; - - if (s != NULL) - { - size_t cnt = written; - s[0] = encoding_byte[cnt - 2]; - - --cnt; - do - { - s[cnt] = 0x80 | (wc & 0x3f); - wc >>= 6; - } - while (--cnt > 0); - s[0] |= wc; - } - - return written; -}
Removed: trunk/reactos/lib/sdk/crt/string/wcstombs.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/string/wcstombs... ============================================================================== --- trunk/reactos/lib/sdk/crt/string/wcstombs.c [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/string/wcstombs.c (removed) @@ -1,157 +1,0 @@ -/* Copyright (C) 1991, 1992, 1995, 1996, 1997 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -#include <precomp.h> -#include <wchar.h> - -#ifndef EILSEQ -#define EILSEQ EINVAL -#endif - - -static const wchar_t encoding_mask[] = -{ - (~0x7ff&WCHAR_MAX), (~0xffff&WCHAR_MAX), (~0x1fffff&WCHAR_MAX), (~0x3ffffff&WCHAR_MAX) -}; - -static const unsigned char encoding_byte[] = -{ - 0xc0, 0xe0, 0xf0, 0xf8, 0xfc -}; - -/* We don't need the state really because we don't have shift states - to maintain between calls to this function. */ - -static mbstate_t mbstate_internal; - - -mbstate_t __no_r_state; /* Now defined in wcstombs.c. */ -//extern mbstate_t __no_r_state; /* Defined in mbtowc.c. 
*/ - -size_t -__wcsrtombs (char *dst, const wchar_t **src, size_t len, mbstate_t *ps); - -/* - * Convert the `wchar_t' string in PWCS to a multibyte character string - * in S, writing no more than N characters. Return the number of bytes - * written, or (size_t) -1 if an invalid `wchar_t' was found. - * - * Attention: this function should NEVER be intentionally used. - * The interface is completely stupid. The state is shared between - * all conversion functions. You should use instead the restartable - * version `wcsrtombs'. - * - * @implemented - */ -size_t -wcstombs (char *s, const wchar_t *pwcs, size_t n) -{ - mbstate_t save_shift = __no_r_state; - size_t written; - - written = __wcsrtombs (s, &pwcs, n, &__no_r_state); - - /* Restore the old shift state. */ - __no_r_state = save_shift; - - /* Return how many we wrote (or maybe an error). */ - return written; -} - -size_t -__wcsrtombs (char *dst, const wchar_t **src, size_t len, mbstate_t *ps) -{ - size_t written = 0; - const wchar_t *run = *src; - - if (ps == NULL) - ps = &mbstate_internal; - - if (dst == NULL) - /* The LEN parameter has to be ignored if we don't actually write - anything. */ - len = ~0; - - while (written < len) - { - wchar_t wc = *run++; - -#if 0 - if (wc < 0 || wc > WCHAR_MAX) - { - /* This is no correct ISO 10646 character. */ - __set_errno (EILSEQ); - return (size_t) -1; - } -#endif - - if (wc == L'\0') - { - /* Found the end. */ - if (dst != NULL) - *dst = '\0'; - *src = NULL; - return written; - } - else if (wc < 0x80) - { - /* It's an one byte sequence. */ - if (dst != NULL) - *dst++ = (char) wc; - ++written; - } - else - { - size_t step; - - for (step = 2; step < 6; ++step) - if ((wc & encoding_mask[step - 2]) == 0) - break; - - if (written + step >= len) - /* Too long. 
*/ - break; - - if (dst != NULL) - { - size_t cnt = step; - - dst[0] = encoding_byte[cnt - 2]; - - --cnt; - do - { - dst[cnt] = 0x80 | (wc & 0x3f); - wc >>= 6; - } - while (--cnt > 0); - dst[0] |= wc; - - dst += step; - } - - written += step; - } - } - - /* Store position of first unprocessed word. */ - *src = run; - - return written; -} -//weak_alias (__wcsrtombs, wcsrtombs)
Removed: trunk/reactos/lib/sdk/crt/string/wctomb.c URL: http://svn.reactos.org/svn/reactos/trunk/reactos/lib/sdk/crt/string/wctomb.c... ============================================================================== --- trunk/reactos/lib/sdk/crt/string/wctomb.c [iso-8859-1] (original) +++ trunk/reactos/lib/sdk/crt/string/wctomb.c (removed) @@ -1,47 +1,0 @@ -/* - * COPYRIGHT: See COPYING in the top level directory - * PROJECT: ReactOS system libraries - * FILE: lib/sdk/crt/mbstring/wctomb.c - * PURPOSE: - * PROGRAMER: - * UPDATE HISTORY: - * 05/30/08: Samuel Serapion adapted from PROJECT C Library - * - */ - -#include <precomp.h> -#include <mbstring.h> - -/* - * @implemented - */ -int wctomb (char *string, wchar_t widechar) -{ - int c1, c2; - - if (string == 0) - return 0; - - if (widechar & 0xff00) { - - c1 = (widechar >> 8) & 0xff; - c2 = (widechar & 0xff); - - if (_ismbblead (c1) == 0 || _ismbbtrail (c2) == 0) - return -1; - - *string++ = (char) c1; - *string = (char) c2; - - return 2; - - } - else { - - *string = (char) widechar & 0xff; - - return 1; - - } -} -