https://git.reactos.org/?p=reactos.git;a=commitdiff;h=e85664a3d8dfb955b56b2f5d27a8ccd85db9454b
commit e85664a3d8dfb955b56b2f5d27a8ccd85db9454b
Author: Katayama Hirofumi MZ <katayama.hirofumi.mz(a)gmail.com>
AuthorDate: Sun Aug 18 22:46:56 2019 +0900
Commit: GitHub <noreply(a)github.com>
CommitDate: Sun Aug 18 22:46:56 2019 +0900
[NOTEPAD] Encoding detection (#1852)
CORE-15548
In notepad, if there is no BOM in the input file, then judge the text encoding.
---
base/applications/notepad/text.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/base/applications/notepad/text.c b/base/applications/notepad/text.c
index 6e26a7ab474..d22cf4817ee 100644
--- a/base/applications/notepad/text.c
+++ b/base/applications/notepad/text.c
@@ -4,6 +4,7 @@
* Copyright 1998,99 Marcel Baur <mbaur(a)g26.ethz.ch>
* Copyright 2002 Sylvain Petreolle <spetreolle(a)yahoo.fr>
* Copyright 2002 Andriy Palamarchuk
+ * Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz(a)gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -47,6 +48,32 @@ static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, D
return TRUE;
}
+/*
+ * Guess the encoding of a text buffer that carries no byte-order mark.
+ *
+ * pBytes: start of the raw bytes to examine.
+ * dwSize: number of bytes available at pBytes.
+ *
+ * Returns ENCODING_UTF16LE or ENCODING_UTF16BE when the IsTextUnicode()
+ * heuristics report UTF-16 text in that byte order, ENCODING_UTF8 when the
+ * buffer converts as strictly valid UTF-8, and ENCODING_ANSI otherwise
+ * (including buffers shorter than two bytes).
+ */
+ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
+{
+    INT flags = IS_TEXT_UNICODE_STATISTICS;
+
+    /* Fewer than two bytes cannot hold a single UTF-16 code unit. */
+    if (dwSize <= 1)
+        return ENCODING_ANSI;
+
+    if (IsTextUnicode(pBytes, dwSize, &flags))
+    {
+        return ENCODING_UTF16LE;
+    }
+
+    /*
+     * IsTextUnicode() reports results only for the tests requested on input.
+     * The call above asked for IS_TEXT_UNICODE_STATISTICS alone, so its output
+     * can never contain a byte-reversed or illegal-character bit. Query those
+     * tests explicitly; only the resulting flags matter here, not the return
+     * value.
+     */
+    flags = IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_ILLEGAL_CHARS;
+    (void)IsTextUnicode(pBytes, dwSize, &flags);
+
+    if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & IS_TEXT_UNICODE_ILLEGAL_CHARS))
+    {
+        return ENCODING_UTF16BE;
+    }
+
+    /*
+     * is it UTF-8? With MB_ERR_INVALID_CHARS the size query returns 0 as soon
+     * as the input contains an invalid sequence.
+     */
+    if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0))
+    {
+        return ENCODING_UTF8;
+    }
+
+    return ENCODING_ANSI;
+}
+
BOOL
ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int
*piEoln)
{
@@ -98,6 +125,10 @@ ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile,
encFile = ENCODING_UTF8;
dwPos += 3;
}
+ else
+ {
+ encFile = AnalyzeEncoding((const char *)pBytes, dwSize);
+ }
switch(encFile)
{