https://git.reactos.org/?p=reactos.git;a=commitdiff;h=0a851eadcb790e4a52bac1e05153d0913e63e896
commit 0a851eadcb790e4a52bac1e05153d0913e63e896
Author: Katayama Hirofumi MZ <katayama.hirofumi.mz(a)gmail.com>
AuthorDate: Thu Feb 2 10:10:30 2023 +0900
Commit: GitHub <noreply(a)github.com>
CommitDate: Thu Feb 2 10:10:30 2023 +0900
[NOTEPAD] Speed up notepad loading (#5012)
- Use the EM_GETHANDLE/EM_SETHANDLE messages to get/set the internal buffer handle.
- Use LocalReAlloc to re-allocate the buffer.
- Use file mapping to speed up loading.
- Also pass IS_TEXT_UNICODE_REVERSE_STATISTICS to IsTextUnicode.
CORE-14641
---
base/applications/notepad/dialog.c | 21 +--
base/applications/notepad/main.h | 15 +-
base/applications/notepad/text.c | 326 ++++++++++++++++++++-----------------
3 files changed, 192 insertions(+), 170 deletions(-)
diff --git a/base/applications/notepad/dialog.c b/base/applications/notepad/dialog.c
index 6e45d05798e..06618147121 100644
--- a/base/applications/notepad/dialog.c
+++ b/base/applications/notepad/dialog.c
@@ -419,11 +419,9 @@ BOOL DoCloseFile(VOID)
VOID DoOpenFile(LPCTSTR szFileName)
{
- static const TCHAR dotlog[] = _T(".LOG");
HANDLE hFile;
- LPTSTR pszText = NULL;
- DWORD dwTextLen;
TCHAR log[5];
+ HLOCAL hLocal;
/* Close any files and prompt to save changes */
if (!DoCloseFile())
@@ -437,21 +435,22 @@ VOID DoOpenFile(LPCTSTR szFileName)
goto done;
}
- if (!ReadText(hFile, (LPWSTR *)&pszText, &dwTextLen, &Globals.encFile,
&Globals.iEoln))
+ /* To make loading file quicker, we use the internal handle of EDIT control */
+ hLocal = (HLOCAL)SendMessageW(Globals.hEdit, EM_GETHANDLE, 0, 0);
+ if (!ReadText(hFile, &hLocal, &Globals.encFile, &Globals.iEoln))
{
ShowLastError();
goto done;
}
- SetWindowText(Globals.hEdit, pszText);
+ SendMessageW(Globals.hEdit, EM_SETHANDLE, (WPARAM)hLocal, 0);
+ /* No need of EM_SETMODIFY and EM_EMPTYUNDOBUFFER here. EM_SETHANDLE does instead.
*/
- SendMessage(Globals.hEdit, EM_SETMODIFY, FALSE, 0);
- SendMessage(Globals.hEdit, EM_EMPTYUNDOBUFFER, 0, 0);
SetFocus(Globals.hEdit);
/* If the file starts with .LOG, add a time/date at the end and set cursor after
- * See
http://support.microsoft.com/?kbid=260563
+ * See
http://web.archive.org/web/20090627165105/http://support.microsoft.com/kb/2…
*/
- if (GetWindowText(Globals.hEdit, log, ARRAY_SIZE(log)) && !_tcscmp(log,
dotlog))
+ if (GetWindowText(Globals.hEdit, log, ARRAY_SIZE(log)) && !_tcscmp(log,
_T(".LOG")))
{
static const TCHAR lf[] = _T("\r\n");
SendMessage(Globals.hEdit, EM_SETSEL, GetWindowTextLength(Globals.hEdit), -1);
@@ -471,8 +470,6 @@ VOID DoOpenFile(LPCTSTR szFileName)
done:
if (hFile != INVALID_HANDLE_VALUE)
CloseHandle(hFile);
- if (pszText)
- HeapFree(GetProcessHeap(), 0, pszText);
}
VOID DIALOG_FileNew(VOID)
@@ -590,7 +587,7 @@ DIALOG_FileSaveAs_Hook(HWND hDlg, UINT msg, WPARAM wParam, LPARAM
lParam)
hCombo = GetDlgItem(hDlg, ID_EOLN);
if (hCombo)
- Globals.iEoln = (int) SendMessage(hCombo, CB_GETCURSEL, 0, 0);
+ Globals.iEoln = (EOLN)SendMessage(hCombo, CB_GETCURSEL, 0, 0);
}
break;
}
diff --git a/base/applications/notepad/main.h b/base/applications/notepad/main.h
index ce7fc850877..e2140c2b52b 100644
--- a/base/applications/notepad/main.h
+++ b/base/applications/notepad/main.h
@@ -47,9 +47,12 @@ typedef enum
// #define MIN_ENCODING 0
// #define MAX_ENCODING 3
-#define EOLN_CRLF 0
-#define EOLN_LF 1
-#define EOLN_CR 2
+typedef enum
+{
+ EOLN_CRLF = 0, /* "\r\n" */
+ EOLN_LF = 1, /* "\n" */
+ EOLN_CR = 2 /* "\r" */
+} EOLN; /* End of line (NewLine) type */
typedef struct
{
@@ -76,7 +79,7 @@ typedef struct
TCHAR szStatusBarLineCol[MAX_PATH];
ENCODING encFile;
- int iEoln;
+ EOLN iEoln;
FINDREPLACE find;
WNDPROC EditProc;
@@ -89,8 +92,8 @@ extern NOTEPAD_GLOBALS Globals;
VOID SetFileName(LPCTSTR szFileName);
/* from text.c */
-BOOL ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int
*piEoln);
-BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int
iEoln);
+BOOL ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln);
+BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, EOLN
iEoln);
/* from settings.c */
void NOTEPAD_LoadSettingsFromRegistry(void);
diff --git a/base/applications/notepad/text.c b/base/applications/notepad/text.c
index 6a83a89d64f..3ce3a889bfe 100644
--- a/base/applications/notepad/text.c
+++ b/base/applications/notepad/text.c
@@ -4,7 +4,7 @@
* Copyright 1998,99 Marcel Baur <mbaur(a)g26.ethz.ch>
* Copyright 2002 Sylvain Petreolle <spetreolle(a)yahoo.fr>
* Copyright 2002 Andriy Palamarchuk
- * Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz(a)gmail.com>
+ * Copyright 2019-2023 Katayama Hirofumi MZ <katayama.hirofumi.mz(a)gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -22,31 +22,7 @@
*/
#include "notepad.h"
-
-static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD
dwAppendLen)
-{
- LPWSTR pszNewText;
-
- if (dwAppendLen > 0)
- {
- if (*ppszText)
- {
- pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText,
(*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
- }
- else
- {
- pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen *
sizeof(WCHAR));
- }
-
- if (!pszNewText)
- return FALSE;
-
- memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
- *ppszText = pszNewText;
- *pdwTextLen += dwAppendLen;
- }
- return TRUE;
-}
+#include <assert.h>
BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize)
{
@@ -63,71 +39,156 @@ BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize)
ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
{
- INT flags = IS_TEXT_UNICODE_STATISTICS;
+ INT flags = IS_TEXT_UNICODE_STATISTICS | IS_TEXT_UNICODE_REVERSE_STATISTICS;
- if (dwSize <= 1)
+ if (dwSize <= 1 || IsTextNonZeroASCII(pBytes, dwSize))
return ENCODING_ANSI;
- if (IsTextNonZeroASCII(pBytes, dwSize))
- {
- return ENCODING_ANSI;
- }
-
if (IsTextUnicode(pBytes, dwSize, &flags))
- {
return ENCODING_UTF16LE;
- }
if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags &
IS_TEXT_UNICODE_ILLEGAL_CHARS))
- {
return ENCODING_UTF16BE;
- }
/* is it UTF-8? */
if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0))
- {
return ENCODING_UTF8;
- }
return ENCODING_ANSI;
}
+static VOID
+ReplaceNewLines(LPWSTR pszNew, SIZE_T cchNew, LPCWSTR pszOld, SIZE_T cchOld)
+{
+ BOOL bPrevCR = FALSE;
+ SIZE_T ichNew, ichOld;
+
+ for (ichOld = ichNew = 0; ichOld < cchOld; ++ichOld)
+ {
+ WCHAR ch = pszOld[ichOld];
+
+ if (ch == L'\n')
+ {
+ if (!bPrevCR)
+ {
+ pszNew[ichNew++] = L'\r';
+ pszNew[ichNew++] = L'\n';
+ }
+ }
+ else if (ch == '\r')
+ {
+ pszNew[ichNew++] = L'\r';
+ pszNew[ichNew++] = L'\n';
+ }
+ else
+ {
+ pszNew[ichNew++] = ch;
+ }
+
+ bPrevCR = (ch == L'\r');
+ }
+
+ pszNew[ichNew] = UNICODE_NULL;
+ assert(ichNew == cchNew);
+}
+
+static BOOL
+ProcessNewLinesAndNulls(HLOCAL *phLocal, LPWSTR *ppszText, SIZE_T *pcchText, EOLN
*piEoln)
+{
+ SIZE_T ich, cchText = *pcchText, adwEolnCount[3] = { 0, 0, 0 }, cNonCRLFs;
+ LPWSTR pszText = *ppszText;
+ EOLN iEoln;
+ BOOL bPrevCR = FALSE;
+
+ /* Replace '\0' with SPACE. Count newlines. */
+ for (ich = 0; ich < cchText; ++ich)
+ {
+ WCHAR ch = pszText[ich];
+ if (ch == UNICODE_NULL)
+ pszText[ich] = L' ';
+
+ if (ch == L'\n')
+ {
+ if (bPrevCR)
+ {
+ adwEolnCount[EOLN_CR]--;
+ adwEolnCount[EOLN_CRLF]++;
+ }
+ else
+ {
+ adwEolnCount[EOLN_LF]++;
+ }
+ }
+ else if (ch == '\r')
+ {
+ adwEolnCount[EOLN_CR]++;
+ }
+
+ bPrevCR = (ch == L'\r');
+ }
+
+ /* Choose the newline code */
+ if (adwEolnCount[EOLN_CR] > adwEolnCount[EOLN_CRLF])
+ iEoln = EOLN_CR;
+ else if (adwEolnCount[EOLN_LF] > adwEolnCount[EOLN_CRLF])
+ iEoln = EOLN_LF;
+ else
+ iEoln = EOLN_CRLF;
+
+ cNonCRLFs = adwEolnCount[EOLN_CR] + adwEolnCount[EOLN_LF];
+ if (cNonCRLFs != 0)
+ {
+ /* Allocate a buffer for EM_SETHANDLE */
+ SIZE_T cchNew = cchText + cNonCRLFs;
+ HLOCAL hLocal = LocalAlloc(LMEM_MOVEABLE, (cchNew + 1) * sizeof(WCHAR));
+ LPWSTR pszNew = LocalLock(hLocal);
+ if (!pszNew)
+ {
+ LocalFree(hLocal);
+ return FALSE; /* Failure */
+ }
+
+ ReplaceNewLines(pszNew, cchNew, pszText, cchText);
+
+ /* Replace with new data */
+ LocalUnlock(*phLocal);
+ LocalFree(*phLocal);
+ *phLocal = hLocal;
+ *ppszText = pszNew;
+ *pcchText = cchNew;
+ }
+
+ *piEoln = iEoln;
+ return TRUE;
+}
+
BOOL
-ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int
*piEoln)
+ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln)
{
- DWORD dwSize;
- LPBYTE pBytes = NULL;
- LPWSTR pszText;
- LPWSTR pszAllocText = NULL;
- DWORD dwPos, i;
- DWORD dwCharCount;
+ PCHAR pBytes = NULL;
+ LPWSTR pszText, pszNewText = NULL;
+ DWORD dwSize, dwPos;
+ SIZE_T i, cchText, cbContent;
BOOL bSuccess = FALSE;
- BYTE b = 0;
ENCODING encFile = ENCODING_ANSI;
- int iCodePage = 0;
- WCHAR szCrlf[2] = {'\r', '\n'};
- DWORD adwEolnCount[3] = {0, 0, 0};
-
- *ppszText = NULL;
- *pdwTextLen = 0;
+ UINT iCodePage;
+ HANDLE hMapping = INVALID_HANDLE_VALUE;
+ HLOCAL hNewLocal;
dwSize = GetFileSize(hFile, NULL);
if (dwSize == INVALID_FILE_SIZE)
goto done;
- pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
- if (!pBytes)
+ hMapping = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
+ if (hMapping == NULL)
goto done;
- if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
+ pBytes = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, dwSize);
+ if (!pBytes)
goto done;
- dwPos = 0;
-
- /* Make sure that there is a NUL character at the end, in any encoding */
- pBytes[dwSize + 0] = '\0';
- pBytes[dwSize + 1] = '\0';
/* Look for Byte Order Marks */
+ dwPos = 0;
if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
{
encFile = ENCODING_UTF16LE;
@@ -151,124 +212,85 @@ ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen,
ENCODING *pencFile,
switch(encFile)
{
case ENCODING_UTF16BE:
- for (i = dwPos; i < dwSize-1; i += 2)
- {
- b = pBytes[i+0];
- pBytes[i+0] = pBytes[i+1];
- pBytes[i+1] = b;
- }
- /* fall through */
-
case ENCODING_UTF16LE:
+ {
+ /* Re-allocate the buffer for EM_SETHANDLE */
pszText = (LPWSTR) &pBytes[dwPos];
- dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
+ cchText = (dwSize - dwPos) / sizeof(WCHAR);
+ hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR),
LMEM_MOVEABLE);
+ pszNewText = LocalLock(hNewLocal);
+ if (pszNewText == NULL)
+ goto done;
+
+ *phLocal = hNewLocal;
+ CopyMemory(pszNewText, pszText, cchText * sizeof(WCHAR));
+
+ if (encFile == ENCODING_UTF16BE) /* big endian; Swap bytes */
+ {
+ BYTE tmp, *pb = (LPBYTE)pszNewText;
+ for (i = 0; i < cchText * 2; i += 2)
+ {
+ tmp = pb[i];
+ pb[i] = pb[i + 1];
+ pb[i + 1] = tmp;
+ }
+ }
break;
+ }
case ENCODING_ANSI:
case ENCODING_UTF8:
case ENCODING_UTF8BOM:
- if (encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM)
- iCodePage = CP_UTF8;
- else
- iCodePage = CP_ACP;
+ {
+ iCodePage = ((encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM) ? CP_UTF8
: CP_ACP);
- if ((dwSize - dwPos) > 0)
+ /* Get ready for ANSI-to-Wide conversion */
+ cbContent = dwSize - dwPos;
+ cchText = 0;
+ if (cbContent > 0)
{
- dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos],
dwSize - dwPos, NULL, 0);
- if (dwCharCount == 0)
+ cchText = MultiByteToWideChar(iCodePage, 0, &pBytes[dwPos],
(INT)cbContent, NULL, 0);
+ if (cchText == 0)
goto done;
}
- else
- {
- /* special case for files with no characters (other than BOMs) */
- dwCharCount = 0;
- }
- pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) *
sizeof(WCHAR));
- if (!pszAllocText)
+ /* Re-allocate the buffer for EM_SETHANDLE */
+ hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR),
LMEM_MOVEABLE);
+ pszNewText = LocalLock(hNewLocal);
+ if (!pszNewText)
goto done;
+ *phLocal = hNewLocal;
- if ((dwSize - dwPos) > 0)
+ /* Do ANSI-to-Wide conversion */
+ if (cbContent > 0)
{
- if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize -
dwPos, pszAllocText, dwCharCount))
+ if (!MultiByteToWideChar(iCodePage, 0,
+ &pBytes[dwPos], (INT)cbContent, pszNewText,
(INT)cchText))
+ {
goto done;
+ }
}
-
- pszAllocText[dwCharCount] = '\0';
- pszText = pszAllocText;
break;
- DEFAULT_UNREACHABLE;
}
- dwPos = 0;
- for (i = 0; i < dwCharCount; i++)
- {
- switch(pszText[i])
- {
- case '\r':
- if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
- {
- i++;
- adwEolnCount[EOLN_CRLF]++;
- break;
- }
- /* fall through */
-
- case '\n':
- if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
- return FALSE;
- if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf)))
- return FALSE;
- dwPos = i + 1;
-
- if (pszText[i] == '\r')
- adwEolnCount[EOLN_CR]++;
- else
- adwEolnCount[EOLN_LF]++;
- break;
-
- case '\0':
- pszText[i] = ' ';
- break;
- }
+ DEFAULT_UNREACHABLE;
}
- if (!*ppszText && (pszText == pszAllocText))
- {
- /* special case; don't need to reallocate */
- *ppszText = pszAllocText;
- *pdwTextLen = dwCharCount;
- pszAllocText = NULL;
- }
- else
- {
- /* append last remaining text */
- if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
- return FALSE;
- }
+ pszNewText[cchText] = UNICODE_NULL;
- /* chose which eoln to use */
- *piEoln = EOLN_CRLF;
- if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
- *piEoln = EOLN_LF;
- if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
- *piEoln = EOLN_CR;
- *pencFile = encFile;
+ if (!ProcessNewLinesAndNulls(phLocal, &pszNewText, &cchText, piEoln))
+ goto done;
+ *pencFile = encFile;
bSuccess = TRUE;
done:
if (pBytes)
- HeapFree(GetProcessHeap(), 0, pBytes);
- if (pszAllocText)
- HeapFree(GetProcessHeap(), 0, pszAllocText);
-
- if (!bSuccess && *ppszText)
- {
- HeapFree(GetProcessHeap(), 0, *ppszText);
- *ppszText = NULL;
- *pdwTextLen = 0;
- }
+ UnmapViewOfFile(pBytes);
+ if (hMapping != INVALID_HANDLE_VALUE)
+ CloseHandle(hMapping);
+ if (pszNewText)
+ LocalUnlock(*phLocal);
return bSuccess;
}
@@ -367,7 +389,7 @@ done:
return bSuccess;
}
-BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int
iEoln)
+BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, EOLN
iEoln)
{
WCHAR wcBom;
LPCWSTR pszLF = L"\n";