https://git.reactos.org/?p=reactos.git;a=commitdiff;h=7376cdebb6fc945798b29212c9f3239ea52cf89f
commit 7376cdebb6fc945798b29212c9f3239ea52cf89f
Author: Katayama Hirofumi MZ <katayama.hirofumi.mz(a)gmail.com>
AuthorDate: Mon Sep 7 22:30:47 2020 +0900
Commit: GitHub <noreply(a)github.com>
CommitDate: Mon Sep 7 22:30:47 2020 +0900
[SHELLFIND] Encoding-aware file search (#3134)
Consider UTF-8/UTF-16/UTF-16BE encodings in file search. Recognize
UTF-8/UTF-16/UTF-16BE BOM and NULs at the beginning of the file contents. Optimize for
speed.
CORE-17250
---
dll/win32/browseui/shellfind/CFindFolder.cpp | 110 ++++++++++++++++++---------
1 file changed, 75 insertions(+), 35 deletions(-)
diff --git a/dll/win32/browseui/shellfind/CFindFolder.cpp b/dll/win32/browseui/shellfind/CFindFolder.cpp
index d3c2b6f3c5c..ec24b116c29 100644
--- a/dll/win32/browseui/shellfind/CFindFolder.cpp
+++ b/dll/win32/browseui/shellfind/CFindFolder.cpp
@@ -152,6 +152,8 @@ struct _SearchData
CStringW szFileName;
CStringA szQueryA;
CStringW szQueryW;
+ CStringW szQueryU16BE;
+ CStringA szQueryU8;
BOOL SearchHidden;
CComPtr<CFindFolder> pFindFolder;
};
@@ -171,61 +173,68 @@ static const TChar* StrStrN(const TChar *lpFirst, const TString &lpSrch, UINT cc
return NULL;
}
-template<typename TChar, typename TString, int (&StrNCmp)(const TChar *, const
TChar *, size_t)>
-static UINT StrStrNCount(const TChar *lpFirst, const TString &lpSrch, UINT cchMax)
-{
- const TChar *lpSearchEnd = lpFirst + cchMax;
- UINT uCount = 0;
- while (lpFirst < lpSearchEnd && (lpFirst = StrStrN<TChar, TString,
StrNCmp>(lpFirst, lpSrch, cchMax)))
- {
- uCount++;
- lpFirst += lpSrch.GetLength();
- cchMax = lpSearchEnd - lpFirst;
- }
- return uCount;
-}
-
-static UINT StrStrCountNIA(const CHAR *lpFirst, const CStringA &lpSrch, UINT cchMax)
+static inline BOOL
+StrFindNIA(const CHAR *lpFirst, const CStringA &lpSrch, UINT cchMax)
{
- return StrStrNCount<CHAR, CStringA, _strnicmp>(lpFirst, lpSrch, cchMax);
+ return StrStrN<CHAR, CStringA, _strnicmp>(lpFirst, lpSrch, cchMax) != NULL;
}
-static UINT StrStrCountNIW(const WCHAR *lpFirst, const CStringW &lpSrch, UINT
cchMax)
+static inline BOOL
+StrFindNIW(const WCHAR *lpFirst, const CStringW &lpSrch, UINT cchMax)
{
- return StrStrNCount<WCHAR, CStringW, _wcsnicmp>(lpFirst, lpSrch, cchMax);
+ return StrStrN<WCHAR, CStringW, _wcsnicmp>(lpFirst, lpSrch, cchMax) != NULL;
}
-static UINT SearchFile(LPCWSTR lpFilePath, _SearchData *pSearchData)
+static BOOL SearchFile(LPCWSTR lpFilePath, _SearchData *pSearchData)
{
HANDLE hFile = CreateFileW(lpFilePath, GENERIC_READ, 0, NULL, OPEN_EXISTING,
FILE_ATTRIBUTE_READONLY, NULL);
if (hFile == INVALID_HANDLE_VALUE)
- return 0;
+ return FALSE;
+ // FIXME: support large file
DWORD size = GetFileSize(hFile, NULL);
- HANDLE hFileMap = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
+ if (size == 0 || size == INVALID_FILE_SIZE)
+ {
+ CloseHandle(hFile);
+ return FALSE;
+ }
+
+ HANDLE hFileMap = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, size, NULL);
CloseHandle(hFile);
if (hFileMap == INVALID_HANDLE_VALUE)
- return 0;
+ return FALSE;
- LPBYTE lpFileContent = (LPBYTE) MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, 0);
+ LPBYTE pbContents = (LPBYTE)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, size);
CloseHandle(hFileMap);
- if (!lpFileContent)
- return 0;
+ if (!pbContents)
+ return FALSE;
- UINT uMatches = 0;
- // Check for UTF-16 BOM
- if (size >= 2 && lpFileContent[0] == 0xFF && lpFileContent[1] ==
0xFE)
+ BOOL bFound;
+ if (size >= 2 &&
+ (memcmp(pbContents, "\xFF\xFE", 2) == 0 || (pbContents[0] &&
!pbContents[1])))
+ {
+ // UTF-16
+ bFound = StrFindNIW((LPCWSTR)pbContents, pSearchData->szQueryW, size /
sizeof(WCHAR));
+ }
+ else if (size >= 2 &&
+ (memcmp(pbContents, "\xFE\xFF", 2) == 0 || (!pbContents[0]
&& pbContents[1])))
+ {
+ // UTF-16 BE
+ bFound = StrFindNIW((LPCWSTR)pbContents, pSearchData->szQueryU16BE, size /
sizeof(WCHAR));
+ }
+ else if (size >= 3 && memcmp(pbContents, "\xEF\xBB\xBF", 3) ==
0)
{
- uMatches = StrStrCountNIW((LPCWSTR) lpFileContent, pSearchData->szQueryW, size
/ sizeof(WCHAR));
+ // UTF-8
+ bFound = StrFindNIA((LPCSTR)pbContents, pSearchData->szQueryU8, size /
sizeof(CHAR));
}
else
{
- uMatches = StrStrCountNIA((LPCSTR) lpFileContent, pSearchData->szQueryA, size
/ sizeof(CHAR));
+ // ANSI
+ bFound = StrFindNIA((LPCSTR)pbContents, pSearchData->szQueryA, size /
sizeof(CHAR));
}
- UnmapViewOfFile(lpFileContent);
-
- return uMatches;
+ UnmapViewOfFile(pbContents);
+ return bFound;
}
static BOOL FileNameMatch(LPCWSTR FindDataFileName, _SearchData *pSearchData)
@@ -266,13 +275,14 @@ static UINT RecursiveFind(LPCWSTR lpPath, _SearchData *pSearchData)
BOOL bMoreFiles = TRUE;
UINT uTotalFound = 0;
- PathCombineW(szPath, lpPath, L"*.*");
+ PathCombineW(szPath, lpPath, L"*");
for (hFindFile = FindFirstFileW(szPath, &FindData);
bMoreFiles && hFindFile != INVALID_HANDLE_VALUE;
bMoreFiles = FindNextFileW(hFindFile, &FindData))
{
- if (!wcscmp(FindData.cFileName, L".") || !wcscmp(FindData.cFileName,
L".."))
+#define IS_DOTS(psz) ((psz)[0] == L'.' && ((psz)[1] == 0 || ((psz)[1] ==
L'.' && (psz)[2] == 0)))
+ if (IS_DOTS(FindData.cFileName))
continue;
PathCombineW(szPath, lpPath, FindData.cFileName);
@@ -368,6 +378,36 @@ LRESULT CFindFolder::StartSearch(UINT uMsg, WPARAM wParam, LPARAM lParam, BOOL &
pSearchData->szFileName = pSearchParams->szFileName;
pSearchData->szQueryA = pSearchParams->szQuery;
pSearchData->szQueryW = pSearchParams->szQuery;
+
+ // UTF-16 BE
+ {
+ CStringW utf16 = pSearchData->szQueryW;
+ LPWSTR psz = utf16.GetBuffer();
+ for (SIZE_T i = 0; psz[i]; ++i)
+ {
+ psz[i] = MAKEWORD(HIBYTE(psz[i]), LOBYTE(psz[i]));
+ }
+ utf16.ReleaseBuffer();
+ pSearchData->szQueryU16BE = utf16;
+ }
+
+ // UTF-8
+ {
+ CStringA utf8;
+ INT cch = WideCharToMultiByte(CP_UTF8, 0, pSearchData->szQueryW, -1, NULL, 0,
NULL, NULL);
+ if (cch > 0)
+ {
+ LPSTR psz = utf8.GetBuffer(cch);
+ WideCharToMultiByte(CP_UTF8, 0, pSearchData->szQueryW, -1, psz, cch, NULL,
NULL);
+ utf8.ReleaseBuffer();
+ pSearchData->szQueryU8 = utf8;
+ }
+ else
+ {
+ pSearchData->szQueryU8 = pSearchData->szQueryA;
+ }
+ }
+
pSearchData->SearchHidden = pSearchParams->SearchHidden;
SHFree(pSearchParams);