https://git.reactos.org/?p=reactos.git;a=commitdiff;h=7376cdebb6fc945798b29212c9f3239ea52cf89f
commit 7376cdebb6fc945798b29212c9f3239ea52cf89f
Author:     Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
AuthorDate: Mon Sep 7 22:30:47 2020 +0900
Commit:     GitHub <noreply@github.com>
CommitDate: Mon Sep 7 22:30:47 2020 +0900
[SHELLFIND] Encoding-aware file search (#3134)
Consider UTF-8/UTF-16/UTF-16BE encodings in file search.
Recognize UTF-8/UTF-16/UTF-16BE BOM and NULs at the beginning of the file contents.
Optimize for speed.
CORE-17250
---
 dll/win32/browseui/shellfind/CFindFolder.cpp | 110 ++++++++++++++++++---------
 1 file changed, 75 insertions(+), 35 deletions(-)
diff --git a/dll/win32/browseui/shellfind/CFindFolder.cpp b/dll/win32/browseui/shellfind/CFindFolder.cpp index d3c2b6f3c5c..ec24b116c29 100644 --- a/dll/win32/browseui/shellfind/CFindFolder.cpp +++ b/dll/win32/browseui/shellfind/CFindFolder.cpp @@ -152,6 +152,8 @@ struct _SearchData CStringW szFileName; CStringA szQueryA; CStringW szQueryW; + CStringW szQueryU16BE; + CStringA szQueryU8; BOOL SearchHidden; CComPtr<CFindFolder> pFindFolder; }; @@ -171,61 +173,68 @@ static const TChar* StrStrN(const TChar *lpFirst, const TString &lpSrch, UINT cc return NULL; }
-template<typename TChar, typename TString, int (&StrNCmp)(const TChar *, const TChar *, size_t)> -static UINT StrStrNCount(const TChar *lpFirst, const TString &lpSrch, UINT cchMax) -{ - const TChar *lpSearchEnd = lpFirst + cchMax; - UINT uCount = 0; - while (lpFirst < lpSearchEnd && (lpFirst = StrStrN<TChar, TString, StrNCmp>(lpFirst, lpSrch, cchMax))) - { - uCount++; - lpFirst += lpSrch.GetLength(); - cchMax = lpSearchEnd - lpFirst; - } - return uCount; -} - -static UINT StrStrCountNIA(const CHAR *lpFirst, const CStringA &lpSrch, UINT cchMax) +static inline BOOL +StrFindNIA(const CHAR *lpFirst, const CStringA &lpSrch, UINT cchMax) { - return StrStrNCount<CHAR, CStringA, _strnicmp>(lpFirst, lpSrch, cchMax); + return StrStrN<CHAR, CStringA, _strnicmp>(lpFirst, lpSrch, cchMax) != NULL; }
-static UINT StrStrCountNIW(const WCHAR *lpFirst, const CStringW &lpSrch, UINT cchMax) +static inline BOOL +StrFindNIW(const WCHAR *lpFirst, const CStringW &lpSrch, UINT cchMax) { - return StrStrNCount<WCHAR, CStringW, _wcsnicmp>(lpFirst, lpSrch, cchMax); + return StrStrN<WCHAR, CStringW, _wcsnicmp>(lpFirst, lpSrch, cchMax) != NULL; }
-static UINT SearchFile(LPCWSTR lpFilePath, _SearchData *pSearchData) +static BOOL SearchFile(LPCWSTR lpFilePath, _SearchData *pSearchData) { HANDLE hFile = CreateFileW(lpFilePath, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL); if (hFile == INVALID_HANDLE_VALUE) - return 0; + return FALSE;
+ // FIXME: support large file DWORD size = GetFileSize(hFile, NULL); - HANDLE hFileMap = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL); + if (size == 0 || size == INVALID_FILE_SIZE) + { + CloseHandle(hFile); + return FALSE; + } + + HANDLE hFileMap = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, size, NULL); CloseHandle(hFile); if (hFileMap == INVALID_HANDLE_VALUE) - return 0; + return FALSE;
- LPBYTE lpFileContent = (LPBYTE) MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, 0); + LPBYTE pbContents = (LPBYTE)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, size); CloseHandle(hFileMap); - if (!lpFileContent) - return 0; + if (!pbContents) + return FALSE;
- UINT uMatches = 0; - // Check for UTF-16 BOM - if (size >= 2 && lpFileContent[0] == 0xFF && lpFileContent[1] == 0xFE) + BOOL bFound; + if (size >= 2 && + (memcmp(pbContents, "\xFF\xFE", 2) == 0 || (pbContents[0] && !pbContents[1]))) + { + // UTF-16 + bFound = StrFindNIW((LPCWSTR)pbContents, pSearchData->szQueryW, size / sizeof(WCHAR)); + } + else if (size >= 2 && + (memcmp(pbContents, "\xFE\xFF", 2) == 0 || (!pbContents[0] && pbContents[1]))) + { + // UTF-16 BE + bFound = StrFindNIW((LPCWSTR)pbContents, pSearchData->szQueryU16BE, size / sizeof(WCHAR)); + } + else if (size >= 3 && memcmp(pbContents, "\xEF\xBB\xBF", 3) == 0) { - uMatches = StrStrCountNIW((LPCWSTR) lpFileContent, pSearchData->szQueryW, size / sizeof(WCHAR)); + // UTF-8 + bFound = StrFindNIA((LPCSTR)pbContents, pSearchData->szQueryU8, size / sizeof(CHAR)); } else { - uMatches = StrStrCountNIA((LPCSTR) lpFileContent, pSearchData->szQueryA, size / sizeof(CHAR)); + // ANSI + bFound = StrFindNIA((LPCSTR)pbContents, pSearchData->szQueryA, size / sizeof(CHAR)); }
- UnmapViewOfFile(lpFileContent); - - return uMatches; + UnmapViewOfFile(pbContents); + return bFound; }
static BOOL FileNameMatch(LPCWSTR FindDataFileName, _SearchData *pSearchData) @@ -266,13 +275,14 @@ static UINT RecursiveFind(LPCWSTR lpPath, _SearchData *pSearchData) BOOL bMoreFiles = TRUE; UINT uTotalFound = 0;
- PathCombineW(szPath, lpPath, L"*.*"); + PathCombineW(szPath, lpPath, L"*");
for (hFindFile = FindFirstFileW(szPath, &FindData); bMoreFiles && hFindFile != INVALID_HANDLE_VALUE; bMoreFiles = FindNextFileW(hFindFile, &FindData)) { - if (!wcscmp(FindData.cFileName, L".") || !wcscmp(FindData.cFileName, L"..")) +#define IS_DOTS(psz) ((psz)[0] == L'.' && ((psz)[1] == 0 || ((psz)[1] == L'.' && (psz)[2] == 0))) + if (IS_DOTS(FindData.cFileName)) continue;
PathCombineW(szPath, lpPath, FindData.cFileName); @@ -368,6 +378,36 @@ LRESULT CFindFolder::StartSearch(UINT uMsg, WPARAM wParam, LPARAM lParam, BOOL & pSearchData->szFileName = pSearchParams->szFileName; pSearchData->szQueryA = pSearchParams->szQuery; pSearchData->szQueryW = pSearchParams->szQuery; + + // UTF-16 BE + { + CStringW utf16 = pSearchData->szQueryW; + LPWSTR psz = utf16.GetBuffer(); + for (SIZE_T i = 0; psz[i]; ++i) + { + psz[i] = MAKEWORD(HIBYTE(psz[i]), LOBYTE(psz[i])); + } + utf16.ReleaseBuffer(); + pSearchData->szQueryU16BE = utf16; + } + + // UTF-8 + { + CStringA utf8; + INT cch = WideCharToMultiByte(CP_UTF8, 0, pSearchData->szQueryW, -1, NULL, 0, NULL, NULL); + if (cch > 0) + { + LPSTR psz = utf8.GetBuffer(cch); + WideCharToMultiByte(CP_UTF8, 0, pSearchData->szQueryW, -1, psz, cch, NULL, NULL); + utf8.ReleaseBuffer(); + pSearchData->szQueryU8 = utf8; + } + else + { + pSearchData->szQueryU8 = pSearchData->szQueryA; + } + } + pSearchData->SearchHidden = pSearchParams->SearchHidden; SHFree(pSearchParams);