From ea624b2b72589485096f14fcb9772f94d5f65a35 Mon Sep 17 00:00:00 2001
From: Clemens Wasser <clemens.wasser@gmail.com>
Date: Sat, 18 Jun 2022 00:08:42 +0200
Subject: [PATCH] SystemTools: Optimize GetCasePathName

---
 SystemTools.cxx | 202 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 147 insertions(+), 55 deletions(-)

diff --git a/SystemTools.cxx b/SystemTools.cxx
index 5889a4b..cceeb18 100644
--- a/SystemTools.cxx
+++ b/SystemTools.cxx
@@ -571,73 +571,165 @@ public:
 static SystemToolsStatic* SystemToolsStatics;
 
 #ifdef _WIN32
+// Move pathMidpoint back to just after the nearest earlier '/' or '\\',
+// ignoring one directly before it. Returns false, leaving pathMidpoint
+// unchanged, when none is found past the first two characters of pathBegin.
+static bool GetNextPathComponent(const char*& pathBegin,
+                                 const char*& pathMidpoint,
+                                 const char*& pathEnd)
+{
+  for (auto it = pathMidpoint - 1; it - pathBegin > 2; --it) {
+    if (*(it - 1) == '/' || *(it - 1) == '\\') {
+      pathMidpoint = it;
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Return pathIn with its longest openable prefix rewritten from the spelling
+// reported by GetFinalPathNameByHandleW. Relative paths, and paths of which
+// no prefix can be opened, are returned unchanged.
 std::string SystemToolsStatic::GetCasePathName(std::string const& pathIn)
 {
-  std::string casePath;
+  // This looks unnecessary, but it allows for the return value optimization
+  // since all return paths return the same local variable.
+  std::string casePath = pathIn;
 
   // First check if the file is relative. We don't fix relative paths since the
   // real case depends on the root directory and the given path fragment may
   // have meaning elsewhere in the project.
-  if (!SystemTools::FileIsFullPath(pathIn)) {
-    // This looks unnecessary, but it allows for the return value optimization
-    // since all return paths return the same local variable.
-    casePath = pathIn;
+  if (!SystemTools::FileIsFullPath(pathIn))
     return casePath;
-  }
 
-  std::vector<std::string> path_components;
-  SystemTools::SplitPath(pathIn, path_components);
-
-  // Start with root component.
-  std::vector<std::string>::size_type idx = 0;
-  casePath = path_components[idx++];
-  // make sure drive letter is always upper case
-  if (casePath.size() > 1 && casePath[1] == ':') {
-    casePath[0] = toupper(casePath[0]);
-  }
-  const char* sep = "";
-
-  // If network path, fill casePath with server/share so FindFirstFile
-  // will work after that. Maybe someday call other APIs to get
-  // actual case of servers and shares.
-  if (path_components.size() > 2 && path_components[0] == "//") {
-    casePath += path_components[idx++];
-    casePath += "/";
-    casePath += path_components[idx++];
-    sep = "/";
-  }
-
-  // Convert case of all components that exist.
-  bool converting = true;
-  for (; idx < path_components.size(); idx++) {
-    casePath += sep;
-    sep = "/";
-
-    if (converting) {
-      // If path component contains wildcards, we skip matching
-      // because these filenames are not allowed on windows,
-      // and we do not want to match a different file.
-      if (path_components[idx].find('*') != std::string::npos ||
-          path_components[idx].find('?') != std::string::npos) {
-        converting = false;
-      } else {
-        std::string test_str = casePath;
-        test_str += path_components[idx];
-        WIN32_FIND_DATAW findData;
-        HANDLE hFind =
-          ::FindFirstFileW(Encoding::ToWide(test_str).c_str(), &findData);
-        if (INVALID_HANDLE_VALUE != hFind) {
-          path_components[idx] = Encoding::ToNarrow(findData.cFileName);
-          ::FindClose(hFind);
-        } else {
-          converting = false;
-        }
+  std::wstring actualPath(4096, L'\0');
+
+  const auto* pathBegin = pathIn.c_str();
+  // The midpoint between the parts of the path that do and don't exist
+  const auto* pathMidpoint = pathIn.c_str() + pathIn.size();
+  const auto* pathEnd = pathIn.c_str() + pathIn.size();
+
+  for (;;) {
+    auto const widePathLen = MultiByteToWideChar(
+      CP_UTF8, 0, pathBegin, int(pathMidpoint - pathBegin), nullptr, 0);
+    std::wstring widePath(widePathLen, L'\0');
+    MultiByteToWideChar(CP_UTF8, 0, pathBegin, int(pathMidpoint - pathBegin),
+                        widePath.data(), widePathLen);
+
+    const auto hFile = CreateFileW(
+      widePath.c_str(), GENERIC_READ,
+      FILE_SHARE_READ, // Don't lock the file
+      nullptr,
+      OPEN_EXISTING,              // Don't create the file
+      FILE_FLAG_BACKUP_SEMANTICS, // Required for opening Directories
+      nullptr);
+
+    // If an error occurs, try the next path component
+    if (hFile == INVALID_HANDLE_VALUE) {
+      if (!GetNextPathComponent(pathBegin, pathMidpoint, pathEnd))
+        return casePath;
+      continue;
+    }
+
+    // Get the actual case correct path
+    auto length = GetFinalPathNameByHandleW(hFile, actualPath.data(),
+                                            actualPath.size(), 0);
+
+    // If an error occurs, try the next path component
+    if (length <= 0) {
+      // This handle is abandoned on both exits below, so release it first.
+      CloseHandle(hFile);
+      if (!GetNextPathComponent(pathBegin, pathMidpoint, pathEnd))
+        return casePath;
+      continue;
+    }
+
+    auto const shouldEnlarge = length >= actualPath.size();
+    actualPath.resize(length);
+
+    // If the buffer was too small, retry with a bigger buffer
+    if (shouldEnlarge) {
+      length = GetFinalPathNameByHandleW(hFile, actualPath.data(),
+                                         actualPath.size(), 0);
+
+      // If an error occurs, try the next path component
+      if (length <= 0) {
+        // This handle is abandoned on both exits below, so release it first.
+        CloseHandle(hFile);
+        if (!GetNextPathComponent(pathBegin, pathMidpoint, pathEnd))
+          return casePath;
+        continue;
       }
+
+      // The size requested above counted the terminator; the length now does not.
+      actualPath.resize(length);
     }
 
-    casePath += path_components[idx];
+    // Fixup long path prefix
+    if (!SystemTools::StringStartsWith(pathIn, "\\\\?\\")) {
+      actualPath.erase(0, 4);
+    }
+
+    // Fixup path separators
+    auto* actualPathPtr = actualPath.data() + actualPath.size();
+    const auto* pathPtr = pathMidpoint - 1;
+    for (;;) {
+      while (actualPathPtr >= actualPath.data() && *actualPathPtr != L'\\' &&
+             *actualPathPtr != L'/') {
+        --actualPathPtr;
+      }
+
+      if (actualPathPtr < actualPath.data())
+        break;
+
+      while (pathPtr >= pathBegin && *pathPtr != '\\' && *pathPtr != '/') {
+        --pathPtr;
+      }
+
+      if (pathPtr < pathBegin)
+        break;
+
+      *actualPathPtr = wchar_t(*pathPtr);
+      --actualPathPtr;
+      --pathPtr;
+    }
+
+    casePath = Encoding::ToNarrow(actualPath);
+
+    // Fixup missing path separator
+    if (pathMidpoint != pathEnd && casePath.back() != '/' &&
+        casePath.back() != '\\' && pathBegin < pathMidpoint)
+      casePath.push_back(*(pathMidpoint - 1));
+
+    // Fixup symlinks
+    auto casePathIt = casePath.size();
+    auto pathInIt = size_t(pathMidpoint - pathBegin);
+    while (casePathIt-- > 0 && pathInIt-- > 0) {
+      // Skip short paths
+      if (pathInIt > 0 && !strncmp(pathIn.c_str() + pathInIt - 1, "~1", 2))
+        break;
+
+      if (pathInIt == 0 && casePathIt > 0) {
+        casePath.erase(0, casePathIt);
+        break;
+      }
+
+      if (tolower(casePath[casePathIt]) == tolower(pathIn[pathInIt]))
+        continue;
+
+      casePath.replace(0, casePathIt + 1, pathIn.c_str(), pathInIt + 1);
+
+      break;
+    }
+
+    // append the rest of the path, which doesn't exist
+    if (pathMidpoint != pathEnd)
+      casePath.append(pathMidpoint);
+
+    CloseHandle(hFile);
+    return casePath;
   }
-  return casePath;
 }
 
 std::string SystemToolsStatic::GetActualCaseForPathCached(std::string const& p)
-- 
GitLab