From ea624b2b72589485096f14fcb9772f94d5f65a35 Mon Sep 17 00:00:00 2001
From: Clemens Wasser <clemens.wasser@gmail.com>
Date: Sat, 18 Jun 2022 00:08:42 +0200
Subject: [PATCH] SystemTools: Optimize GetCasePathName

---
 SystemTools.cxx | 189 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 134 insertions(+), 55 deletions(-)

diff --git a/SystemTools.cxx b/SystemTools.cxx
index 5889a4b..cceeb18 100644
--- a/SystemTools.cxx
+++ b/SystemTools.cxx
@@ -571,73 +571,152 @@ public:
 static SystemToolsStatic* SystemToolsStatics;
 
 #ifdef _WIN32
+// Move pathMidpoint back to just past the previous separator; false if none.
+static bool GetNextPathComponent(const char*& pathBegin,
+                                 const char*& pathMidpoint,
+                                 const char*& pathEnd)
+{
+  const char* cursor = pathMidpoint - 1;
+  while (cursor - pathBegin > 2 && cursor[-1] != '/' && cursor[-1] != '\\')
+    --cursor;
+  if (cursor - pathBegin <= 2)
+    return false; // no separator beyond the first two characters
+  pathMidpoint = cursor;
+  return true;
+}
+
 std::string SystemToolsStatic::GetCasePathName(std::string const& pathIn)
 {
-  std::string casePath;
+  // This looks unnecessary, but it allows for the return value optimization
+  // since all return paths return the same local variable.
+  std::string casePath = pathIn;
 
   // First check if the file is relative. We don't fix relative paths since the
   // real case depends on the root directory and the given path fragment may
   // have meaning elsewhere in the project.
-  if (!SystemTools::FileIsFullPath(pathIn)) {
-    // This looks unnecessary, but it allows for the return value optimization
-    // since all return paths return the same local variable.
-    casePath = pathIn;
+  if (!SystemTools::FileIsFullPath(pathIn))
     return casePath;
-  }
 
-  std::vector<std::string> path_components;
-  SystemTools::SplitPath(pathIn, path_components);
-
-  // Start with root component.
-  std::vector<std::string>::size_type idx = 0;
-  casePath = path_components[idx++];
-  // make sure drive letter is always upper case
-  if (casePath.size() > 1 && casePath[1] == ':') {
-    casePath[0] = toupper(casePath[0]);
-  }
-  const char* sep = "";
-
-  // If network path, fill casePath with server/share so FindFirstFile
-  // will work after that.  Maybe someday call other APIs to get
-  // actual case of servers and shares.
-  if (path_components.size() > 2 && path_components[0] == "//") {
-    casePath += path_components[idx++];
-    casePath += "/";
-    casePath += path_components[idx++];
-    sep = "/";
-  }
-
-  // Convert case of all components that exist.
-  bool converting = true;
-  for (; idx < path_components.size(); idx++) {
-    casePath += sep;
-    sep = "/";
-
-    if (converting) {
-      // If path component contains wildcards, we skip matching
-      // because these filenames are not allowed on windows,
-      // and we do not want to match a different file.
-      if (path_components[idx].find('*') != std::string::npos ||
-          path_components[idx].find('?') != std::string::npos) {
-        converting = false;
-      } else {
-        std::string test_str = casePath;
-        test_str += path_components[idx];
-        WIN32_FIND_DATAW findData;
-        HANDLE hFind =
-          ::FindFirstFileW(Encoding::ToWide(test_str).c_str(), &findData);
-        if (INVALID_HANDLE_VALUE != hFind) {
-          path_components[idx] = Encoding::ToNarrow(findData.cFileName);
-          ::FindClose(hFind);
-        } else {
-          converting = false;
-        }
+  std::wstring actualPath(4096, L'\0');
+
+  const auto* pathBegin = pathIn.c_str();
+  // The midpoint between the parts of the path that do and don't exist
+  const auto* pathMidpoint = pathIn.c_str() + pathIn.size();
+  const auto* pathEnd = pathIn.c_str() + pathIn.size();
+
+  for (;;) {
+    auto const widePathLen = MultiByteToWideChar(
+      CP_UTF8, 0, pathBegin, int(pathMidpoint - pathBegin), nullptr, 0);
+    std::wstring widePath(widePathLen, L'\0');
+    MultiByteToWideChar(CP_UTF8, 0, pathBegin, int(pathMidpoint - pathBegin),
+                        widePath.data(), widePathLen);
+
+    const auto hFile = CreateFileW(
+      widePath.c_str(), GENERIC_READ,
+      FILE_SHARE_READ, // Don't lock the file
+      nullptr,
+      OPEN_EXISTING,              // Don't create the file
+      FILE_FLAG_BACKUP_SEMANTICS, // Required for opening Directories
+      nullptr);
+
+    // If an error occurs, try the next path component
+    if (hFile == INVALID_HANDLE_VALUE) {
+      if (!GetNextPathComponent(pathBegin, pathMidpoint, pathEnd))
+        return casePath;
+      continue;
+    }
+
+    // Get the actual case correct path
+    auto length = GetFinalPathNameByHandleW(hFile, actualPath.data(),
+                                            actualPath.size(), 0);
+
+    // If an error occurs, try the next path component
+    if (length <= 0) {
+      if (!GetNextPathComponent(pathBegin, pathMidpoint, pathEnd))
+        return casePath;
+      continue;
+    }
+
+    auto const shouldEnlarge = length >= actualPath.size();
+    actualPath.resize(length);
+
+    // If the buffer was too small, retry with a bigger buffer
+    if (shouldEnlarge) {
+      length = GetFinalPathNameByHandleW(hFile, actualPath.data(),
+                                         actualPath.size(), 0);
+
+      // If an error occurs, try the next path component
+      if (length <= 0) {
+        if (!GetNextPathComponent(pathBegin, pathMidpoint, pathEnd))
+          return casePath;
+        continue;
       }
     }
 
-    casePath += path_components[idx];
+    // Fixup long path prefix
+    if (!SystemTools::StringStartsWith(pathIn, "\\\\?\\")) {
+      actualPath.erase(0, 4);
+    }
+
+    // Fixup path separators
+    auto* actualPathPtr = actualPath.data() + actualPath.size();
+    const auto* pathPtr = pathMidpoint - 1;
+    for (;;) {
+      while (actualPathPtr >= actualPath.data() && *actualPathPtr != L'\\' &&
+             *actualPathPtr != L'/') {
+        --actualPathPtr;
+      }
+
+      if (actualPathPtr < actualPath.data())
+        break;
+
+      while (pathPtr >= pathBegin && *pathPtr != '\\' && *pathPtr != '/') {
+        --pathPtr;
+      }
+
+      if (pathPtr < pathBegin)
+        break;
+
+      *actualPathPtr = wchar_t(*pathPtr);
+      --actualPathPtr;
+      --pathPtr;
+    }
+
+    casePath = Encoding::ToNarrow(actualPath);
+
+    // Fixup missing path separator
+    if (pathMidpoint != pathEnd && casePath.back() != '/' &&
+        casePath.back() != '\\' && pathBegin < pathMidpoint)
+      casePath.push_back(*(pathMidpoint - 1));
+
+    // Fixup symlinks
+    auto casePathIt = casePath.size();
+    auto pathInIt = size_t(pathMidpoint - pathBegin);
+    while (casePathIt-- > 0 && pathInIt-- > 0) {
+      // Skip short paths
+      if (pathInIt > 0 && !strncmp(pathIn.c_str() + pathInIt - 1, "~1", 2))
+        break;
+
+      if (pathInIt == 0 && casePathIt > 0) {
+        casePath.erase(0, casePathIt);
+        break;
+      }
+
+      if (tolower(casePath[casePathIt]) == tolower(pathIn[pathInIt]))
+        continue;
+
+      casePath.replace(0, casePathIt + 1, pathIn.c_str(), pathInIt + 1);
+
+      break;
+    }
+
+    // append the rest of the path, which doesn't exist
+    if (pathMidpoint != pathEnd)
+      casePath.append(pathMidpoint);
+
+    CloseHandle(hFile);
+    return casePath;
   }
-  return casePath;
 }
 
 std::string SystemToolsStatic::GetActualCaseForPathCached(std::string const& p)
-- 
GitLab