From acee32bd1c66a43fd9719476a3ca82a13c24e88e Mon Sep 17 00:00:00 2001
From: Mathieu Westphal <mathieu.westphal@kitware.com>
Date: Thu, 13 Feb 2020 16:38:12 +0000
Subject: [PATCH] Adding DecodeURL method and optional decoding to ParseURL

---
 SystemTools.cxx     | 38 ++++++++++++++++++++++++++++++++++----
 SystemTools.hxx.in  | 14 ++++++++++++--
 testSystemTools.cxx | 30 ++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/SystemTools.cxx b/SystemTools.cxx
index 39873e63..c68487a3 100644
--- a/SystemTools.cxx
+++ b/SystemTools.cxx
@@ -123,9 +123,9 @@ extern char** environ;
 
 #define VTK_URL_PROTOCOL_REGEX "([a-zA-Z0-9]*)://(.*)"
 #define VTK_URL_REGEX                                                         \
-  "([a-zA-Z0-9]*)://(([A-Za-z0-9]+)(:([^:@]+))?@)?([^:@/]+)(:([0-9]+))?/"     \
+  "([a-zA-Z0-9]*)://(([A-Za-z0-9]+)(:([^:@]+))?@)?([^:@/]*)(:([0-9]+))?/"     \
   "(.+)?"
-
+#define VTK_URL_BYTE_REGEX "%[0-9a-fA-F][0-9a-fA-F]"
 #ifdef _MSC_VER
 #  include <sys/utime.h>
 #else
@@ -4513,7 +4513,7 @@ std::string SystemTools::GetOperatingSystemNameAndVersion()
 
 bool SystemTools::ParseURLProtocol(const std::string& URL,
                                    std::string& protocol,
-                                   std::string& dataglom)
+                                   std::string& dataglom, bool decode)
 {
   // match 0 entire url
   // match 1 protocol
@@ -4526,13 +4526,17 @@ bool SystemTools::ParseURLProtocol(const std::string& URL,
   protocol = urlRe.match(1);
   dataglom = urlRe.match(2);
 
+  if (decode) {
+    dataglom = DecodeURL(dataglom);
+  }
+
   return true;
 }
 
 bool SystemTools::ParseURL(const std::string& URL, std::string& protocol,
                            std::string& username, std::string& password,
                            std::string& hostname, std::string& dataport,
-                           std::string& database)
+                           std::string& database, bool decode)
 {
   kwsys::RegularExpression urlRe(VTK_URL_REGEX);
   if (!urlRe.find(URL))
@@ -4556,9 +4560,35 @@ bool SystemTools::ParseURL(const std::string& URL, std::string& protocol,
   dataport = urlRe.match(8);
   database = urlRe.match(9);
 
+  if (decode) {
+    username = DecodeURL(username);
+    password = DecodeURL(password);
+    hostname = DecodeURL(hostname);
+    dataport = DecodeURL(dataport);
+    database = DecodeURL(database);
+  }
+
   return true;
 }
 
+// ----------------------------------------------------------------------
+std::string SystemTools::DecodeURL(const std::string& url)
+{
+  // Turn each "%XX" (two hex digits) into one byte; copy all else as-is.
+  kwsys::RegularExpression escapeRe(VTK_URL_BYTE_REGEX);
+  std::string decoded;
+  size_t pos = 0;
+  while (pos < url.length()) {
+    const bool escaped = escapeRe.find(url.substr(pos, 3));
+    const std::string hex = escaped ? url.substr(pos + 1, 2) : std::string();
+    decoded += escaped ? static_cast<char>(strtoul(hex.c_str(), nullptr, 16))
+                       : url[pos];
+    pos += escaped ? 3 : 1;
+  }
+  return decoded;
+}
+
+// ----------------------------------------------------------------------
 // These must NOT be initialized.  Default initialization to zero is
 // necessary.
 static unsigned int SystemToolsManagerCount;
diff --git a/SystemTools.hxx.in b/SystemTools.hxx.in
index c4ab9d4f..d4a93faf 100644
--- a/SystemTools.hxx.in
+++ b/SystemTools.hxx.in
@@ -935,22 +935,32 @@ public:
    * Parse a character string :
    *       protocol://dataglom
    * and fill protocol as appropriate.
+   * If decode is true, the dataglom is decoded using DecodeURL.
    * Return false if the URL does not have the required form, true otherwise.
    */
   static bool ParseURLProtocol(const std::string& URL, std::string& protocol,
-                               std::string& dataglom);
+                               std::string& dataglom, bool decode = false);
 
   /**
    * Parse a string (a URL without protocol prefix) with the form:
    *  protocol://[[username[':'password]'@']hostname[':'dataport]]'/'[datapath]
    * and fill protocol, username, password, hostname, dataport, and datapath
    * when values are found.
+   * If decode is true, decode all strings except the protocol with DecodeURL.
    * Return true if the string matches the format; false otherwise.
    */
   static bool ParseURL(const std::string& URL, std::string& protocol,
                        std::string& username, std::string& password,
                        std::string& hostname, std::string& dataport,
-                       std::string& datapath);
+                       std::string& datapath, bool decode = false);
+
+  /**
+   * Decode the percent-encoded sequences of a URL or URI
+   * into the char values they represent.
+   * Does not perform any other sort of validation.
+   * Return the decoded string.
+   */
+  static std::string DecodeURL(const std::string& url);
 
 private:
   /**
diff --git a/testSystemTools.cxx b/testSystemTools.cxx
index 1f3a15b5..7032a100 100644
--- a/testSystemTools.cxx
+++ b/testSystemTools.cxx
@@ -1077,6 +1077,34 @@ static bool CheckCopyFileIfDifferent()
   return ret;
 }
 
+// Check ParseURL with and without decoding; its return value is checked too.
+static bool CheckURLParsing()
+{
+  bool ret = true;
+  std::string url = "http://user:pw@hostname:42/full/url.com";
+  std::string protocol, username, password, hostname, dataport, database;
+  if (!kwsys::SystemTools::ParseURL(url, protocol, username, password,
+                                    hostname, dataport, database) ||
+      protocol != "http" || username != "user" || password != "pw" ||
+      hostname != "hostname" || dataport != "42" ||
+      database != "full/url.com") {
+    std::cerr << "Incorrect URL parsing" << std::endl;
+    ret = false;
+  }
+  // Expected bytes are spelled with octal escapes (UTF-8) to stay ASCII.
+  std::string uri =
+    "file://hostname/path/to/"
+    "a%20file%20with%20str%C3%A0ng%C3%A8%20ch%40r%20and%20s%C2%B5aces";
+  if (!kwsys::SystemTools::ParseURL(uri, protocol, username, password,
+                                    hostname, dataport, database, true) ||
+      protocol != "file" || hostname != "hostname" || database !=
+        "path/to/a file with str\303\240ng\303\250 ch@r and s\302\265aces") {
+    std::cerr << "Incorrect URL parsing or decoding" << std::endl;
+    ret = false;
+  }
+  return ret;
+}
+
 int testSystemTools(int, char* [])
 {
   bool res = true;
@@ -1124,5 +1152,7 @@ int testSystemTools(int, char* [])
 
   res &= CheckCopyFileIfDifferent();
 
+  res &= CheckURLParsing();
+
   return res ? 0 : 1;
 }
-- 
GitLab