From 5d6204e929a6326e5bd8275d6e03e922f7fd813a Mon Sep 17 00:00:00 2001
From: Domen Vrankar <domen.vrankar@gmail.com>
Date: Sun, 1 Mar 2015 22:29:54 +0100
Subject: [PATCH] Glob: Handle symlink cycles in directory paths

Prevent cyclic recursion such as "a/b/c -> a" when recursive globbing
follows symbolic links, so that each directory symbolic link is
traversed only once and is skipped when it is reached again.

Change-Id: I6f52489198d692c3c0b0d94986db0e664d050342
---
 Glob.cxx    | 77 ++++++++++++++++++++++++++++++++++++++++++++---------
 Glob.hxx.in | 36 ++++++++++++++++++++++---
 2 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/Glob.cxx b/Glob.cxx
index 1476c25e..17d551b2 100644
--- a/Glob.cxx
+++ b/Glob.cxx
@@ -19,6 +19,7 @@
 #include KWSYS_HEADER(Directory.hxx)
 #include KWSYS_HEADER(stl/string)
 #include KWSYS_HEADER(stl/vector)
+#include KWSYS_HEADER(stl/algorithm)
 
 // Work-around CMake dependency scanning limitation.  This must
 // duplicate the above list of headers.
@@ -30,6 +31,8 @@
 # include "SystemTools.hxx.in"
 # include "kwsys_stl.hxx.in"
 # include "kwsys_stl_string.hxx.in"
+# include "kwsys_stl_vector.hxx.in"
+# include "kwsys_stl_algorithm.hxx.in"
 #endif
 
 #include <ctype.h>
@@ -214,13 +217,13 @@ kwsys_stl::string Glob::PatternToRegex(const kwsys_stl::string& pattern,
 }
 
 //----------------------------------------------------------------------------
-void Glob::RecurseDirectory(kwsys_stl::string::size_type start,
-  const kwsys_stl::string& dir)
+bool Glob::RecurseDirectory(kwsys_stl::string::size_type start,
+  const kwsys_stl::string& dir, GlobMessages* messages)
 {
   kwsys::Directory d;
   if ( !d.Load(dir) )
     {
-    return;
+    return true;
     }
   unsigned long cc;
   kwsys_stl::string realname;
@@ -255,8 +258,57 @@ void Glob::RecurseDirectory(kwsys_stl::string::size_type start,
       if (isSymLink)
         {
         ++this->FollowedSymlinkCount;
+        kwsys_stl::string realPathErrorMessage; // receives error text from GetRealPath
+        kwsys_stl::string canonicalPath(SystemTools::GetRealPath(dir,
+            &realPathErrorMessage)); // resolved from dir, not from the entry
+
+        if(!realPathErrorMessage.empty()) // non-empty text is treated as failure
+          {
+          if(messages) // reporting is optional; messages may be null
+            {
+            messages->push_back(Message(
+                Glob::error, "Canonical path generation from path '"
+                + dir + "' failed! Reason: '" + realPathErrorMessage + "'"));
+            }
+          return false; // stop here and signal failure to the caller
+          }
+
+        if(kwsys_stl::find(this->VisitedSymlinks.begin(),
+            this->VisitedSymlinks.end(),
+            canonicalPath) == this->VisitedSymlinks.end()) // not recorded yet
+          {
+          this->VisitedSymlinks.push_back(canonicalPath); // record for nested calls
+          if(!this->RecurseDirectory(start+1, realname, messages))
+            {
+            this->VisitedSymlinks.pop_back(); // remove the record before failing
+
+            return false;
+            }
+          this->VisitedSymlinks.pop_back(); // remove the record after the nested call
+          }
+        // else we have already visited this symlink - prevent cyclic recursion
+        else if(messages) // the link is skipped either way; report only if asked
+          {
+          kwsys_stl::string message; // recorded paths from the first match onward
+          for(kwsys_stl::vector<kwsys_stl::string>::const_iterator
+                pathIt = kwsys_stl::find(this->VisitedSymlinks.begin(),
+                                         this->VisitedSymlinks.end(),
+                                         canonicalPath);
+              pathIt != this->VisitedSymlinks.end(); ++pathIt)
+            {
+            message += *pathIt + "\n"; // one recorded path per line
+            }
+          message += canonicalPath + "/" + fname; // last line: canonicalPath plus fname
+          messages->push_back(Message(Glob::cyclicRecursion, message));
+          }
+        }
+      else
+        {
+        if(!this->RecurseDirectory(start+1, realname, messages)) // not a symlink
+          {
+          return false; // propagate failure from the nested call
+          }
         }
-      this->RecurseDirectory(start+1, realname);
       }
     else
       {
@@ -267,17 +319,19 @@ void Glob::RecurseDirectory(kwsys_stl::string::size_type start,
         }
       }
     }
+
+  return true;
 }
 
 //----------------------------------------------------------------------------
 void Glob::ProcessDirectory(kwsys_stl::string::size_type start,
-  const kwsys_stl::string& dir)
+  const kwsys_stl::string& dir, GlobMessages* messages) // messages is only forwarded
 {
   //kwsys_ios::cout << "ProcessDirectory: " << dir << kwsys_ios::endl;
   bool last = ( start == this->Internals->Expressions.size()-1 );
   if ( last && this->Recurse )
     {
-    this->RecurseDirectory(start, dir);
+    this->RecurseDirectory(start, dir, messages); // NOTE(review): bool result is discarded here
     return;
     }
 
@@ -321,8 +375,7 @@ void Glob::ProcessDirectory(kwsys_stl::string::size_type start,
     // << this->Internals->TextExpressions[start].c_str() << kwsys_ios::endl;
     //kwsys_ios::cout << "Real name: " << realname << kwsys_ios::endl;
 
-    if ( !last &&
-      !kwsys::SystemTools::FileIsDirectory(realname) )
+    if( !last && !kwsys::SystemTools::FileIsDirectory(realname) )
       {
       continue;
       }
@@ -335,14 +388,14 @@ void Glob::ProcessDirectory(kwsys_stl::string::size_type start,
         }
       else
         {
-        this->ProcessDirectory(start+1, realname);
+        this->ProcessDirectory(start+1, realname, messages);
         }
       }
     }
 }
 
 //----------------------------------------------------------------------------
-bool Glob::FindFiles(const kwsys_stl::string& inexpr)
+bool Glob::FindFiles(const kwsys_stl::string& inexpr, GlobMessages* messages)
 {
   kwsys_stl::string cexpr;
   kwsys_stl::string::size_type cc;
@@ -438,11 +491,11 @@ bool Glob::FindFiles(const kwsys_stl::string& inexpr)
   // Handle network paths
   if ( skip > 0 )
     {
-    this->ProcessDirectory(0, fexpr.substr(0, skip) + "/");
+    this->ProcessDirectory(0, fexpr.substr(0, skip) + "/", messages);
     }
   else
     {
-    this->ProcessDirectory(0, "/");
+    this->ProcessDirectory(0, "/", messages);
     }
   return true;
 }
diff --git a/Glob.hxx.in b/Glob.hxx.in
index d8b8491d..0d40d029 100644
--- a/Glob.hxx.in
+++ b/Glob.hxx.in
@@ -39,12 +39,37 @@ class GlobInternals;
  */
 class @KWSYS_NAMESPACE@_EXPORT Glob
 {
+public:
+  enum MessageType // category of a Message
+  {
+    error, // used when canonical path generation fails
+    cyclicRecursion // used when an already recorded symlink path is skipped
+  };
+
+  struct Message // one diagnostic entry: a category plus its text
+  {
+    MessageType type; // category of this entry
+    kwsys_stl::string content; // text of this entry
+
+    Message(MessageType t, const kwsys_stl::string& c) : // build from category and text
+      type(t),
+      content(c)
+    {}
+    Message(const Message& msg) : // copies both members
+      type(msg.type),
+      content(msg.content)
+    {}
+  };
+
+  typedef kwsys_stl::vector<Message> GlobMessages;
+  typedef kwsys_stl::vector<Message>::iterator GlobMessagesIterator;
 public:
   Glob();
   ~Glob();
 
   //! Find all files that match the pattern.
-  bool FindFiles(const kwsys_stl::string& inexpr);
+  bool FindFiles(const kwsys_stl::string& inexpr,
+    GlobMessages* messages = 0); // optional; if non-null, diagnostics are appended
 
   //! Return the list of files that matched.
   kwsys_stl::vector<kwsys_stl::string>& GetFiles();
@@ -83,12 +108,14 @@ public:
 protected:
   //! Process directory
   void ProcessDirectory(kwsys_stl::string::size_type start,
-    const kwsys_stl::string& dir);
+    const kwsys_stl::string& dir,
+    GlobMessages* messages);
 
   //! Process last directory, but only when recurse flags is on. That is
   // effectively like saying: /path/to/file/**/file
-  void RecurseDirectory(kwsys_stl::string::size_type start,
-    const kwsys_stl::string& dir);
+  bool RecurseDirectory(kwsys_stl::string::size_type start,
+    const kwsys_stl::string& dir,
+    GlobMessages* messages);
 
   //! Add regular expression
   void AddExpression(const kwsys_stl::string& expr);
@@ -101,6 +128,7 @@ protected:
   kwsys_stl::string Relative;
   bool RecurseThroughSymlinks;
   unsigned int FollowedSymlinkCount;
+  kwsys_stl::vector<kwsys_stl::string> VisitedSymlinks;
 
 private:
   Glob(const Glob&);  // Not implemented.
-- 
GitLab