From 5df21adf46be14061fe0e8ea53213d905e834600 Mon Sep 17 00:00:00 2001
From: Raul Tambre <raul@tambre.ee>
Date: Sat, 7 Mar 2020 17:29:29 +0200
Subject: [PATCH] CUDA: Add support for Clang compiler

When crosscompiling we pass the sysroot.

We need to try various architecture flags. Clang doesn't automatically
select one that works.  First try the ones that are more likely to work
for modern installations:

* <=sm_50 is deprecated since CUDA 10.2, try sm_52 first for
          future compatibility.
* <=sm_20 is removed since CUDA 9.0, try sm_30.

Otherwise fallback to Clang's current default. Currently that's `sm_20`,
the lowest it supports.

Separable compilation isn't supported yet.

Fixes: #16586
---
 Help/prop_tgt/CUDA_STANDARD.rst            |  2 +-
 Help/release/dev/cuda-clang.rst            |  4 ++
 Modules/CMakeCUDAInformation.cmake         | 27 +++++-----
 Modules/CMakeCompilerIdDetection.cmake     |  3 +-
 Modules/CMakeDetermineCUDACompiler.cmake   | 60 ++++++++++++++++++++--
 Modules/CMakeTestCUDACompiler.cmake        |  9 +++-
 Modules/Compiler/Clang-CUDA.cmake          | 25 +++++++++
 Modules/Compiler/NVIDIA-CUDA.cmake         |  4 ++
 Modules/Platform/Windows-NVIDIA-CUDA.cmake |  4 ++
 Source/cmGeneratorTarget.cxx               | 14 +++++
 10 files changed, 129 insertions(+), 23 deletions(-)
 create mode 100644 Help/release/dev/cuda-clang.rst
 create mode 100644 Modules/Compiler/Clang-CUDA.cmake

diff --git a/Help/prop_tgt/CUDA_STANDARD.rst b/Help/prop_tgt/CUDA_STANDARD.rst
index a3a2f56891..6d6774e4f3 100644
--- a/Help/prop_tgt/CUDA_STANDARD.rst
+++ b/Help/prop_tgt/CUDA_STANDARD.rst
@@ -7,7 +7,7 @@ This property specifies the CUDA/C++ standard whose features are requested
 to build this target.  For some compilers, this results in adding a
 flag such as ``-std=gnu++11`` to the compile line.
 
-Supported values are ``98``, ``11``, ``14``.
+Supported values are ``98``, ``03``, ``11``, ``14``, ``17``, ``20``.
 
 If the value requested does not result in a compile flag being added for
 the compiler in use, a previous standard flag will be added instead.  This
diff --git a/Help/release/dev/cuda-clang.rst b/Help/release/dev/cuda-clang.rst
new file mode 100644
index 0000000000..fa5cd5a38a
--- /dev/null
+++ b/Help/release/dev/cuda-clang.rst
@@ -0,0 +1,4 @@
+cuda-clang
+----------
+
+* The ``CUDA`` language now supports Clang as a compiler.
diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index e8b60b607b..f9f7574524 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -8,6 +8,19 @@ else()
 endif()
 set(CMAKE_INCLUDE_FLAG_CUDA "-I")
 
+# Set implicit links early so compiler-specific modules can use them.
+set(__IMPLICT_LINKS )
+foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
+  string(APPEND __IMPLICT_LINKS " -L\"${dir}\"")
+endforeach()
+foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES})
+  if(${lib} MATCHES "/")
+    string(APPEND __IMPLICT_LINKS " \"${lib}\"")
+  else()
+    string(APPEND __IMPLICT_LINKS " -l${lib}")
+  endif()
+endforeach()
+
 # Load compiler-specific information.
 if(CMAKE_CUDA_COMPILER_ID)
   include(Compiler/${CMAKE_CUDA_COMPILER_ID}-CUDA OPTIONAL)
@@ -97,22 +110,10 @@ include(CMakeCommonLanguageInclude)
 # CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
 # CMAKE_CUDA_LINK_EXECUTABLE
 
-if(CMAKE_CUDA_HOST_COMPILER)
+if(CMAKE_CUDA_HOST_COMPILER AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
   string(APPEND _CMAKE_CUDA_EXTRA_FLAGS " -ccbin=<CMAKE_CUDA_HOST_COMPILER>")
 endif()
 
-set(__IMPLICT_LINKS )
-foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
-  string(APPEND __IMPLICT_LINKS " -L\"${dir}\"")
-endforeach()
-foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES})
-  if(${lib} MATCHES "/")
-    string(APPEND __IMPLICT_LINKS " \"${lib}\"")
-  else()
-    string(APPEND __IMPLICT_LINKS " -l${lib}")
-  endif()
-endforeach()
-
 # create a shared library
 if(NOT CMAKE_CUDA_CREATE_SHARED_LIBRARY)
   set(CMAKE_CUDA_CREATE_SHARED_LIBRARY
diff --git a/Modules/CMakeCompilerIdDetection.cmake b/Modules/CMakeCompilerIdDetection.cmake
index bb573b7dc5..acd15df98f 100644
--- a/Modules/CMakeCompilerIdDetection.cmake
+++ b/Modules/CMakeCompilerIdDetection.cmake
@@ -89,9 +89,8 @@ function(compiler_id_detection outvar lang)
       )
     endif()
 
-    #Currently the only CUDA compilers are NVIDIA
     if(lang STREQUAL CUDA)
-      set(ordered_compilers NVIDIA)
+      set(ordered_compilers NVIDIA Clang)
     endif()
 
     if(CID_ID_DEFINE)
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index ead4125a02..af3668805b 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -2,7 +2,7 @@
 # file Copyright.txt or https://cmake.org/licensing for details.
 
 include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake)
-include(${CMAKE_ROOT}/Modules//CMakeParseImplicitLinkInfo.cmake)
+include(${CMAKE_ROOT}/Modules/CMakeParseImplicitLinkInfo.cmake)
 
 if( NOT ( ("${CMAKE_GENERATOR}" MATCHES "Make") OR
           ("${CMAKE_GENERATOR}" MATCHES "Ninja") OR
@@ -57,16 +57,39 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
   file(READ ${CMAKE_ROOT}/Modules/CMakePlatformId.h.in
     CMAKE_CUDA_COMPILER_ID_PLATFORM_CONTENT)
 
-  list(APPEND CMAKE_CUDA_COMPILER_ID_MATCH_VENDORS NVIDIA)
-  set(CMAKE_CUDA_COMPILER_ID_MATCH_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \(R\) Cuda compiler driver")
+  list(APPEND CMAKE_CUDA_COMPILER_ID_VENDORS NVIDIA Clang)
+  set(CMAKE_CUDA_COMPILER_ID_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \\(R\\) Cuda compiler driver")
+  set(CMAKE_CUDA_COMPILER_ID_VENDOR_REGEX_Clang "(clang version)")
 
   set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_REGEX "\nLd[^\n]*(\n[ \t]+[^\n]*)*\n[ \t]+([^ \t\r\n]+)[^\r\n]*-o[^\r\n]*CompilerIdCUDA/(\\./)?(CompilerIdCUDA.xctest/)?CompilerIdCUDA[ \t\n\\\"]")
   set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_INDEX 2)
+  set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
 
-  set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS -v --keep --keep-dir tmp)
+  # nvcc
+  set(nvcc_test_flags "--keep --keep-dir tmp")
   if(CMAKE_CUDA_HOST_COMPILER)
-      list(APPEND CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-ccbin=${CMAKE_CUDA_HOST_COMPILER}")
+    string(APPEND nvcc_test_flags " -ccbin=${CMAKE_CUDA_HOST_COMPILER}")
   endif()
+  list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST ${nvcc_test_flags})
+
+  # Clang
+  if(CMAKE_CROSSCOMPILING)
+    # Need to pass the host target and include directories if we're crosscompiling.
+    set(clang_test_flags "--sysroot=\"${CMAKE_SYSROOT}\" --target=${CMAKE_CUDA_COMPILER_TARGET}")
+  else()
+    set(clang_test_flags)
+  endif()
+
+  # Clang doesn't automatically select an architecture supported by the SDK.
+  # Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups).
+  foreach(arch ${CMAKE_CUDA_ARCHITECTURES} "20" "30" "52")
+    # Strip specifiers.
+    string(REGEX MATCH "[0-9]+" arch_name "${arch}")
+    list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch_name}")
+  endforeach()
+
+  # Finally also try the default.
+  list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags}")
 
   include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerId.cmake)
   CMAKE_DETERMINE_COMPILER_ID(CUDA CUDAFLAGS CMakeCUDACompilerId.cu)
@@ -89,6 +112,33 @@ if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
   set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "")
   set(CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "")
   set(CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
+elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
+  # Parse default CUDA architecture.
+  if(NOT CMAKE_CUDA_ARCHITECTURES)
+    string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+    set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_MATCH_1}" CACHE STRING "CUDA architectures")
+
+    if(NOT CMAKE_CUDA_ARCHITECTURES)
+      message(FATAL_ERROR "Failed to find default CUDA architecture.")
+    endif()
+  endif()
+
+  # Parsing implicit host linker info is as simple as for regular Clang.
+  CMAKE_PARSE_IMPLICIT_LINK_INFO("${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}"
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES
+                                 log
+                                 "${CMAKE_CUDA_IMPLICIT_OBJECT_REGEX}")
+
+  # Get SDK directory.
+  string(REGEX MATCH "Found CUDA installation: (.+), version" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+  set(__cuda_directory "${CMAKE_MATCH_1}")
+
+  # Clang doesn't add the SDK library directory to the implicit link path. Do it ourselves, so stuff works.
+  include(Internal/CUDAToolkit)
+  set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "${CUDAToolkit_INCLUDE_DIR}")
+  list(APPEND CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "${CUDAToolkit_LIBRARY_DIR}")
 elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
   set(_nvcc_log "")
   string(REPLACE "\r" "" _nvcc_output_orig "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
diff --git a/Modules/CMakeTestCUDACompiler.cmake b/Modules/CMakeTestCUDACompiler.cmake
index 5ad725702b..b3b62bdcb7 100644
--- a/Modules/CMakeTestCUDACompiler.cmake
+++ b/Modules/CMakeTestCUDACompiler.cmake
@@ -74,9 +74,14 @@ else()
   # - cudart_static
   # - cudadevrt
   #
+  # Additionally on Linux:
+  # - rt
+  # - pthread
+  # - dl
+  #
   # These are controlled by CMAKE_CUDA_RUNTIME_LIBRARY
-  list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt)
-  list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt)
+  list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt rt pthread dl)
+  list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt rt pthread dl)
 
   if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
     # Remove the CUDA Toolkit include directories from the set of
diff --git a/Modules/Compiler/Clang-CUDA.cmake b/Modules/Compiler/Clang-CUDA.cmake
new file mode 100644
index 0000000000..a970985b0e
--- /dev/null
+++ b/Modules/Compiler/Clang-CUDA.cmake
@@ -0,0 +1,25 @@
+include(Compiler/Clang)
+__compiler_clang(CUDA)
+
+# C++03 isn't supported for CXX, but is for CUDA, so we need to set these manually.
+# Do this before __compiler_clang_cxx_standards() since that adds the feature.
+set(CMAKE_CUDA03_STANDARD_COMPILE_OPTION "-std=c++03")
+set(CMAKE_CUDA03_EXTENSION_COMPILE_OPTION "-std=gnu++03")
+__compiler_clang_cxx_standards(CUDA)
+
+set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE)
+set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda")
+set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S")
+
+# RulePlaceholderExpander expands crosscompile variables like sysroot and target only for CMAKE_<LANG>_COMPILER. Override the default.
+set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
+set(CMAKE_CUDA_CREATE_SHARED_LIBRARY "<CMAKE_CUDA_COMPILER> <CMAKE_SHARED_LIBRARY_CUDA_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
+
+set(CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT "STATIC")
+set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
+set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
+set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE   "")
+
+if(UNIX)
+  list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
+endif()
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
index 4b09e6f510..eb8d55cb6a 100644
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -64,6 +64,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC  "cudadevrt;cudart_static")
 set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED  "cudadevrt;cudart")
 set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE    "")
 
+if(UNIX)
+  list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
+endif()
+
 if("x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
   set(CMAKE_CUDA03_STANDARD_COMPILE_OPTION "")
   set(CMAKE_CUDA03_EXTENSION_COMPILE_OPTION "")
diff --git a/Modules/Platform/Windows-NVIDIA-CUDA.cmake b/Modules/Platform/Windows-NVIDIA-CUDA.cmake
index f425cf851f..f8875941d8 100644
--- a/Modules/Platform/Windows-NVIDIA-CUDA.cmake
+++ b/Modules/Platform/Windows-NVIDIA-CUDA.cmake
@@ -74,6 +74,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC  "cudadevrt;cudart_static")
 set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED  "cudadevrt;cudart")
 set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE    "")
 
+if(UNIX)
+  list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
+endif()
+
 string(APPEND CMAKE_CUDA_FLAGS_INIT " ${PLATFORM_DEFINES_CUDA} -D_WINDOWS -Xcompiler=\"${_W3}${_FLAGS_CXX}\"")
 string(APPEND CMAKE_CUDA_FLAGS_DEBUG_INIT " -Xcompiler=\"${_MDd}-Zi -Ob0 -Od ${_RTC1}\"")
 string(APPEND CMAKE_CUDA_FLAGS_RELEASE_INIT " -Xcompiler=\"${_MD}-O2 -Ob2\" -DNDEBUG")
diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx
index a44126c13c..75790c8dee 100644
--- a/Source/cmGeneratorTarget.cxx
+++ b/Source/cmGeneratorTarget.cxx
@@ -3179,6 +3179,20 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
 
       flags += "]";
     }
+  } else if (compiler == "Clang") {
+    for (CudaArchitecture& architecture : architectures) {
+      flags += " --cuda-gpu-arch=sm_" + architecture.name;
+
+      if (!architecture.real) {
+        Makefile->IssueMessage(
+          MessageType::WARNING,
+          "Clang doesn't support disabling CUDA real code generation.");
+      }
+
+      if (!architecture.virtual_) {
+        flags += " --no-cuda-include-ptx=sm_" + architecture.name;
+      }
+    }
   }
 }
 
-- 
GitLab