From 19cc5bc296045fe2237d6db178c6ae70cc7722e0 Mon Sep 17 00:00:00 2001
From: Raul Tambre <raul@tambre.ee>
Date: Tue, 26 May 2020 21:18:33 +0300
Subject: [PATCH] CUDA: Throw error if user-specified architectures don't work

Previously if an user specified CMAKE_CUDA_ARCHITECTURES and they didn't
work we would end up erroring during compiler testing.  Instead check if
the architectures we successfully compiled with are the same as the
user-specified (if any). If they don't match, then throw a more helpful
error than compiler testing would.

Additionally, to make this work correctly I made it try all
user-specified architectures at once instead of each separately.

Implements: #20756
---
 Modules/CMakeDetermineCUDACompiler.cmake | 39 ++++++++++++++++++++----
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index 4a8a268a7d..ed37d28ce8 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -80,12 +80,23 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
     set(clang_test_flags)
   endif()
 
+  # First try with the user-specified architectures.
+  if(CMAKE_CUDA_ARCHITECTURES)
+    set(clang_archs "${clang_test_flags}")
+
+    foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
+      # Strip specifiers as PTX vs binary doesn't matter.
+      string(REGEX MATCH "[0-9]+" arch_name "${arch}")
+      string(APPEND clang_archs " --cuda-gpu-arch=sm_${arch_name}")
+    endforeach()
+
+    list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_archs}")
+  endif()
+
   # Clang doesn't automatically select an architecture supported by the SDK.
   # Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups).
-  foreach(arch ${CMAKE_CUDA_ARCHITECTURES} "20" "30" "52")
-    # Strip specifiers.
-    string(REGEX MATCH "[0-9]+" arch_name "${arch}")
-    list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch_name}")
+  foreach(arch "20" "30" "52")
+    list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch}")
   endforeach()
 
   # Finally also try the default.
@@ -127,13 +138,29 @@ if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
   set(_SET_CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT
     "set(CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT \"${CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT}\")")
 elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
-  # Parse default CUDA architecture.
   if(NOT CMAKE_CUDA_ARCHITECTURES)
+    # Find the architecture that we successfully compiled using and set it as the default.
     string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
     set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_MATCH_1}" CACHE STRING "CUDA architectures")
 
     if(NOT CMAKE_CUDA_ARCHITECTURES)
-      message(FATAL_ERROR "Failed to find default CUDA architecture.")
+      message(FATAL_ERROR "Failed to find a working CUDA architecture.")
+    endif()
+  else()
+    string(REGEX MATCHALL "-target-cpu sm_([0-9]+)" target_cpus "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+
+    foreach(cpu ${target_cpus})
+      string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${cpu}")
+      list(APPEND architectures "${CMAKE_MATCH_1}")
+    endforeach()
+
+    if(NOT "${architectures}" STREQUAL "${CMAKE_CUDA_ARCHITECTURES}")
+      message(FATAL_ERROR
+        "The CMAKE_CUDA_ARCHITECTURES:\n"
+        "  ${CMAKE_CUDA_ARCHITECTURES}\n"
+        "do not all work with this compiler.  Try:\n"
+        "  ${architectures}\n"
+        "instead.")
     endif()
   endif()
 
-- 
GitLab