diff --git a/Help/prop_tgt/CUDA_STANDARD.rst b/Help/prop_tgt/CUDA_STANDARD.rst
index a3a2f56891b70cbedf618a9f9a177df3ffbbcbee..6d6774e4f3c0873cbcbe60aa19bf95bf28ad6107 100644
--- a/Help/prop_tgt/CUDA_STANDARD.rst
+++ b/Help/prop_tgt/CUDA_STANDARD.rst
@@ -7,7 +7,7 @@ This property specifies the CUDA/C++ standard whose features are requested
 to build this target.  For some compilers, this results in adding a
 flag such as ``-std=gnu++11`` to the compile line.
 
-Supported values are ``98``, ``11``, ``14``.
+Supported values are ``98``, ``03``, ``11``, ``14``, ``17``, ``20``.
 
 If the value requested does not result in a compile flag being added for
 the compiler in use, a previous standard flag will be added instead.  This
diff --git a/Help/release/dev/cuda-clang.rst b/Help/release/dev/cuda-clang.rst
new file mode 100644
index 0000000000000000000000000000000000000000..fa5cd5a38a32fe0fae44c7fbeabeb94e7283e3c1
--- /dev/null
+++ b/Help/release/dev/cuda-clang.rst
@@ -0,0 +1,4 @@
+cuda-clang
+----------
+
+* The ``CUDA`` language can now be compiled using Clang.
diff --git a/Modules/CMakeCUDAInformation.cmake b/Modules/CMakeCUDAInformation.cmake
index e8b60b607bd43c9347c16e27ee0eb3dace5b0c03..f9f75745245e17e6f71fefb44bc60953639e1670 100644
--- a/Modules/CMakeCUDAInformation.cmake
+++ b/Modules/CMakeCUDAInformation.cmake
@@ -8,6 +8,19 @@ else()
 endif()
 set(CMAKE_INCLUDE_FLAG_CUDA "-I")
 
+# Compute the implicit host link flags early so compiler-specific modules can use them.
+set(__IMPLICT_LINKS )
+foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
+  string(APPEND __IMPLICT_LINKS " -L\"${dir}\"")
+endforeach()
+foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES})
+  if(${lib} MATCHES "/")
+    string(APPEND __IMPLICT_LINKS " \"${lib}\"")
+  else()
+    string(APPEND __IMPLICT_LINKS " -l${lib}")
+  endif()
+endforeach()
+
 # Load compiler-specific information.
 if(CMAKE_CUDA_COMPILER_ID)
   include(Compiler/${CMAKE_CUDA_COMPILER_ID}-CUDA OPTIONAL)
@@ -97,22 +110,10 @@ include(CMakeCommonLanguageInclude)
 # CMAKE_CUDA_COMPILE_SEPARABLE_COMPILATION
 # CMAKE_CUDA_LINK_EXECUTABLE
 
-if(CMAKE_CUDA_HOST_COMPILER)
+if(CMAKE_CUDA_HOST_COMPILER AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
   string(APPEND _CMAKE_CUDA_EXTRA_FLAGS " -ccbin=<CMAKE_CUDA_HOST_COMPILER>")
 endif()
 
-set(__IMPLICT_LINKS )
-foreach(dir ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
-  string(APPEND __IMPLICT_LINKS " -L\"${dir}\"")
-endforeach()
-foreach(lib ${CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES})
-  if(${lib} MATCHES "/")
-    string(APPEND __IMPLICT_LINKS " \"${lib}\"")
-  else()
-    string(APPEND __IMPLICT_LINKS " -l${lib}")
-  endif()
-endforeach()
-
 # create a shared library
 if(NOT CMAKE_CUDA_CREATE_SHARED_LIBRARY)
   set(CMAKE_CUDA_CREATE_SHARED_LIBRARY
diff --git a/Modules/CMakeCompilerIdDetection.cmake b/Modules/CMakeCompilerIdDetection.cmake
index bb573b7dc5191bdd82d11e80ef02953ce3421a99..acd15df98f1ebe6b53c17390696f6515d3d6ae3c 100644
--- a/Modules/CMakeCompilerIdDetection.cmake
+++ b/Modules/CMakeCompilerIdDetection.cmake
@@ -89,9 +89,8 @@ function(compiler_id_detection outvar lang)
       )
     endif()
 
-    #Currently the only CUDA compilers are NVIDIA
     if(lang STREQUAL CUDA)
-      set(ordered_compilers NVIDIA)
+      set(ordered_compilers NVIDIA Clang)
     endif()
 
     if(CID_ID_DEFINE)
diff --git a/Modules/CMakeDetermineCUDACompiler.cmake b/Modules/CMakeDetermineCUDACompiler.cmake
index ead4125a0233c9cb00b60914a0edc575d390ac87..af3668805b265cfec41453acc8ea76bb4ed4524c 100644
--- a/Modules/CMakeDetermineCUDACompiler.cmake
+++ b/Modules/CMakeDetermineCUDACompiler.cmake
@@ -2,7 +2,7 @@
 # file Copyright.txt or https://cmake.org/licensing for details.
 
 include(${CMAKE_ROOT}/Modules/CMakeDetermineCompiler.cmake)
-include(${CMAKE_ROOT}/Modules//CMakeParseImplicitLinkInfo.cmake)
+include(${CMAKE_ROOT}/Modules/CMakeParseImplicitLinkInfo.cmake)
 
 if( NOT ( ("${CMAKE_GENERATOR}" MATCHES "Make") OR
           ("${CMAKE_GENERATOR}" MATCHES "Ninja") OR
@@ -57,16 +57,39 @@ if(NOT CMAKE_CUDA_COMPILER_ID_RUN)
   file(READ ${CMAKE_ROOT}/Modules/CMakePlatformId.h.in
     CMAKE_CUDA_COMPILER_ID_PLATFORM_CONTENT)
 
-  list(APPEND CMAKE_CUDA_COMPILER_ID_MATCH_VENDORS NVIDIA)
-  set(CMAKE_CUDA_COMPILER_ID_MATCH_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \(R\) Cuda compiler driver")
+  list(APPEND CMAKE_CUDA_COMPILER_ID_VENDORS NVIDIA Clang)
+  set(CMAKE_CUDA_COMPILER_ID_VENDOR_REGEX_NVIDIA "nvcc: NVIDIA \\(R\\) Cuda compiler driver")
+  set(CMAKE_CUDA_COMPILER_ID_VENDOR_REGEX_Clang "(clang version)")
 
   set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_REGEX "\nLd[^\n]*(\n[ \t]+[^\n]*)*\n[ \t]+([^ \t\r\n]+)[^\r\n]*-o[^\r\n]*CompilerIdCUDA/(\\./)?(CompilerIdCUDA.xctest/)?CompilerIdCUDA[ \t\n\\\"]")
   set(CMAKE_CXX_COMPILER_ID_TOOL_MATCH_INDEX 2)
+  set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-v")
 
-  set(CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS -v --keep --keep-dir tmp)
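+  # Each entry of CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST is a complete candidate flag
+  # set; compiler identification tries them in order and stops once the compiler is identified.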
+  # nvcc
+  set(nvcc_test_flags "--keep --keep-dir tmp")
   if(CMAKE_CUDA_HOST_COMPILER)
-      list(APPEND CMAKE_CUDA_COMPILER_ID_FLAGS_ALWAYS "-ccbin=${CMAKE_CUDA_HOST_COMPILER}")
+    string(APPEND nvcc_test_flags " -ccbin=${CMAKE_CUDA_HOST_COMPILER}")
   endif()
+  list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST ${nvcc_test_flags})
+
+  # Clang
+  if(CMAKE_CROSSCOMPILING)
+    # When cross-compiling we need to pass the target triple and sysroot so the right headers and libraries are used.
+    set(clang_test_flags "--sysroot=\"${CMAKE_SYSROOT}\" --target=${CMAKE_CUDA_COMPILER_TARGET}")
+  else()
+    set(clang_test_flags)
+  endif()
+
+  # Clang doesn't automatically select an architecture supported by the SDK.
+  # Try in reverse order of deprecation with the most recent at front (i.e. the most likely to work for new setups).
+  foreach(arch ${CMAKE_CUDA_ARCHITECTURES} "20" "30" "52")
+    # Strip "-real"/"-virtual" specifiers; only the numeric architecture is needed here.
+    string(REGEX MATCH "[0-9]+" arch_name "${arch}")
+    list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags} --cuda-gpu-arch=sm_${arch_name}")
+  endforeach()
+
+  # Finally also try the default.
+  list(APPEND CMAKE_CUDA_COMPILER_ID_TEST_FLAGS_FIRST "${clang_test_flags}")
 
   include(${CMAKE_ROOT}/Modules/CMakeDetermineCompilerId.cmake)
   CMAKE_DETERMINE_COMPILER_ID(CUDA CUDAFLAGS CMakeCUDACompilerId.cu)
@@ -89,6 +112,33 @@ if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
   set(CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES "")
   set(CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "")
   set(CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "")
+elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
+  # Parse default CUDA architecture.
+  if(NOT CMAKE_CUDA_ARCHITECTURES)
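+    # CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT holds the verbose (-v) output from compiler
+    # identification; Clang's device-side cc1 invocation reports the selected
+    # architecture as "-target-cpu sm_XX".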
+    string(REGEX MATCH "-target-cpu sm_([0-9]+)" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+    set(CMAKE_CUDA_ARCHITECTURES "${CMAKE_MATCH_1}" CACHE STRING "CUDA architectures")
+
+    if(NOT CMAKE_CUDA_ARCHITECTURES)
+      message(FATAL_ERROR "Failed to find default CUDA architecture.")
+    endif()
+  endif()
+
+  # Parse the implicit host link information the same way as for regular Clang.
+  CMAKE_PARSE_IMPLICIT_LINK_INFO("${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}"
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES
+                                 CMAKE_CUDA_HOST_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES
+                                 log
+                                 "${CMAKE_CUDA_IMPLICIT_OBJECT_REGEX}")
+
+  # Get SDK directory.
+  string(REGEX MATCH "Found CUDA installation: (.+), version" dont_care "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
+  set(__cuda_directory "${CMAKE_MATCH_1}")
+
+  # Clang doesn't add the SDK library directory to the implicit link path.
+  # Add it ourselves so the CUDA runtime libraries can be found at link time.
+  include(Internal/CUDAToolkit)
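+  # Internal/CUDAToolkit sets CUDAToolkit_INCLUDE_DIR and a tentative CUDAToolkit_LIBRARY_DIR.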
+  set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "${CUDAToolkit_INCLUDE_DIR}")
+  list(APPEND CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES "${CUDAToolkit_LIBRARY_DIR}")
 elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
   set(_nvcc_log "")
   string(REPLACE "\r" "" _nvcc_output_orig "${CMAKE_CUDA_COMPILER_PRODUCED_OUTPUT}")
diff --git a/Modules/CMakeTestCUDACompiler.cmake b/Modules/CMakeTestCUDACompiler.cmake
index 05811a8a66e393e1126fe4943830deccf2861a35..b3b62bdcb7b6533842eb1ec26bd800ba0a0fdca0 100644
--- a/Modules/CMakeTestCUDACompiler.cmake
+++ b/Modules/CMakeTestCUDACompiler.cmake
@@ -74,20 +74,27 @@ else()
   # - cudart_static
   # - cudadevrt
   #
+  # Additionally on Linux:
+  # - rt
+  # - pthread
+  # - dl
+  #
   # These are controlled by CMAKE_CUDA_RUNTIME_LIBRARY
-  list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt)
-  list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt)
+  list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt rt pthread dl)
+  list(REMOVE_ITEM CMAKE_CUDA_HOST_IMPLICIT_LINK_LIBRARIES cudart cudart_static cudadevrt rt pthread dl)
 
-  # Remove the CUDA Toolkit include directories from the set of
-  # implicit system include directories.
-  # This resolves the issue that NVCC doesn't specify these
-  # includes as SYSTEM includes when compiling device code, and sometimes
-  # they contain headers that generate warnings, so let users mark them
-  # as SYSTEM explicitly
-  if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES)
-    list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES
-      ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
-      )
+  if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
+    # Remove the CUDA Toolkit include directories from the set of
+    # implicit system include directories.
+    # This resolves the issue that NVCC doesn't specify these
+    # includes as SYSTEM includes when compiling device code, and sometimes
+    # they contain headers that generate warnings, so let users mark them
+    # as SYSTEM explicitly
+    if(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES)
+      list(REMOVE_ITEM CMAKE_CUDA_IMPLICIT_INCLUDE_DIRECTORIES
+        ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
+        )
+    endif()
   endif()
 
   # Re-configure to save learned information.
diff --git a/Modules/Compiler/Clang-CUDA.cmake b/Modules/Compiler/Clang-CUDA.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..a970985b0e1b87d2c25d2d59bb44ace11c94beb0
--- /dev/null
+++ b/Modules/Compiler/Clang-CUDA.cmake
@@ -0,0 +1,25 @@
+include(Compiler/Clang)
+__compiler_clang(CUDA)
+
+# CMake's Clang CXX support has no 03 standard level, but CUDA_STANDARD accepts 03,
+# so set these compile options manually.
+# Do this before __compiler_clang_cxx_standards(), since that call adds the corresponding compile feature.
+set(CMAKE_CUDA03_STANDARD_COMPILE_OPTION "-std=c++03")
+set(CMAKE_CUDA03_EXTENSION_COMPILE_OPTION "-std=gnu++03")
+__compiler_clang_cxx_standards(CUDA)
+
+set(CMAKE_CUDA_COMPILER_HAS_DEVICE_LINK_PHASE TRUE)
+set(_CMAKE_COMPILE_AS_CUDA_FLAG "-x cuda")
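+# With "--cuda-device-only -S" Clang emits PTX assembly for the device compilation only.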
+set(_CMAKE_CUDA_PTX_FLAG "--cuda-device-only -S")
+
+# RulePlaceholderExpander expands cross-compile variables such as sysroot and target
+# only for CMAKE_<LANG>_COMPILER, so override the default rules.
+set(CMAKE_CUDA_LINK_EXECUTABLE "<CMAKE_CUDA_COMPILER> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
+set(CMAKE_CUDA_CREATE_SHARED_LIBRARY "<CMAKE_CUDA_COMPILER> <CMAKE_SHARED_LIBRARY_CUDA_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CUDA_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES>${__IMPLICT_LINKS}")
+
+set(CMAKE_CUDA_RUNTIME_LIBRARY_DEFAULT "STATIC")
+set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "cudadevrt;cudart_static")
+set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED "cudadevrt;cudart")
+set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE   "")
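+# The CUDA_RUNTIME_LIBRARY target property selects which of these lists is added to the link line.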
+
+if(UNIX)
+  list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
+endif()
diff --git a/Modules/Compiler/NVIDIA-CUDA.cmake b/Modules/Compiler/NVIDIA-CUDA.cmake
index 4b09e6f5108705073588dbe3225e6ea5ec01f3cc..eb8d55cb6a772cc85ced3f074e8a05d292733a36 100644
--- a/Modules/Compiler/NVIDIA-CUDA.cmake
+++ b/Modules/Compiler/NVIDIA-CUDA.cmake
@@ -64,6 +64,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC  "cudadevrt;cudart_static")
 set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED  "cudadevrt;cudart")
 set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE    "")
 
+if(UNIX)
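+  # On Linux the static CUDA runtime additionally requires these system libraries.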
+  list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
+endif()
+
 if("x${CMAKE_CUDA_SIMULATE_ID}" STREQUAL "xMSVC")
   set(CMAKE_CUDA03_STANDARD_COMPILE_OPTION "")
   set(CMAKE_CUDA03_EXTENSION_COMPILE_OPTION "")
diff --git a/Modules/FindCUDAToolkit.cmake b/Modules/FindCUDAToolkit.cmake
index 8d20ff9628d89415cea4960a2816630fa4b43926..b28892e727c4840d3f664f836211e5b3e07c9997 100644
--- a/Modules/FindCUDAToolkit.cmake
+++ b/Modules/FindCUDAToolkit.cmake
@@ -473,168 +473,8 @@ Result variables
 #
 ###############################################################################
 
-# For NVCC we can easily deduce the SDK binary directory from the compiler path.
-if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
-  get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
-  set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "")
-  mark_as_advanced(CUDAToolkit_BIN_DIR)
-  unset(cuda_dir)
-endif()
-
-# Try language- or user-provided path first.
-if(CUDAToolkit_BIN_DIR)
-  find_program(CUDAToolkit_NVCC_EXECUTABLE
-    NAMES nvcc nvcc.exe
-    PATHS ${CUDAToolkit_BIN_DIR}
-    NO_DEFAULT_PATH
-    )
-endif()
-
-# Search using CUDAToolkit_ROOT
-find_program(CUDAToolkit_NVCC_EXECUTABLE
-  NAMES nvcc nvcc.exe
-  PATHS ENV CUDA_PATH
-  PATH_SUFFIXES bin
-)
-
-# If the user specified CUDAToolkit_ROOT but nvcc could not be found, this is an error.
-if (NOT CUDAToolkit_NVCC_EXECUTABLE AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT}))
-  # Declare error messages now, print later depending on find_package args.
-  set(fail_base "Could not find nvcc executable in path specified by")
-  set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
-  set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}")
-
-  if (CUDAToolkit_FIND_REQUIRED)
-    if (DEFINED CUDAToolkit_ROOT)
-      message(FATAL_ERROR ${cuda_root_fail})
-    elseif (DEFINED ENV{CUDAToolkit_ROOT})
-      message(FATAL_ERROR ${env_cuda_root_fail})
-    endif()
-  else()
-    if (NOT CUDAToolkit_FIND_QUIETLY)
-      if (DEFINED CUDAToolkit_ROOT)
-        message(STATUS ${cuda_root_fail})
-      elseif (DEFINED ENV{CUDAToolkit_ROOT})
-        message(STATUS ${env_cuda_root_fail})
-      endif()
-    endif()
-    set(CUDAToolkit_FOUND FALSE)
-    unset(fail_base)
-    unset(cuda_root_fail)
-    unset(env_cuda_root_fail)
-    return()
-  endif()
-endif()
-
-# CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults.
-#
-# - Linux: /usr/local/cuda-X.Y
-# - macOS: /Developer/NVIDIA/CUDA-X.Y
-# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y
-#
-# We will also search the default symlink location /usr/local/cuda first since
-# if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked
-# directory is the desired location.
-if (NOT CUDAToolkit_NVCC_EXECUTABLE)
-  if (UNIX)
-    if (NOT APPLE)
-      set(platform_base "/usr/local/cuda-")
-    else()
-      set(platform_base "/Developer/NVIDIA/CUDA-")
-    endif()
-  else()
-    set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v")
-  endif()
-
-  # Build out a descending list of possible cuda installations, e.g.
-  file(GLOB possible_paths "${platform_base}*")
-  # Iterate the glob results and create a descending list.
-  set(possible_versions)
-  foreach (p ${possible_paths})
-    # Extract version number from end of string
-    string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p})
-    if (IS_DIRECTORY ${p} AND p_version)
-      list(APPEND possible_versions ${p_version})
-    endif()
-  endforeach()
-
-  # Cannot use list(SORT) because that is alphabetical, we need numerical.
-  # NOTE: this is not an efficient sorting strategy.  But even if a user had
-  # every possible version of CUDA installed, this wouldn't create any
-  # significant overhead.
-  set(versions)
-  foreach (v ${possible_versions})
-    list(LENGTH versions num_versions)
-    # First version, nothing to compare with so just append.
-    if (num_versions EQUAL 0)
-      list(APPEND versions ${v})
-    else()
-      # Loop through list.  Insert at an index when comparison is
-      # VERSION_GREATER since we want a descending list.  Duplicates will not
-      # happen since this came from a glob list of directories.
-      set(i 0)
-      set(early_terminate FALSE)
-      while (i LESS num_versions)
-        list(GET versions ${i} curr)
-        if (v VERSION_GREATER curr)
-          list(INSERT versions ${i} ${v})
-          set(early_terminate TRUE)
-          break()
-        endif()
-        math(EXPR i "${i} + 1")
-      endwhile()
-      # If it did not get inserted, place it at the end.
-      if (NOT early_terminate)
-        list(APPEND versions ${v})
-      endif()
-    endif()
-  endforeach()
-
-  # With a descending list of versions, populate possible paths to search.
-  set(search_paths)
-  foreach (v ${versions})
-    list(APPEND search_paths "${platform_base}${v}")
-  endforeach()
-
-  # Force the global default /usr/local/cuda to the front on Unix.
-  if (UNIX)
-    list(INSERT search_paths 0 "/usr/local/cuda")
-  endif()
-
-  # Now search for nvcc again using the platform default search paths.
-  find_program(CUDAToolkit_NVCC_EXECUTABLE
-    NAMES nvcc nvcc.exe
-    PATHS ${search_paths}
-    PATH_SUFFIXES bin
-  )
-
-  # We are done with these variables now, cleanup for caller.
-  unset(platform_base)
-  unset(possible_paths)
-  unset(possible_versions)
-  unset(versions)
-  unset(i)
-  unset(early_terminate)
-  unset(search_paths)
-
-  if (NOT CUDAToolkit_NVCC_EXECUTABLE)
-    if (CUDAToolkit_FIND_REQUIRED)
-      message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.")
-    elseif(NOT CUDAToolkit_FIND_QUIETLY)
-      message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.")
-    endif()
-
-    set(CUDAToolkit_FOUND FALSE)
-    return()
-  endif()
-endif()
-
-if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE)
-  get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY)
-  set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
-  mark_as_advanced(CUDAToolkit_BIN_DIR)
-  unset(cuda_dir)
-endif()
+# Include shared CUDA toolkit location code.
+include(Internal/CUDAToolkit)
 
 if(CUDAToolkit_NVCC_EXECUTABLE AND
    CUDAToolkit_NVCC_EXECUTABLE STREQUAL CMAKE_CUDA_COMPILER)
@@ -658,72 +498,22 @@ else()
   unset(NVCC_OUT)
 endif()
 
-
-get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE)
-
-# Handle cross compilation
-if(CMAKE_CROSSCOMPILING)
-  if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
-    # Support for NVPACK
-    set (CUDAToolkit_TARGET_NAME "armv7-linux-androideabi")
-  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
-    # Support for arm cross compilation
-    set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf")
-  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
-    # Support for aarch64 cross compilation
-    if (ANDROID_ARCH_NAME STREQUAL "arm64")
-      set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi")
-    else()
-      set(CUDAToolkit_TARGET_NAME "aarch64-linux")
-    endif (ANDROID_ARCH_NAME STREQUAL "arm64")
-  elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
-      set(CUDAToolkit_TARGET_NAME "x86_64-linux")
-  endif()
-
-  if (EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
-    set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
-    # add known CUDA target root path to the set of directories we search for programs, libraries and headers
-    list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}")
-
-    # Mark that we need to pop the root search path changes after we have
-    # found all cuda libraries so that searches for our cross-compilation
-    # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or
-    # PATh
-    set(_CUDAToolkit_Pop_ROOT_PATH True)
-  endif()
-else()
-  # Not cross compiling
-  set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}")
-  # Now that we have the real ROOT_DIR, find components inside it.
-  list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})
-
-  # Mark that we need to pop the prefix path changes after we have
-  # found the cudart library.
-  set(_CUDAToolkit_Pop_Prefix True)
-endif()
-
-
-# Find the include/ directory
-find_path(CUDAToolkit_INCLUDE_DIR
-  NAMES cuda_runtime.h
-)
-
-# And find the CUDA Runtime Library libcudart
+# Find the CUDA Runtime Library libcudart
 find_library(CUDA_CUDART
   NAMES cudart
   PATH_SUFFIXES lib64 lib/x64
 )
-if (NOT CUDA_CUDART)
+if(NOT CUDA_CUDART)
   find_library(CUDA_CUDART
     NAMES cudart
     PATH_SUFFIXES lib64/stubs lib/x64/stubs
   )
 endif()
 
-if (NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY)
+if(NOT CUDA_CUDART AND NOT CUDAToolkit_FIND_QUIETLY)
   message(STATUS "Unable to find cudart library.")
 endif()
 
 unset(CUDAToolkit_ROOT_DIR)
 if(_CUDAToolkit_Pop_Prefix)
   list(REMOVE_AT CMAKE_PREFIX_PATH -1)
@@ -749,8 +539,8 @@ mark_as_advanced(CUDA_CUDART
 #-----------------------------------------------------------------------------
 # Construct result variables
 if(CUDAToolkit_FOUND)
- set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR})
- get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE)
+  set(CUDAToolkit_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIR})
+  get_filename_component(CUDAToolkit_LIBRARY_DIR ${CUDA_CUDART} DIRECTORY ABSOLUTE)
 endif()
 
 #-----------------------------------------------------------------------------
diff --git a/Modules/Internal/CUDAToolkit.cmake b/Modules/Internal/CUDAToolkit.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..ab1e568077ceb9710301958914efc5eea15f6449
--- /dev/null
+++ b/Modules/Internal/CUDAToolkit.cmake
@@ -0,0 +1,225 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+# This file contains the code for finding basic CUDA toolkit information, shared between
+# CMakeDetermineCUDACompiler.cmake and FindCUDAToolkit.cmake.
+
+# For NVCC we can easily deduce the SDK binary directory from the compiler path.
+if(CMAKE_CUDA_COMPILER_LOADED AND NOT CUDAToolkit_BIN_DIR AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
+  get_filename_component(cuda_dir "${CMAKE_CUDA_COMPILER}" DIRECTORY)
+  set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "")
+  mark_as_advanced(CUDAToolkit_BIN_DIR)
+  unset(cuda_dir)
+endif()
+
+# Try language- or user-provided path first.
+if(CUDAToolkit_BIN_DIR)
+  find_program(CUDAToolkit_NVCC_EXECUTABLE
+    NAMES nvcc nvcc.exe
+    PATHS ${CUDAToolkit_BIN_DIR}
+    NO_DEFAULT_PATH
+    )
+endif()
+
+# Search using CUDAToolkit_ROOT
+find_program(CUDAToolkit_NVCC_EXECUTABLE
+  NAMES nvcc nvcc.exe
+  PATHS ENV CUDA_PATH
+  PATH_SUFFIXES bin
+)
+
+# If the user specified CUDAToolkit_ROOT but nvcc could not be found, this is an error.
+if(NOT CUDAToolkit_NVCC_EXECUTABLE AND (DEFINED CUDAToolkit_ROOT OR DEFINED ENV{CUDAToolkit_ROOT}))
+  # Declare error messages now, print later depending on find_package args.
+  set(fail_base "Could not find nvcc executable in path specified by")
+  set(cuda_root_fail "${fail_base} CUDAToolkit_ROOT=${CUDAToolkit_ROOT}")
+  set(env_cuda_root_fail "${fail_base} environment variable CUDAToolkit_ROOT=$ENV{CUDAToolkit_ROOT}")
+
+  if(CUDAToolkit_FIND_REQUIRED)
+    if(DEFINED CUDAToolkit_ROOT)
+      message(FATAL_ERROR ${cuda_root_fail})
+    elseif(DEFINED ENV{CUDAToolkit_ROOT})
+      message(FATAL_ERROR ${env_cuda_root_fail})
+    endif()
+  else()
+    if(NOT CUDAToolkit_FIND_QUIETLY)
+      if(DEFINED CUDAToolkit_ROOT)
+        message(STATUS ${cuda_root_fail})
+      elseif(DEFINED ENV{CUDAToolkit_ROOT})
+        message(STATUS ${env_cuda_root_fail})
+      endif()
+    endif()
+    set(CUDAToolkit_FOUND FALSE)
+    unset(fail_base)
+    unset(cuda_root_fail)
+    unset(env_cuda_root_fail)
+    return()
+  endif()
+endif()
+
+# CUDAToolkit_ROOT cmake / env variable not specified, try platform defaults.
+#
+# - Linux: /usr/local/cuda-X.Y
+# - macOS: /Developer/NVIDIA/CUDA-X.Y
+# - Windows: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\vX.Y
+#
+# We will also search the default symlink location /usr/local/cuda first since
+# if CUDAToolkit_ROOT is not specified, it is assumed that the symlinked
+# directory is the desired location.
+if(NOT CUDAToolkit_NVCC_EXECUTABLE)
+  if(UNIX)
+    if(NOT APPLE)
+      set(platform_base "/usr/local/cuda-")
+    else()
+      set(platform_base "/Developer/NVIDIA/CUDA-")
+    endif()
+  else()
+    set(platform_base "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v")
+  endif()
+
+  # Build out a descending list of possible cuda installations, e.g.
+  file(GLOB possible_paths "${platform_base}*")
+  # Iterate the glob results and create a descending list.
+  set(possible_versions)
+  foreach (p ${possible_paths})
+    # Extract version number from end of string
+    string(REGEX MATCH "[0-9][0-9]?\\.[0-9]$" p_version ${p})
+    if(IS_DIRECTORY ${p} AND p_version)
+      list(APPEND possible_versions ${p_version})
+    endif()
+  endforeach()
+
+  # Cannot use list(SORT) because that is alphabetical, we need numerical.
+  # NOTE: this is not an efficient sorting strategy.  But even if a user had
+  # every possible version of CUDA installed, this wouldn't create any
+  # significant overhead.
+  set(versions)
+  foreach (v ${possible_versions})
+    list(LENGTH versions num_versions)
+    # First version, nothing to compare with so just append.
+    if(num_versions EQUAL 0)
+      list(APPEND versions ${v})
+    else()
+      # Loop through list.  Insert at an index when comparison is
+      # VERSION_GREATER since we want a descending list.  Duplicates will not
+      # happen since this came from a glob list of directories.
+      set(i 0)
+      set(early_terminate FALSE)
+      while (i LESS num_versions)
+        list(GET versions ${i} curr)
+        if(v VERSION_GREATER curr)
+          list(INSERT versions ${i} ${v})
+          set(early_terminate TRUE)
+          break()
+        endif()
+        math(EXPR i "${i} + 1")
+      endwhile()
+      # If it did not get inserted, place it at the end.
+      if(NOT early_terminate)
+        list(APPEND versions ${v})
+      endif()
+    endif()
+  endforeach()
+
+  # With a descending list of versions, populate possible paths to search.
+  set(search_paths)
+  foreach (v ${versions})
+    list(APPEND search_paths "${platform_base}${v}")
+  endforeach()
+
+  # Force the global default /usr/local/cuda to the front on Unix.
+  if(UNIX)
+    list(INSERT search_paths 0 "/usr/local/cuda")
+  endif()
+
+  # Now search for nvcc again using the platform default search paths.
+  find_program(CUDAToolkit_NVCC_EXECUTABLE
+    NAMES nvcc nvcc.exe
+    PATHS ${search_paths}
+    PATH_SUFFIXES bin
+  )
+
+  # We are done with these variables now, cleanup for caller.
+  unset(platform_base)
+  unset(possible_paths)
+  unset(possible_versions)
+  unset(versions)
+  unset(i)
+  unset(early_terminate)
+  unset(search_paths)
+
+  if(NOT CUDAToolkit_NVCC_EXECUTABLE)
+    if(CUDAToolkit_FIND_REQUIRED)
+      message(FATAL_ERROR "Could not find nvcc, please set CUDAToolkit_ROOT.")
+    elseif(NOT CUDAToolkit_FIND_QUIETLY)
+      message(STATUS "Could not find nvcc, please set CUDAToolkit_ROOT.")
+    endif()
+
+    set(CUDAToolkit_FOUND FALSE)
+    return()
+  endif()
+endif()
+
+if(NOT CUDAToolkit_BIN_DIR AND CUDAToolkit_NVCC_EXECUTABLE)
+  get_filename_component(cuda_dir "${CUDAToolkit_NVCC_EXECUTABLE}" DIRECTORY)
+  set(CUDAToolkit_BIN_DIR "${cuda_dir}" CACHE PATH "" FORCE)
+  mark_as_advanced(CUDAToolkit_BIN_DIR)
+  unset(cuda_dir)
+endif()
+
+get_filename_component(CUDAToolkit_ROOT_DIR ${CUDAToolkit_BIN_DIR} DIRECTORY ABSOLUTE)
+
+# Handle cross compilation
+if(CMAKE_CROSSCOMPILING)
+  if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
+    # Support for NVPACK
+    set(CUDAToolkit_TARGET_NAME "armv7-linux-androideabi")
+  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
+    # Support for arm cross compilation
+    set(CUDAToolkit_TARGET_NAME "armv7-linux-gnueabihf")
+  elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
+    # Support for aarch64 cross compilation
+    if(ANDROID_ARCH_NAME STREQUAL "arm64")
+      set(CUDAToolkit_TARGET_NAME "aarch64-linux-androideabi")
+    else()
+      set(CUDAToolkit_TARGET_NAME "aarch64-linux")
+    endif()
+  elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+      set(CUDAToolkit_TARGET_NAME "x86_64-linux")
+  endif()
+
+  if(EXISTS "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
+    set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}/targets/${CUDAToolkit_TARGET_NAME}")
+    # add known CUDA target root path to the set of directories we search for programs, libraries and headers
+    list(PREPEND CMAKE_FIND_ROOT_PATH "${CUDAToolkit_TARGET_DIR}")
+
+    # Mark that we need to pop the root search path changes after we have
+    # found all cuda libraries so that searches for our cross-compilation
+    # libraries work when another cuda sdk is in CMAKE_PREFIX_PATH or
+    # PATH
+    set(_CUDAToolkit_Pop_ROOT_PATH True)
+  endif()
+else()
+  # Not cross compiling
+  set(CUDAToolkit_TARGET_DIR "${CUDAToolkit_ROOT_DIR}")
+  # Now that we have the real ROOT_DIR, find components inside it.
+  list(APPEND CMAKE_PREFIX_PATH ${CUDAToolkit_ROOT_DIR})
+
+  # Mark that we need to pop the prefix path changes after we have
+  # found the cudart library.
+  set(_CUDAToolkit_Pop_Prefix True)
+endif()
+
+# Find the include/ directory
+find_path(CUDAToolkit_INCLUDE_DIR
+  NAMES cuda_runtime.h
+)
+
+# Find a tentative CUDAToolkit_LIBRARY_DIR. FindCUDAToolkit overrides it by searching for the CUDA runtime,
+# but we can't do that here, as CMakeDetermineCUDACompiler wants to use it before the variables necessary
+# for find_library() have been initialized.
+if(EXISTS "${CUDAToolkit_TARGET_DIR}/lib64")
+  set(CUDAToolkit_LIBRARY_DIR "${CUDAToolkit_TARGET_DIR}/lib64")
+elseif(EXISTS "${CUDAToolkit_TARGET_DIR}/lib")
+  set(CUDAToolkit_LIBRARY_DIR "${CUDAToolkit_TARGET_DIR}/lib")
+endif()
diff --git a/Modules/Platform/Windows-NVIDIA-CUDA.cmake b/Modules/Platform/Windows-NVIDIA-CUDA.cmake
index f425cf851f473403128fabe0c0afa267bfd3fab9..f8875941d8b5ec2057fd51df61aa8e0a8c57d650 100644
--- a/Modules/Platform/Windows-NVIDIA-CUDA.cmake
+++ b/Modules/Platform/Windows-NVIDIA-CUDA.cmake
@@ -74,6 +74,10 @@ set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC  "cudadevrt;cudart_static")
 set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_SHARED  "cudadevrt;cudart")
 set(CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_NONE    "")
 
+if(UNIX)
+  list(APPEND CMAKE_CUDA_RUNTIME_LIBRARY_LINK_OPTIONS_STATIC "rt" "pthread" "dl")
+endif()
+
 string(APPEND CMAKE_CUDA_FLAGS_INIT " ${PLATFORM_DEFINES_CUDA} -D_WINDOWS -Xcompiler=\"${_W3}${_FLAGS_CXX}\"")
 string(APPEND CMAKE_CUDA_FLAGS_DEBUG_INIT " -Xcompiler=\"${_MDd}-Zi -Ob0 -Od ${_RTC1}\"")
 string(APPEND CMAKE_CUDA_FLAGS_RELEASE_INIT " -Xcompiler=\"${_MD}-O2 -Ob2\" -DNDEBUG")
diff --git a/Source/cmGeneratorTarget.cxx b/Source/cmGeneratorTarget.cxx
index fb0512ed62fff852f13db6f25c6d16f5d24b46e2..335f7a4f33e55a0335d858620d33fd9ff61822ef 100644
--- a/Source/cmGeneratorTarget.cxx
+++ b/Source/cmGeneratorTarget.cxx
@@ -3180,6 +3180,20 @@ void cmGeneratorTarget::AddCUDAArchitectureFlags(std::string& flags) const
 
       flags += "]";
     }
+  } else if (compiler == "Clang") {
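+    // Clang always generates real (SASS) device code for each requested architecture;
+    // only the PTX (virtual) part can be omitted, via --no-cuda-include-ptx.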
+    for (CudaArchitecture& architecture : architectures) {
+      flags += " --cuda-gpu-arch=sm_" + architecture.name;
+
+      if (!architecture.real) {
+        Makefile->IssueMessage(
+          MessageType::WARNING,
+          "Clang doesn't support disabling CUDA real code generation.");
+      }
+
+      if (!architecture.virtual_) {
+        flags += " --no-cuda-include-ptx=sm_" + architecture.name;
+      }
+    }
   }
 }
 
diff --git a/Tests/Cuda/CMakeLists.txt b/Tests/Cuda/CMakeLists.txt
index 58b9b03c999eb606f3bf4e48aa48511d1d64a6ca..83f2f2d87a45e44953a5b038402d19bceb07332c 100644
--- a/Tests/Cuda/CMakeLists.txt
+++ b/Tests/Cuda/CMakeLists.txt
@@ -1,5 +1,4 @@
 
-ADD_TEST_MACRO(Cuda.Complex CudaComplex)
 ADD_TEST_MACRO(Cuda.ConsumeCompileFeatures CudaConsumeCompileFeatures)
 ADD_TEST_MACRO(Cuda.CXXStandardSetTwice CXXStandardSetTwice)
 ADD_TEST_MACRO(Cuda.ObjectLibrary CudaObjectLibrary)
@@ -12,10 +11,16 @@ ADD_TEST_MACRO(Cuda.NotEnabled CudaNotEnabled)
 ADD_TEST_MACRO(Cuda.SeparableCompCXXOnly SeparableCompCXXOnly)
 ADD_TEST_MACRO(Cuda.Toolkit Toolkit)
 ADD_TEST_MACRO(Cuda.IncludePathNoToolkit IncludePathNoToolkit)
-ADD_TEST_MACRO(Cuda.ProperDeviceLibraries ProperDeviceLibraries)
-ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags)
 ADD_TEST_MACRO(Cuda.SharedRuntimePlusToolkit SharedRuntimePlusToolkit)
 
+# Separable compilation is currently only supported on NVCC. Disable tests
+# using it for other compilers.
+if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
+  ADD_TEST_MACRO(Cuda.Complex CudaComplex)
+  ADD_TEST_MACRO(Cuda.ProperDeviceLibraries ProperDeviceLibraries)
+  ADD_TEST_MACRO(Cuda.ProperLinkFlags ProperLinkFlags)
+endif()
+
 # The CUDA only ships the shared version of the toolkit libraries
 # on windows
 if(NOT WIN32)
diff --git a/Tests/Cuda/ProperLinkFlags/CMakeLists.txt b/Tests/Cuda/ProperLinkFlags/CMakeLists.txt
index b6e0e394b45c6844002dcfe0b08c9b34d00ab9dc..d38da6d14a3b47f39f79a9975ebbe65ad25d8550 100644
--- a/Tests/Cuda/ProperLinkFlags/CMakeLists.txt
+++ b/Tests/Cuda/ProperLinkFlags/CMakeLists.txt
@@ -9,11 +9,17 @@ project (ProperLinkFlags CUDA CXX)
 
 #Specify a set of valid CUDA flags and an invalid set of CXX flags ( for CUDA )
 #to make sure we don't use the CXX flags when linking CUDA executables
-string(APPEND CMAKE_CUDA_FLAGS " -arch=sm_35 --use_fast_math")
+if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
+  string(APPEND CMAKE_CUDA_FLAGS " --use_fast_math")
+elseif(CMAKE_CUDA_COMPILER_ID STREQUAL "Clang")
+  string(APPEND CMAKE_CUDA_FLAGS " -ffast-math")
+endif()
+
 set(CMAKE_CXX_FLAGS "-Wall")
 
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CUDA_STANDARD 11)
+set(CMAKE_CUDA_ARCHITECTURES 35)
 add_executable(ProperLinkFlags file1.cu main.cxx)
 
 set_target_properties( ProperLinkFlags
diff --git a/Tests/CudaOnly/CMakeLists.txt b/Tests/CudaOnly/CMakeLists.txt
index d74e81035824334d848575549f33226c5e72ca5f..ee7374e4ef2597b5ca1e223a0e7ec3017c391196 100644
--- a/Tests/CudaOnly/CMakeLists.txt
+++ b/Tests/CudaOnly/CMakeLists.txt
@@ -1,17 +1,35 @@
 
 ADD_TEST_MACRO(CudaOnly.Architecture Architecture)
-ADD_TEST_MACRO(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
 ADD_TEST_MACRO(CudaOnly.CompileFlags CudaOnlyCompileFlags)
 ADD_TEST_MACRO(CudaOnly.EnableStandard CudaOnlyEnableStandard)
 ADD_TEST_MACRO(CudaOnly.ExportPTX CudaOnlyExportPTX)
-ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
-ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
-ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
 ADD_TEST_MACRO(CudaOnly.SharedRuntimePlusToolkit CudaOnlySharedRuntimePlusToolkit)
 ADD_TEST_MACRO(CudaOnly.Standard98 CudaOnlyStandard98)
 ADD_TEST_MACRO(CudaOnly.Toolkit CudaOnlyToolkit)
 ADD_TEST_MACRO(CudaOnly.WithDefs CudaOnlyWithDefs)
 
+if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
+  # Separable compilation is currently only supported on NVCC. Disable tests
+  # using it for other compilers.
+  ADD_TEST_MACRO(CudaOnly.CircularLinkLine CudaOnlyCircularLinkLine)
+  ADD_TEST_MACRO(CudaOnly.ResolveDeviceSymbols CudaOnlyResolveDeviceSymbols)
+  ADD_TEST_MACRO(CudaOnly.SeparateCompilation CudaOnlySeparateCompilation)
+
+  add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
+    ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
+    --build-and-test
+    "${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
+    "${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
+    ${build_generator_args}
+    --build-project DontResolveDeviceSymbols
+    --build-options ${build_options}
+    --test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
+  )
+
+  # Only NVCC defines __CUDACC_DEBUG__ when device debugging (-G) is enabled.
+  ADD_TEST_MACRO(CudaOnly.GPUDebugFlag CudaOnlyGPUDebugFlag)
+endif()
+
 # The CUDA only ships the shared version of the toolkit libraries
 # on windows
 if(NOT WIN32)
@@ -22,17 +40,6 @@ if(MSVC)
   ADD_TEST_MACRO(CudaOnly.PDB CudaOnlyPDB)
 endif()
 
-add_test(NAME CudaOnly.DontResolveDeviceSymbols COMMAND
-  ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
-  --build-and-test
-  "${CMAKE_CURRENT_SOURCE_DIR}/DontResolveDeviceSymbols/"
-  "${CMAKE_CURRENT_BINARY_DIR}/DontResolveDeviceSymbols/"
-  ${build_generator_args}
-  --build-project DontResolveDeviceSymbols
-  --build-options ${build_options}
-  --test-command ${CMAKE_CTEST_COMMAND} -V -C $<CONFIGURATION>
-  )
-
 add_test(NAME CudaOnly.RuntimeControls COMMAND
   ${CMAKE_CTEST_COMMAND} -C $<CONFIGURATION>
   --build-and-test
diff --git a/Tests/CudaOnly/CompileFlags/CMakeLists.txt b/Tests/CudaOnly/CompileFlags/CMakeLists.txt
index cbce7d6a68b532988ba4f258feb8e4b70f488900..5e8a8e40f570d75783d025f34caf52e8b8e83512 100644
--- a/Tests/CudaOnly/CompileFlags/CMakeLists.txt
+++ b/Tests/CudaOnly/CompileFlags/CMakeLists.txt
@@ -1,16 +1,15 @@
 cmake_minimum_required(VERSION 3.17)
-cmake_policy(SET CMP0104 OLD)
 project(CompileFlags CUDA)
 
-# Clear defaults.
-set(CMAKE_CUDA_ARCHITECTURES)
-
 add_executable(CudaOnlyCompileFlags main.cu)
 
 # Try passing CUDA architecture flags explicitly.
 if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
   target_compile_options(CudaOnlyCompileFlags PRIVATE
     -gencode arch=compute_50,code=compute_50
-    --compiler-options=-DHOST_DEFINE
   )
+else()
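+  # "50-real" requests device code for sm_50 without embedding PTX for it.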
+  set_property(TARGET CudaOnlyCompileFlags PROPERTY CUDA_ARCHITECTURES 50-real)
 endif()
+
+target_compile_options(CudaOnlyCompileFlags PRIVATE -DALWAYS_DEFINE)
diff --git a/Tests/CudaOnly/CompileFlags/main.cu b/Tests/CudaOnly/CompileFlags/main.cu
index 573d230ccd2d6cb5eaf22ac6398e02d725bb0537..999c0562a30f498438030e9e616ec78e09ac14c2 100644
--- a/Tests/CudaOnly/CompileFlags/main.cu
+++ b/Tests/CudaOnly/CompileFlags/main.cu
@@ -4,11 +4,8 @@
 #  endif
 #endif
 
-// Check HOST_DEFINE only for nvcc
-#ifndef __CUDA__
-#  ifndef HOST_DEFINE
-#    error "HOST_DEFINE not defined!"
-#  endif
+#ifndef ALWAYS_DEFINE
+#  error "ALWAYS_DEFINE not defined!"
 #endif
 
 int main()
diff --git a/Tests/CudaOnly/ExportPTX/CMakeLists.txt b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
index ff6e77c33f2ef990b31f4ac0076a4af83dc4c41b..ee5f54d024af12b08c93bc87d32d5574216b4e11 100644
--- a/Tests/CudaOnly/ExportPTX/CMakeLists.txt
+++ b/Tests/CudaOnly/ExportPTX/CMakeLists.txt
@@ -34,16 +34,15 @@ static std::string ptx_paths = "$<TARGET_OBJECTS:CudaPTX>";
 # need to also pass the --name option
 set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h)
 
-get_filename_component(cuda_compiler_bin "${CMAKE_CUDA_COMPILER}" DIRECTORY)
+find_package(CUDAToolkit REQUIRED)
 find_program(bin_to_c
   NAMES bin2c
-  PATHS ${cuda_compiler_bin}
+  PATHS ${CUDAToolkit_BIN_DIR}
   )
 if(NOT bin_to_c)
   message(FATAL_ERROR
     "bin2c not found:\n"
-    "  CMAKE_CUDA_COMPILER='${CMAKE_CUDA_COMPILER}'\n"
-    "  cuda_compiler_bin='${cuda_compiler_bin}'\n"
+    "  CUDAToolkit_BIN_DIR='${CUDAToolkit_BIN_DIR}'\n"
     )
 endif()
 
diff --git a/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt b/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt
index fbef15f9e16911c17176d9daf853b0007504d790..325723a6adec94139ebcfbf958c3d8cfc199f82d 100644
--- a/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt
+++ b/Tests/CudaOnly/GPUDebugFlag/CMakeLists.txt
@@ -2,18 +2,19 @@
 cmake_minimum_required(VERSION 3.7)
 project (GPUDebugFlag CUDA)
 
-#Goal for this example:
-#verify that -G enables gpu debug flags
-string(APPEND CMAKE_CUDA_FLAGS " -gencode=arch=compute_30,code=compute_30")
-string(APPEND CMAKE_CUDA_FLAGS " -G")
 set(CMAKE_CUDA_STANDARD 11)
+set(CMAKE_CUDA_ARCHITECTURES 30)
+
+# Goal for this example:
+# Verify that enabling device debug works.
+string(APPEND CMAKE_CUDA_FLAGS " -G")
 
 add_executable(CudaOnlyGPUDebugFlag main.cu)
 
+#CUDA's __CUDACC_DEBUG__ define was added in NVCC 9.0
+#so if we are below 9.0.0 we will manually add the define so that the test
+#passes
 if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 9.0.0)
-  #CUDA's __CUDACC_DEBUG__ define was added in 9.0
-  #so if we are below 9.0.0 we will manually add the define so that the test
-  #passes
   target_compile_definitions(CudaOnlyGPUDebugFlag PRIVATE "__CUDACC_DEBUG__")
 endif()
 
diff --git a/Tests/CudaOnly/WithDefs/CMakeLists.txt b/Tests/CudaOnly/WithDefs/CMakeLists.txt
index add8131a39df48885f05f109051e09b96d095adf..0ed81d89de3b3f1274d7302497e86fa656bf1cd8 100644
--- a/Tests/CudaOnly/WithDefs/CMakeLists.txt
+++ b/Tests/CudaOnly/WithDefs/CMakeLists.txt
@@ -18,7 +18,7 @@ target_compile_options(CudaOnlyWithDefs
   PRIVATE
     -DFLAG_COMPILE_LANG_$<COMPILE_LANGUAGE>
     -DFLAG_LANG_IS_CUDA=$<COMPILE_LANGUAGE:CUDA>
-    --compiler-options=-DHOST_DEFINE
+    $<$<CUDA_COMPILER_ID:NVIDIA>:--compiler-options=-DHOST_DEFINE> # Passing host-only defines is possible only with NVCC.
   )
 
 target_compile_definitions(CudaOnlyWithDefs
diff --git a/Tests/CudaOnly/WithDefs/main.notcu b/Tests/CudaOnly/WithDefs/main.notcu
index a5f4ed65262ee2a5503d7fc864f23c263bec2758..9119eba1124c38e3a4f1d7ff3649c5717d473bc0 100644
--- a/Tests/CudaOnly/WithDefs/main.notcu
+++ b/Tests/CudaOnly/WithDefs/main.notcu
@@ -7,8 +7,10 @@
 #  error "INC_CUDA not defined!"
 #endif
 
-#ifndef HOST_DEFINE
-#  error "HOST_DEFINE not defined!"
+#ifdef __NVCC__
+#  ifndef HOST_DEFINE
+#    error "HOST_DEFINE not defined!"
+#  endif
 #endif
 
 #ifndef PACKED_DEFINE
diff --git a/Tests/RunCMake/CMP0104/CMP0104-WARN.cmake b/Tests/RunCMake/CMP0104/CMP0104-WARN.cmake
index 841d0a8f5afaf9daa3f3808599fd0f1ce48dfa3f..2b4a8f5fbedec59427baec19b0aaf6a2ba5a029d 100644
--- a/Tests/RunCMake/CMP0104/CMP0104-WARN.cmake
+++ b/Tests/RunCMake/CMP0104/CMP0104-WARN.cmake
@@ -1 +1,2 @@
 include(CMP0104-Common.cmake)
+set_property(TARGET cuda PROPERTY CUDA_ARCHITECTURES)
diff --git a/Tests/RunCMake/NinjaMultiConfig/RunCMakeTest.cmake b/Tests/RunCMake/NinjaMultiConfig/RunCMakeTest.cmake
index 6472f46b13e4087b77f9411de5499beaf44bb7ca..449db4a5a40d43f5369b3b0a4e74ba3260d5e49d 100644
--- a/Tests/RunCMake/NinjaMultiConfig/RunCMakeTest.cmake
+++ b/Tests/RunCMake/NinjaMultiConfig/RunCMakeTest.cmake
@@ -285,7 +285,8 @@ run_cmake_command(NoUnusedVariables ${CMAKE_COMMAND} ${CMAKE_CURRENT_LIST_DIR}
   "-DCMAKE_DEFAULT_CONFIGS=all"
   )
 
-if(CMake_TEST_CUDA)
+# CudaSimple uses separable compilation, which is currently only supported on NVCC.
+if(CMake_TEST_CUDA AND CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
   set(RunCMake_TEST_BINARY_DIR ${RunCMake_BINARY_DIR}/CudaSimple-build)
   run_cmake_configure(CudaSimple)
   include(${RunCMake_TEST_BINARY_DIR}/target_files.cmake)
diff --git a/Tests/RunCMake/target_link_options/RunCMakeTest.cmake b/Tests/RunCMake/target_link_options/RunCMakeTest.cmake
index b919f48e08111745ed5cd0f584cac1bbc455cd7f..2035eb4de2a570316b41ecfca273f8a1cc796807 100644
--- a/Tests/RunCMake/target_link_options/RunCMakeTest.cmake
+++ b/Tests/RunCMake/target_link_options/RunCMakeTest.cmake
@@ -50,10 +50,14 @@ if (NOT CMAKE_C_COMPILER_ID STREQUAL "Intel")
   run_cmake_target(genex_DEVICE_LINK interface LinkOptions_shared_interface --config Release)
   run_cmake_target(genex_DEVICE_LINK private LinkOptions_private --config Release)
   if (CMake_TEST_CUDA)
-    run_cmake_target(genex_DEVICE_LINK CMP0105_UNSET LinkOptions_CMP0105_UNSET --config Release)
-    run_cmake_target(genex_DEVICE_LINK CMP0105_OLD LinkOptions_CMP0105_OLD --config Release)
-    run_cmake_target(genex_DEVICE_LINK CMP0105_NEW LinkOptions_CMP0105_NEW --config Release)
-    run_cmake_target(genex_DEVICE_LINK device LinkOptions_device --config Release)
+    # Separable compilation is only supported on NVCC.
+    if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
+      run_cmake_target(genex_DEVICE_LINK CMP0105_UNSET LinkOptions_CMP0105_UNSET --config Release)
+      run_cmake_target(genex_DEVICE_LINK CMP0105_OLD LinkOptions_CMP0105_OLD --config Release)
+      run_cmake_target(genex_DEVICE_LINK CMP0105_NEW LinkOptions_CMP0105_NEW --config Release)
+      run_cmake_target(genex_DEVICE_LINK device LinkOptions_device --config Release)
+    endif()
+
     run_cmake_target(genex_DEVICE_LINK no_device LinkOptions_no_device --config Release)
   endif()