Merge topic 'handle_busy_cuda_device_better'

d0a70946 Simplify the DeviceAdapterRuntimeDetectorCuda to not do a kernel launch. Acked-by: Kitware Robot <kwrobot@kitware.com> Merge-request: !1533

Merge topic 'handle_busy_cuda_device_better'
9a94c8c5 · Robert Maynard · Kitware Robot · 610bfc21 · d0a70946 · 9a94c8c5
Commit 9a94c8c5 authored 6 years ago by Robert Maynard Committed by Kitware Robot 6 years ago
--- a/docs/changelog/vtkm-handles-busy-cuda-devices-better.md
+++ b/docs/changelog/vtkm-handles-busy-cuda-devices-better.md
+# VTK-m CUDA detection properly handles busy devices
+
+When an application that uses VTK-m is first launched, it
+checks whether CUDA is supported at runtime. Previously, if
+the CUDA card was not allowing kernel execution for any reason,
+VTK-m would report that the hardware doesn't have CUDA support.
+
+This was problematic because it was overly aggressive in disabling
+CUDA support for hardware that could support kernel execution in
+the future. Because every VTK-m worklet is executed
+through a TryExecute, it is no longer necessary to be so
+aggressive in disabling CUDA support.
+
+Now the behavior is that VTK-m considers a machine to have
+CUDA runtime support if it has at least one GPU of Kepler or
+newer architecture (SM_30+).
+
--- a/vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.cxx
+++ b/vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.cxx
@@ -27,24 +27,6 @@
 #include <vtkm/Math.h>
 #include <vtkm/cont/cuda/ErrorCuda.h>

-namespace vtkm
-{
-namespace cont
-{
-namespace cuda
-{
-namespace internal
-{
-
-static __global__ void DetermineIfValidCudaDevice()
-{
-  //used only to see if we can launch kernels. It is possible to have a
-  //CUDA capable device, but still fail to have CUDA support.
-}
-}
-}
-}
-}
 namespace
 {
 static std::once_flag deviceQueryFlag;
@@ -64,28 +46,11 @@ void queryNumberOfDevicesandHighestArchSupported(vtkm::Int32& nod, vtkm::Int32&
    for (vtkm::Int32 i = 0; i < numDevices; i++)
    {
      cudaDeviceProp prop;
-      VTKM_CUDA_CALL(cudaGetDeviceProperties(&prop, i));
-      const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
-      archVersion = vtkm::Max(arch, archVersion);
-    }
-
-    //Make sure we can actually launch a kernel. This could fail for any
-    //of the following reasons:
-    //
-    // 1. cudaErrorInsufficientDriver, caused by out of data drives
-    // 2. cudaErrorDevicesUnavailable, caused by another process locking the
-    //    device or somebody disabling cuda support on the device
-    // 3. cudaErrorNoKernelImageForDevice we built for a compute version
-    //    greater than the device we are running on
-    // Most likely others that I don't even know about
-    if (numDevices > 0)
-    {
-      vtkm::cont::cuda::internal::DetermineIfValidCudaDevice<<<1, 1, 0, cudaStreamPerThread>>>();
-      cudaStreamSynchronize(cudaStreamPerThread);
-      if (cudaSuccess != cudaGetLastError())
+      res = cudaGetDeviceProperties(&prop, i);
+      if (res == cudaSuccess)
      {
-        numDevices = 0;
-        archVersion = 0;
+        const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
+        archVersion = vtkm::Max(arch, archVersion);
      }
    }
  });
@@ -112,7 +77,7 @@ DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::DeviceAdapterRun

 bool DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::Exists() const
 {
-  return this->NumberOfDevices > 0 && this->HighestArchSupported >= 20;
+  return this->NumberOfDevices > 0 && this->HighestArchSupported >= 30;
 }
 }
 } // namespace vtkm::cont