Commit ef6a3b82 authored by Allison Vacanti's avatar Allison Vacanti Committed by Kitware Robot

Merge topic 'cuda_hint_fixes'

5a99dd76 Only use cuda hints for CUDA 8.0+.
Acked-by: Kitware Robot's avatarKitware Robot <kwrobot@kitware.com>
Acked-by: Robert Maynard's avatarRobert Maynard <robert.maynard@kitware.com>
Merge-request: !978
parents 3e4778b0 5a99dd76
......@@ -120,12 +120,14 @@ void CudaAllocator::PrepareForControl(const void* ptr, std::size_t numBytes)
if (ManagedMemorySupported)
{
#if CUDART_VERSION >= 8000
// TODO these hints need to be benchmarked and adjusted once we start
// sharing the pointers between cont/exec
VTKM_CUDA_CALL(
cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId));
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, cudaCpuDeviceId));
VTKM_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, cudaCpuDeviceId, cudaStreamPerThread));
#endif // CUDA >= 8.0
}
}
......@@ -135,11 +137,13 @@ void CudaAllocator::PrepareForInput(const void* ptr, std::size_t numBytes)
if (ManagedMemorySupported)
{
#if CUDART_VERSION >= 8000
int dev;
VTKM_CUDA_CALL(cudaGetDevice(&dev));
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev));
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetReadMostly, dev));
VTKM_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, dev, cudaStreamPerThread));
#endif // CUDA >= 8.0
}
}
......@@ -149,11 +153,13 @@ void CudaAllocator::PrepareForOutput(const void* ptr, std::size_t numBytes)
if (ManagedMemorySupported)
{
#if CUDART_VERSION >= 8000
int dev;
VTKM_CUDA_CALL(cudaGetDevice(&dev));
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev));
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, dev));
VTKM_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, dev, cudaStreamPerThread));
#endif // CUDA >= 8.0
}
}
......@@ -163,11 +169,13 @@ void CudaAllocator::PrepareForInPlace(const void* ptr, std::size_t numBytes)
if (ManagedMemorySupported)
{
#if CUDART_VERSION >= 8000
int dev;
VTKM_CUDA_CALL(cudaGetDevice(&dev));
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev));
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, dev));
VTKM_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, dev, cudaStreamPerThread));
#endif // CUDA >= 8.0
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment