Commit e0b6e698 authored by Robert Maynard

Copying CPU memory to Pascal managed memory now works consistently.

When copying small arrays from CPU memory to Pascal managed memory, we
would see subsequent kernels fail because the memory transfer hadn't
finished. This is a bug, as each stream should act like a FIFO queue.
For now, when we encounter this use case, we explicitly synchronize
after the memcpy.
parent a4b16c4b
......@@ -150,6 +150,15 @@ void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::CopyFromControl(
static_cast<std::size_t>(numBytes),
cudaMemcpyHostToDevice,
cudaStreamPerThread));
if (CudaAllocator::IsManagedPointer(executionPtr))
{
//If we are moving memory from unmanaged host memory
//to managed host memory, it is possible that the memcpy
//will not have finished before the memory is first used.
//To work around this bug we explicitly synchronize the
//stream for this one use case.
cudaStreamSynchronize(cudaStreamPerThread);
}
}
void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::CopyToControl(const void* executionPtr,
......
......@@ -205,6 +205,7 @@ public:
vtkm::cont::cuda::internal::IteratorBegin(portal),
vtkm::cont::cuda::internal::IteratorEnd(portal),
thrust::cuda::pointer<ValueType>(beginPointer));
cudaStreamSynchronize(cudaStreamPerThread);
//unmap the resource
this->Resource->UnMap();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment