Commit b3687c6f authored by Robert Maynard's avatar Robert Maynard

Workaround inclusive_scan issues in thrust 1.8.X for complex value types.

The original workaround for inclusive_scan bugs in thrust 1.8 only solved the
issue for basic arithmetic types such as int, float, double. Now we go one
step further and fix the problem for all types.

The solution is to provide a proper implementation of destructive_accumulate_n
and make sure it exists before any includes of thrust occur.
parent eef9acfa
......@@ -23,6 +23,11 @@
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
#ifdef VTKM_CUDA
//This is required to be first so that we get patches for thrust included
//in the correct order
#include <vtkm/exec/cuda/internal/ThrustPatches.h>
#include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h>
#endif
......
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_exec_cuda_internal_ThrustPatches_h
#define vtk_m_exec_cuda_internal_ThrustPatches_h
// Forward declaration of vtkm::exec::cuda::internal::WrappedBinaryOperator.
// Only the name is introduced here; the class template is defined elsewhere.
namespace vtkm
{
namespace exec
{
namespace cuda
{
namespace internal
{

template <typename ValueType, typename Functor>
class WrappedBinaryOperator;

} // namespace internal
} // namespace cuda
} // namespace exec
} // namespace vtkm
namespace thrust { namespace system { namespace cuda { namespace detail {
namespace bulk_ { namespace detail { namespace accumulate_detail {
//In thrust 1.8.0 - 1.8.2 the inclusive_scan has a bug when accumulating
//values when the binary operator states it is not commutative.
//For more complex value types, we patch thrust/bulk with the fix that is found
//in issue: https://github.com/thrust/thrust/issues/692
//
//This overload needs to be declared before ANY thrust includes, otherwise
//other device code inside thrust that calls it will not see it.
//
//It is selected only when the binary operator is a
//vtkm::exec::cuda::internal::WrappedBinaryOperator<T, F>.
//
//Parameters:
//  g         - the concurrent group executing this call; provides the caller's
//              index, the group size and wait().
//  first     - random access iterator to the values to accumulate. The first n
//              elements are OVERWRITTEN with partial results (hence
//              "destructive").
//  n         - number of elements to accumulate.
//  init      - initial value; it is applied as the LEFT operand of the final
//              binary_op call.
//  binary_op - the wrapped binary operator used to combine values.
//
//Returns binary_op(init, first[n - 1]) evaluated after the accumulation loop.
//
//NOTE(review): first[n - 1] is read unconditionally, so n == 0 would index
//before the start of the range - confirm callers always pass n >= 1.
template<typename ConcurrentGroup, typename RandomAccessIterator, typename Size, typename T, typename F>
__device__ T destructive_accumulate_n(ConcurrentGroup &g, RandomAccessIterator first, Size n, T init,
vtkm::exec::cuda::internal::WrappedBinaryOperator<T, F> binary_op)
{
typedef typename ConcurrentGroup::size_type size_type;
//index of the calling agent as reported by the group
size_type tid = g.this_exec.index();
//agents whose index is past the end of the input keep init as their value
T x = init;
if(tid < n)
{
x = first[tid];
}
//presumably a group-wide barrier (TODO confirm against bulk's concurrent
//group): every agent has loaded its value before any element is rewritten
g.wait();
//offset doubles on every pass: 1, 2, 4, ... while it is below g.size()
for(size_type offset = 1; offset < g.size(); offset += offset)
{
if(tid >= offset && tid - offset < n)
{
//the element at the lower index is always the left operand, so the
//original left-to-right operand order is kept for non-commutative operators
x = binary_op(first[tid - offset], x);
}
//separate the reads of first[] above from the writes below
g.wait();
if(tid < n)
{
first[tid] = x;
}
//make the writes visible before the next pass reads first[] again
g.wait();
}
//fold init in on the left of the value stored in the last element
T result = binary_op(init, first[n - 1]);
//all agents have read first[n - 1] before returning
g.wait();
return result;
}
} } } //namespace bulk_::detail::accumulate_detail
} } } } //namespace thrust::system::cuda::detail
#endif //vtk_m_exec_cuda_internal_ThrustPatches_h
......@@ -240,19 +240,17 @@ struct WrappedBinaryPredicate
}
} //namespace vtkm::exec::cuda::internal
namespace thrust
{
namespace detail
{
//In thrust 1.8.0 - 1.8.2, inclusive_scan has a bug when accumulating
//values if the binary operator states that it is not commutative. At the
//same time, the is_commutative condition is used to select faster paths, so
//we state that all WrappedBinaryOperators are commutative.
namespace thrust { namespace detail {
//
// Tell Thrust that a WrappedBinaryOperator is commutative whenever its value
// type satisfies thrust::detail::is_arithmetic. This activates the numerous
// fast paths inside Thrust that are only available when the binary functor is
// commutative; for every other value type the trait simply follows
// is_arithmetic<ValueType>.
//
template <typename ValueType, typename Functor>
struct is_commutative<
  vtkm::exec::cuda::internal::WrappedBinaryOperator<ValueType, Functor> >
  : thrust::detail::is_arithmetic<ValueType>
{
};
}
}
} } //namespace thrust::detail
#endif //vtk_m_exec_cuda_internal_WrappedOperators_h
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment