Commit 2a2159b1 authored by Robert Maynard's avatar Robert Maynard

Generalize the support for zip handles inside the cuda backend.

Instead of having a single specialization for sort and zip handles,
we know handle any fancy handles being passed to the cuda device adapter.
This was done by reworking how we represent fancy iterators inside thrust,
and instead of using a transform iterator + counting iterator we just use
a iterator_facade.
parent c41f232e
......@@ -40,10 +40,14 @@ public:
typedef PortalTypeFirst_ PortalTypeFirst;
typedef PortalTypeSecond_ PortalTypeSecond;
VTKM_EXEC_CONT_EXPORT
ArrayPortalExecZip()
: PortalFirst(), PortalSecond()
{ } //needs to be host and device so that cuda can create lvalue of these
VTKM_CONT_EXPORT
ArrayPortalExecZip(const PortalTypeFirst &portalfirst = PortalTypeFirst(),
const PortalTypeSecond &portalsecond = PortalTypeSecond())
ArrayPortalExecZip(const PortalTypeFirst &portalfirst,
const PortalTypeSecond &portalsecond)
: PortalFirst(portalfirst), PortalSecond(portalsecond)
{ }
......
......@@ -24,12 +24,13 @@
#include <vtkm/Types.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/cuda/internal/MakeThrustIterator.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
#include <vtkm/exec/internal/WorkletInvokeFunctor.h>
#include <vtkm/exec/cuda/internal/WrappedOperators.h>
// Disable warnings we check vtkm for but Thrust does not.
#if defined(__GNUC__) || defined(____clang__)
......@@ -301,10 +302,10 @@ private:
typename InputPortal::ValueType ReducePortal(const InputPortal &input,
typename InputPortal::ValueType initialValue)
{
return ::thrust::reduce(thrust::cuda::par,
IteratorBegin(input),
IteratorEnd(input),
initialValue);
typedef typename InputPortal::ValueType ValueType;
return ReducePortal(input,
initialValue,
::thrust::plus<ValueType>());
}
template<class InputPortal, class BinaryOperation>
......@@ -313,11 +314,13 @@ private:
typename InputPortal::ValueType initialValue,
BinaryOperation binaryOP)
{
vtkm::exec::cuda::internal::WrappedBinaryOperator<typename InputPortal::ValueType,
BinaryOperation> bop(binaryOP);
return ::thrust::reduce(thrust::cuda::par,
IteratorBegin(input),
IteratorEnd(input),
initialValue,
binaryOP);
bop);
}
template<class KeysPortal, class ValuesPortal,
......@@ -342,6 +345,8 @@ private:
::thrust::equal_to<typename KeysPortal::ValueType> binaryPredicate;
vtkm::exec::cuda::internal::WrappedBinaryOperator<typename ValuesPortal::ValueType,
BinaryOperation> bop(binaryOP);
result_iterators = ::thrust::reduce_by_key(thrust::cuda::par,
IteratorBegin(keys),
IteratorEnd(keys),
......@@ -349,7 +354,7 @@ private:
keys_out_begin,
values_out_begin,
binaryPredicate,
binaryOP);
bop);
return static_cast<vtkm::Id>( ::thrust::distance(keys_out_begin,
result_iterators.first) );
......@@ -406,19 +411,19 @@ private:
template<class ValuesPortal>
VTKM_CONT_EXPORT static void SortPortal(const ValuesPortal &values)
{
::thrust::sort(thrust::cuda::par,
IteratorBegin(values),
IteratorEnd(values));
typedef typename ValuesPortal::ValueType ValueType;
SortPortal(values, ::thrust::less<ValueType>());
}
template<class ValuesPortal, class Compare>
VTKM_CONT_EXPORT static void SortPortal(const ValuesPortal &values,
Compare comp)
{
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,Compare> bop(comp);
::thrust::sort(thrust::cuda::par,
IteratorBegin(values),
IteratorEnd(values),
comp);
bop);
}
......@@ -426,10 +431,8 @@ private:
VTKM_CONT_EXPORT static void SortByKeyPortal(const KeysPortal &keys,
const ValuesPortal &values)
{
::thrust::sort_by_key(thrust::cuda::par,
IteratorBegin(keys),
IteratorEnd(keys),
IteratorBegin(values));
typedef typename KeysPortal::ValueType ValueType;
SortByKeyPortal(keys,values,::thrust::less<ValueType>());
}
template<class KeysPortal, class ValuesPortal, class Compare>
......@@ -437,6 +440,7 @@ private:
const ValuesPortal &values,
Compare comp)
{
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,Compare> bop(comp);
::thrust::sort_by_key(thrust::cuda::par,
IteratorBegin(keys),
IteratorEnd(keys),
......
......@@ -25,6 +25,7 @@
#include <vtkm/internal/ExportMacros.h>
#include <vtkm/exec/cuda/internal/ArrayPortalFromThrust.h>
#include <vtkm/exec/cuda/internal/WrappedOperators.h>
// Disable warnings we check vtkm for but Thrust does not.
#if defined(__GNUC__) || defined(____clang__)
......@@ -44,40 +45,6 @@
#pragma GCC diagnostic pop
#endif // gcc || clang
//needed forward declares to get zip handle to work properly
namespace vtkm {
namespace exec {
namespace internal {
template<typename ValueType_,
typename PortalTypeFirst,
typename PortalTypeSecond>
class ArrayPortalExecZip;
}
}
}
//more forward declares needed to get zip handle to work properly
namespace vtkm {
namespace cont {
namespace cuda {
namespace internal {
namespace detail { template<class PortalType> struct IteratorTraits; }
//forward declare IteratorBegin
template<class PortalType>
VTKM_CONT_EXPORT
typename detail::IteratorTraits<PortalType>::IteratorType
IteratorBegin(PortalType portal);
}
}
}
}
namespace vtkm {
namespace cont {
namespace cuda {
......@@ -102,17 +69,6 @@ template<typename PortalType, typename T>
struct ThrustIteratorTag<PortalType, const T*> {
typedef ThrustIteratorDevicePtrTag Type;
};
template<typename T, typename U, typename V>
struct ThrustIteratorTag< vtkm::exec::internal::ArrayPortalExecZip< T, U, V >,
T > {
//this is a real special case. ExecZip and PortalValue don't combine
//well together, when used with DeviceAlgorithm that has a custom operator
//the custom operator is actually passed the PortalValue instead of
//the real values, and by that point we can't fix anything since we
//don't know what the original operator is
typedef ThrustIteratorZipTag Type;
};
template<typename T> struct ThrustStripPointer;
template<typename T> struct ThrustStripPointer<T *> {
......@@ -122,96 +78,10 @@ template<typename T> struct ThrustStripPointer<const T *> {
typedef const T Type;
};
template<class PortalType>
struct PortalValue {
typedef typename PortalType::ValueType ValueType;
VTKM_EXEC_EXPORT
PortalValue()
: Portal(),
Index(0) { }
VTKM_EXEC_EXPORT
PortalValue(const PortalType &portal, vtkm::Id index)
: Portal(portal), Index(index) { }
VTKM_EXEC_EXPORT
PortalValue(const PortalValue<PortalType> &other)
: Portal(other.Portal), Index(other.Index) { }
VTKM_EXEC_EXPORT
ValueType operator=(ValueType value) {
this->Portal.Set(this->Index, value);
return value;
}
VTKM_EXEC_EXPORT
operator ValueType(void) const {
return this->Portal.Get(this->Index);
}
const PortalType Portal;
const vtkm::Id Index;
};
template<class PortalType>
class LookupFunctor
: public ::thrust::unary_function<vtkm::Id,
PortalValue<PortalType> >
{
public:
VTKM_EXEC_EXPORT LookupFunctor()
: Portal() { }
VTKM_EXEC_EXPORT LookupFunctor(PortalType portal)
: Portal(portal) { }
VTKM_EXEC_EXPORT
PortalValue<PortalType>
operator()(vtkm::Id index)
{
return PortalValue<PortalType>(this->Portal, index);
}
private:
PortalType Portal;
};
template<class PortalType, class Tag> struct IteratorChooser;
template<class PortalType>
struct IteratorChooser<PortalType, detail::ThrustIteratorTransformTag> {
typedef ::thrust::transform_iterator<
LookupFunctor<PortalType>,
::thrust::counting_iterator<vtkm::Id> > Type;
};
template<class PortalType>
struct IteratorChooser<PortalType, detail::ThrustIteratorZipTag> {
//this is a real special case. ExecZip and PortalValue don't combine
//well together, when used with DeviceAlgorithm that has a custom operator
//the custom operator is actually passed the PortalValue instead of
//the real values, and by that point we can't fix anything since we
//don't know what the original operator is.
//So to fix this issue we wrap the original array portals into a thrust
//zip iterator and let handle everything
typedef typename PortalType::PortalTypeFirst PortalTypeFirst;
typedef typename detail::ThrustIteratorTag<
PortalTypeFirst,
typename PortalTypeFirst::IteratorType>::Type FirstTag;
typedef typename IteratorChooser<PortalTypeFirst, FirstTag>::Type FirstIterType;
typedef typename PortalType::PortalTypeSecond PortalTypeSecond;
typedef typename detail::ThrustIteratorTag<
PortalTypeSecond,
typename PortalTypeSecond::IteratorType>::Type SecondTag;
typedef typename IteratorChooser<PortalTypeSecond, SecondTag>::Type SecondIterType;
//Now that we have deduced the concrete types of the first and second
//array portals of the zip we can construct a zip iterator for those
typedef ::thrust::tuple<FirstIterType, SecondIterType> IteratorTuple;
typedef ::thrust::zip_iterator<IteratorTuple> Type;
typedef vtkm::exec::cuda::internal::IteratorFromArrayPortal<PortalType> Type;
};
template<class PortalType>
struct IteratorChooser<PortalType, detail::ThrustIteratorDevicePtrTag> {
......@@ -245,32 +115,12 @@ MakeDevicePtr(const T *iter)
return ::thrust::cuda::pointer<const T>(iter);
}
template<typename T, typename U>
VTKM_CONT_EXPORT
::thrust::zip_iterator<
::thrust::tuple<typename IteratorTraits<T>::IteratorType,
typename IteratorTraits<U>::IteratorType
>
>
MakeZipIterator(const T& t, const U& u)
{
//todo deduce from T and U the iterator types
typedef typename IteratorTraits<T>::IteratorType FirstIterType;
typedef typename IteratorTraits<U>::IteratorType SecondIterType;
return ::thrust::make_zip_iterator(
::thrust::make_tuple( vtkm::cont::cuda::internal::IteratorBegin(t),
vtkm::cont::cuda::internal::IteratorBegin(u) )
);
}
template<class PortalType>
VTKM_CONT_EXPORT
typename IteratorTraits<PortalType>::IteratorType
MakeIteratorBegin(PortalType portal, detail::ThrustIteratorTransformTag)
{
return ::thrust::make_transform_iterator(
::thrust::make_counting_iterator(vtkm::Id(0)),
LookupFunctor<PortalType>(portal));
return vtkm::exec::cuda::internal::IteratorFromArrayPortal<PortalType>(portal,0);
}
template<class PortalType>
......
......@@ -20,6 +20,8 @@
set(headers
ArrayPortalFromThrust.h
IteratorFromArrayPortal.h
WrappedOperators.h
)
vtkm_disable_troublesome_thrust_warnings()
......
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_exec_cuda_internal_IteratorFromArrayPortal_h
#define vtk_m_exec_cuda_internal_IteratorFromArrayPortal_h
#include <vtkm/Types.h>
#include <vtkm/Pair.h>
#include <vtkm/internal/ExportMacros.h>
// Disable warnings we check vtkm for but Thrust does not.
#if defined(__GNUC__) || defined(____clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wconversion"
#endif // gcc || clang
#include <thrust/functional.h>
#include <thrust/iterator/iterator_facade.h>
#include <thrust/system/cuda/execution_policy.h>
#if defined(__GNUC__) || defined(____clang__)
#pragma GCC diagnostic pop
#endif // gcc || clang
namespace vtkm {
namespace exec {
namespace cuda {
namespace internal {
template<class ArrayPortalType>
struct PortalValue
{
typedef typename ArrayPortalType::ValueType ValueType;
VTKM_EXEC_CONT_EXPORT
PortalValue(const ArrayPortalType &portal, vtkm::Id index)
: Portal(portal), Index(index) { }
VTKM_EXEC_EXPORT
void Swap( PortalValue<ArrayPortalType> &rhs ) throw()
{
//we need use the explicit type not a proxy temp object
//A proxy temp object would point to the same underlying data structure
//and would not hold the old value of *this once *this was set to rhs.
const ValueType aValue = *this;
*this = rhs;
rhs = aValue;
}
VTKM_EXEC_EXPORT
PortalValue<ArrayPortalType> &operator=(
const PortalValue<ArrayPortalType> &rhs)
{
this->Portal.Set(this->Index, rhs.Portal.Get(rhs.Index));
return *this;
}
VTKM_EXEC_EXPORT
ValueType operator=(const ValueType& value)
{
this->Portal.Set(this->Index, value);
return value;
}
VTKM_EXEC_EXPORT
operator ValueType(void) const
{
return this->Portal.Get(this->Index);
}
const ArrayPortalType& Portal;
vtkm::Id Index;
};
template<class ArrayPortalType>
class IteratorFromArrayPortal : public
::thrust::iterator_facade<
IteratorFromArrayPortal<ArrayPortalType>,
typename ArrayPortalType::ValueType,
::thrust::system::cuda::tag,
::thrust::random_access_traversal_tag,
PortalValue<ArrayPortalType>,
vtkm::Id>
{
public:
VTKM_EXEC_CONT_EXPORT
IteratorFromArrayPortal()
: Portal(), Index(0) { }
VTKM_CONT_EXPORT
explicit IteratorFromArrayPortal(const ArrayPortalType &portal,
vtkm::Id index = 0)
: Portal(portal), Index(index) { }
VTKM_EXEC_EXPORT
PortalValue<ArrayPortalType>
operator[](std::size_t idx) const
{
return PortalValue<ArrayPortalType>(this->Portal,
this->Index + static_cast<vtkm::Id>(idx) );
}
private:
ArrayPortalType Portal;
vtkm::Id Index;
// Implementation for ::thrust iterator_facade
friend class ::thrust::iterator_core_access;
VTKM_EXEC_EXPORT
PortalValue<ArrayPortalType> dereference() const
{
return PortalValue<ArrayPortalType>(this->Portal,
this->Index);
}
VTKM_EXEC_EXPORT
bool equal(const IteratorFromArrayPortal<ArrayPortalType> &other) const
{
// Technically, we should probably check that the portals are the same,
// but the portal interface does not specify an equal operator. It is
// by its nature undefined what happens when comparing iterators from
// different portals anyway.
return (this->Index == other.Index);
}
VTKM_EXEC_CONT_EXPORT
void increment()
{
this->Index++;
}
VTKM_EXEC_CONT_EXPORT
void decrement()
{
this->Index--;
}
VTKM_EXEC_CONT_EXPORT
void advance(vtkm::Id delta)
{
this->Index += delta;
}
VTKM_EXEC_CONT_EXPORT
vtkm::Id
distance_to(const IteratorFromArrayPortal<ArrayPortalType> &other) const
{
// Technically, we should probably check that the portals are the same,
// but the portal interface does not specify an equal operator. It is
// by its nature undefined what happens when comparing iterators from
// different portals anyway.
return other.Index - this->Index;
}
};
}
}
}
} //namespace vtkm::exec::cuda::internal
//So for the unary_transform_functor and binary_transform_functor inside
//of thrust, they verify that the index they are storing into is a reference
//instead of a value, so that the contents actually are written to global memory.
//
//But for vtk-m we pass in facade objects, which are passed by value, but
//must be treated as references. So do to do that properly we need to specialize
//is_non_const_reference to state a PortalValue by value is valid for writing
namespace thrust
{
namespace detail
{
template< typename T > struct is_non_const_reference;
template< typename T >
struct is_non_const_reference< vtkm::exec::cuda::internal::PortalValue<T> >
: thrust::detail::true_type
{ };
}
}
#endif //vtk_m_exec_cuda_internal_IteratorFromArrayPortal_h
\ No newline at end of file
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_exec_cuda_internal_WrappedOperators_h
#define vtk_m_exec_cuda_internal_WrappedOperators_h
#include <vtkm/Types.h>
#include <vtkm/Pair.h>
#include <vtkm/internal/ExportMacros.h>
#include <vtkm/exec/cuda/internal/IteratorFromArrayPortal.h>
namespace vtkm {
namespace exec {
namespace cuda {
namespace internal {
// Binary function object wrapper which can detect and handle calling the
// wrapped operator with complex value types such as
// PortalValue which happen when passed an input array that
// is implicit.
template<typename ResultType, typename Function>
struct WrappedBinaryOperator
{
Function m_f;
VTKM_CONT_EXPORT
WrappedBinaryOperator(const Function &f)
: m_f(f)
{}
template<typename T, typename U>
VTKM_EXEC_EXPORT ResultType operator()(const T &x, const U &y) const
{
return m_f(x, y);
}
template<typename T, typename U>
VTKM_EXEC_EXPORT ResultType operator()(const T &x,
const PortalValue<U> &y) const
{
typedef typename PortalValue<U>::ValueType ValueType;
return m_f(x, (ValueType)y);
}
template<typename T, typename U>
VTKM_EXEC_EXPORT ResultType operator()(const PortalValue<T> &x,
const U &y) const
{
typedef typename PortalValue<T>::ValueType ValueType;
return m_f((ValueType)x, y);
}
template<typename T, typename U>
VTKM_EXEC_EXPORT ResultType operator()(const PortalValue<T> &x,
const PortalValue<U> &y) const
{
typedef typename PortalValue<T>::ValueType ValueTypeT;
typedef typename PortalValue<U>::ValueType ValueTypeU;
return m_f((ValueTypeT)x, (ValueTypeU)y);
}
};
}
}
}
} //namespace vtkm::exec::cuda::internal
#endif //vtk_m_exec_cuda_internal_WrappedOperators_h
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment