DeviceAdapterAlgorithmTBB.h 10.3 KB
Newer Older
Sujin Philip's avatar
Sujin Philip committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
//============================================================================
//  Copyright (c) Kitware, Inc.
//  All rights reserved.
//  See LICENSE.txt for details.
//  This software is distributed WITHOUT ANY WARRANTY; without even
//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
//  PURPOSE.  See the above copyright notice for more information.
//
//  Copyright 2014 Sandia Corporation.
//  Copyright 2014 UT-Battelle, LLC.
//  Copyright 2014 Los Alamos National Security.
//
//  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
//  the U.S. Government retains certain rights in this software.
//
//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
//  Laboratory (LANL), the U.S. Government retains certain rights in
//  this software.
//============================================================================
#ifndef vtk_m_cont_tbb_internal_DeviceAdapterAlgorithmTBB_h
#define vtk_m_cont_tbb_internal_DeviceAdapterAlgorithmTBB_h

#include <vtkm/cont/ArrayHandle.h>
24
#include <vtkm/cont/ArrayHandleIndex.h>
25
#include <vtkm/cont/ArrayHandleZip.h>
Sujin Philip's avatar
Sujin Philip committed
26 27 28
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
29 30 31
#include <vtkm/cont/internal/IteratorFromArrayPortal.h>
#include <vtkm/cont/tbb/internal/ArrayManagerExecutionTBB.h>
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
32
#include <vtkm/cont/tbb/internal/FunctorsTBB.h>
33 34
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
#include <vtkm/Extent.h>
Sujin Philip's avatar
Sujin Philip committed
35

36
VTKM_THIRDPARTY_PRE_INCLUDE
Sujin Philip's avatar
Sujin Philip committed
37

38
#if  defined(VTKM_MSVC)
39 40 41 42 43 44 45
// TBB includes windows.h, which clobbers the min and max functions, so we
// define NOMINMAX to fix that problem. We also define WIN32_LEAN_AND_MEAN
// to reduce the number of macros and objects windows.h imports, as those
// can also cause conflicts.
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#endif
Sujin Philip's avatar
Sujin Philip committed
46

47 48
#include <tbb/tbb_stddef.h>
#if (TBB_VERSION_MAJOR == 4) && (TBB_VERSION_MINOR == 2)
Sujin Philip's avatar
Sujin Philip committed
49 50
//we provide a patched implementation of tbb parallel_sort
//that fixes ADL for std::swap. This patch has been submitted to Intel
//and is fixed in TBB 4.2 update 2.
Sujin Philip's avatar
Sujin Philip committed
52
#include <vtkm/cont/tbb/internal/parallel_sort.h>
53 54 55
#else
#include <tbb/parallel_sort.h>
#endif
Sujin Philip's avatar
Sujin Philip committed
56 57 58 59 60 61 62 63

#include <tbb/blocked_range.h>
#include <tbb/blocked_range3d.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_scan.h>
#include <tbb/partitioner.h>
#include <tbb/tick_count.h>

64 65
#if defined(VTKM_MSVC)
#include <Windows.h>
66 67 68
#undef WIN32_LEAN_AND_MEAN
#undef NOMINMAX
#endif
69

70
VTKM_THIRDPARTY_POST_INCLUDE
71

Sujin Philip's avatar
Sujin Philip committed
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
namespace vtkm {
namespace cont {

template<>
struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagTBB> :
    vtkm::cont::internal::DeviceAdapterAlgorithmGeneral<
        DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagTBB>,
        vtkm::cont::DeviceAdapterTagTBB>
{
public:
  /// Inclusive scan of \c input on the TBB device, combining values with
  /// vtkm::internal::Add.
  ///
  /// \c output is prepared for output with the same number of values as
  /// \c input. The value produced by tbb::ScanInclusivePortals is handed
  /// back to the caller unchanged.
  template<typename T, class CIn, class COut>
  VTKM_CONT_EXPORT static T ScanInclusive(
      const vtkm::cont::ArrayHandle<T,CIn> &input,
      vtkm::cont::ArrayHandle<T,COut> &output)
  {
    typedef vtkm::cont::DeviceAdapterTagTBB DeviceTag;

    const vtkm::Id numberOfValues = input.GetNumberOfValues();
    return tbb::ScanInclusivePortals(
          input.PrepareForInput(DeviceTag()),
          output.PrepareForOutput(numberOfValues, DeviceTag()),
          vtkm::internal::Add());
  }

93
  template<typename T, class CIn, class COut, class BinaryFunctor>
Sujin Philip's avatar
Sujin Philip committed
94 95 96
  VTKM_CONT_EXPORT static T ScanInclusive(
      const vtkm::cont::ArrayHandle<T,CIn> &input,
      vtkm::cont::ArrayHandle<T,COut> &output,
97
      BinaryFunctor binary_functor)
Sujin Philip's avatar
Sujin Philip committed
98
  {
99
    return tbb::ScanInclusivePortals(
Sujin Philip's avatar
Sujin Philip committed
100 101
          input.PrepareForInput(vtkm::cont::DeviceAdapterTagTBB()),
          output.PrepareForOutput(input.GetNumberOfValues(),
102
            vtkm::cont::DeviceAdapterTagTBB()), binary_functor);
Sujin Philip's avatar
Sujin Philip committed
103 104 105 106 107 108 109
  }

  /// Exclusive scan of \c input on the TBB device, combining values with
  /// vtkm::internal::Add and starting from
  /// vtkm::TypeTraits<T>::ZeroInitialization().
  ///
  /// \c output is prepared for output with the same number of values as
  /// \c input. The value produced by tbb::ScanExclusivePortals is handed
  /// back to the caller unchanged.
  template<typename T, class CIn, class COut>
  VTKM_CONT_EXPORT static T ScanExclusive(
      const vtkm::cont::ArrayHandle<T,CIn> &input,
      vtkm::cont::ArrayHandle<T,COut> &output)
  {
    typedef vtkm::cont::DeviceAdapterTagTBB DeviceTag;

    const vtkm::Id numberOfValues = input.GetNumberOfValues();
    return tbb::ScanExclusivePortals(
          input.PrepareForInput(DeviceTag()),
          output.PrepareForOutput(numberOfValues, DeviceTag()),
          vtkm::internal::Add(),
          vtkm::TypeTraits<T>::ZeroInitialization());
  }

117
  template<typename T, class CIn, class COut, class BinaryFunctor>
Sujin Philip's avatar
Sujin Philip committed
118 119 120
  VTKM_CONT_EXPORT static T ScanExclusive(
      const vtkm::cont::ArrayHandle<T,CIn> &input,
      vtkm::cont::ArrayHandle<T,COut> &output,
121 122
      BinaryFunctor binary_functor,
      const T& initialValue)
Sujin Philip's avatar
Sujin Philip committed
123
  {
124
    return tbb::ScanExclusivePortals(
Sujin Philip's avatar
Sujin Philip committed
125 126
          input.PrepareForInput(vtkm::cont::DeviceAdapterTagTBB()),
          output.PrepareForOutput(input.GetNumberOfValues(),
127
            vtkm::cont::DeviceAdapterTagTBB()), binary_functor, initialValue);
Sujin Philip's avatar
Sujin Philip committed
128 129 130 131 132 133 134 135 136 137 138 139
  }

  /// Runs \c functor over a one-dimensional index range on the TBB device.
  ///
  /// The functor is wrapped in a tbb::ScheduleKernel and executed with
  /// ::tbb::parallel_for over the range [0, numInstances), using
  /// tbb::TBB_GRAIN_SIZE as the grain size.
  ///
  /// Throws vtkm::cont::ErrorExecution, carrying the recorded message, when
  /// the error buffer attached to the kernel reports an error afterwards.
  template<class FunctorType>
  VTKM_CONT_EXPORT
  static void Schedule(FunctorType functor, vtkm::Id numInstances)
  {
    // Stack storage that receives any error text recorded during the run.
    const vtkm::Id ERROR_TEXT_SIZE = 1024;
    char errorText[ERROR_TEXT_SIZE];
    errorText[0] = '\0';
    vtkm::exec::internal::ErrorMessageBuffer
        errorBuffer(errorText, ERROR_TEXT_SIZE);

    tbb::ScheduleKernel<FunctorType> scheduleKernel(functor);
    scheduleKernel.SetErrorMessageBuffer(errorBuffer);

    ::tbb::parallel_for(
          ::tbb::blocked_range<vtkm::Id>(0, numInstances, tbb::TBB_GRAIN_SIZE),
          scheduleKernel);

    if (!errorBuffer.IsErrorRaised())
      {
      return;
      }
    throw vtkm::cont::ErrorExecution(errorText);
  }

  /// Runs \c functor over a three-dimensional index range on the TBB device.
  ///
  /// The functor and \c rangeMax are wrapped in a tbb::ScheduleKernelId3 and
  /// executed with ::tbb::parallel_for over a ::tbb::blocked_range3d whose
  /// page, row and column extents are [0, rangeMax[2]), [0, rangeMax[1]) and
  /// [0, rangeMax[0]) respectively.
  ///
  /// Throws vtkm::cont::ErrorExecution, carrying the recorded message, when
  /// the error buffer attached to the kernel reports an error afterwards.
  template<class FunctorType>
  VTKM_CONT_EXPORT
  static void Schedule(FunctorType functor,
                       vtkm::Id3 rangeMax)
  {
    // Stack storage that receives any error text recorded during the run.
    const vtkm::Id ERROR_TEXT_SIZE = 1024;
    char errorText[ERROR_TEXT_SIZE];
    errorText[0] = '\0';
    vtkm::exec::internal::ErrorMessageBuffer
        errorBuffer(errorText, ERROR_TEXT_SIZE);

    tbb::ScheduleKernelId3<FunctorType> scheduleKernel(functor,rangeMax);
    scheduleKernel.SetErrorMessageBuffer(errorBuffer);

    // Memory is generally laid out so that iterating the first index in the
    // tightest loop gives the best cache coherence, hence rangeMax[0] is
    // placed in the last (column) slot of the 3D range.
    ::tbb::blocked_range3d<vtkm::Id> indexRange(0, rangeMax[2],
                                                0, rangeMax[1],
                                                0, rangeMax[0]);

    ::tbb::parallel_for(indexRange, scheduleKernel);

    if (!errorBuffer.IsErrorRaised())
      {
      return;
      }
    throw vtkm::cont::ErrorExecution(errorText);
  }

  /// Sorts \c values in place using std::less<T> as the ordering.
  ///
  /// The comparator is passed explicitly to the comparator-taking overload;
  /// this is required to get sort to work with zip handles.
  template<typename T, class Container>
  VTKM_CONT_EXPORT static void Sort(
      vtkm::cont::ArrayHandle<T,Container> &values)
  {
    Sort(values, std::less< T >());
  }

191
  template<typename T, class Container, class BinaryCompare>
Sujin Philip's avatar
Sujin Philip committed
192
  VTKM_CONT_EXPORT static void Sort(
193
      vtkm::cont::ArrayHandle<T,Container> &values, BinaryCompare binary_compare)
Sujin Philip's avatar
Sujin Philip committed
194 195 196 197 198 199 200 201 202
  {
    typedef typename vtkm::cont::ArrayHandle<T,Container>::template
      ExecutionTypes<vtkm::cont::DeviceAdapterTagTBB>::Portal PortalType;
    PortalType arrayPortal = values.PrepareForInPlace(
      vtkm::cont::DeviceAdapterTagTBB());

    typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;
    IteratorsType iterators(arrayPortal);

203
    internal::WrappedBinaryOperator<bool,BinaryCompare> wrappedCompare(binary_compare);
204 205 206
    ::tbb::parallel_sort(iterators.GetBegin(),
                         iterators.GetEnd(),
                         wrappedCompare);
Sujin Philip's avatar
Sujin Philip committed
207 208
  }

209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
  /// Sorts \c keys using std::less<T> and reorders \c values to match, by
  /// forwarding to the comparator-taking SortByKey overload.
  template<typename T, typename U, class StorageT,  class StorageU>
  VTKM_CONT_EXPORT static void SortByKey(
      vtkm::cont::ArrayHandle<T,StorageT> &keys,
      vtkm::cont::ArrayHandle<U,StorageU> &values)
  {
    std::less<T> lessOp;
    SortByKey(keys, values, lessOp);
  }

  /// Sorts \c keys with \c comp and applies the same reordering to \c values.
  ///
  /// The strategy depends on the size of the value type:
  ///  - sizeof(U) <= sizeof(vtkm::Id): keys and values are zipped and sorted
  ///    together.
  ///  - otherwise: keys are zipped with an index array and sorted, so that
  ///    only indices move during the sort; the values are then rearranged
  ///    once through tbb::ScatterPortal into a temporary array and copied
  ///    back into \c values.
  template<typename T, typename U,
           class StorageT, class StorageU,
           class Compare>
  VTKM_CONT_EXPORT static void SortByKey(
      vtkm::cont::ArrayHandle<T,StorageT>& keys,
      vtkm::cont::ArrayHandle<U,StorageU>& values,
      Compare comp)
  {
    typedef vtkm::cont::DeviceAdapterTagTBB DeviceTag;
    typedef vtkm::cont::ArrayHandle<T,StorageT> KeyType;
    typedef vtkm::cont::ArrayHandle<U,StorageU> ValueType;

    if (sizeof(U) <= sizeof(vtkm::Id))
    {
      // Values are small: sort them directly alongside their keys.
      typedef vtkm::cont::ArrayHandleZip<KeyType,ValueType> KeyValueZipType;

      KeyValueZipType keyValueZip = vtkm::cont::make_ArrayHandleZip(keys,values);
      Sort(keyValueZip,vtkm::cont::internal::KeyCompare<T,U,Compare>(comp));
      return;
    }

    /// More efficient sort:
    /// Move value indexes when sorting and reorder the value array at last
    typedef vtkm::cont::ArrayHandle<vtkm::Id> IndexType;
    typedef vtkm::cont::ArrayHandleZip<KeyType,IndexType> KeyIndexZipType;

    IndexType indexArray;
    ValueType valuesScattered;
    const vtkm::Id numberOfValues = values.GetNumberOfValues();

    Copy( ArrayHandleIndex(keys.GetNumberOfValues()), indexArray);

    KeyIndexZipType keyIndexZip = vtkm::cont::make_ArrayHandleZip(keys,indexArray);
    Sort(keyIndexZip,vtkm::cont::internal::KeyCompare<T,vtkm::Id,Compare>(comp));

    tbb::ScatterPortal(values.PrepareForInput(DeviceTag()),
                  indexArray.PrepareForInput(DeviceTag()),
                  valuesScattered.PrepareForOutput(numberOfValues,DeviceTag()));

    Copy( valuesScattered, values );
  }
Sujin Philip's avatar
Sujin Philip committed
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303

  /// Intentionally a no-op for the TBB device.
  VTKM_CONT_EXPORT static void Synchronize()
  {
    // Nothing to do. This device schedules all of its operations using a
    // split/join paradigm, so if the control thread is calling this method
    // then nothing should be running in the execution environment.
  }

};

/// TBB contains its own high resolution timer.
///
template<>
class DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagTBB>
{
public:
  VTKM_CONT_EXPORT DeviceAdapterTimerImplementation()
  {
    this->Reset();
  }
  VTKM_CONT_EXPORT void Reset()
  {
    vtkm::cont::DeviceAdapterAlgorithm<
        vtkm::cont::DeviceAdapterTagTBB>::Synchronize();
    this->StartTime = ::tbb::tick_count::now();
  }
  VTKM_CONT_EXPORT vtkm::Float64 GetElapsedTime()
  {
    vtkm::cont::DeviceAdapterAlgorithm<
        vtkm::cont::DeviceAdapterTagTBB>::Synchronize();
    ::tbb::tick_count currentTime = ::tbb::tick_count::now();
    ::tbb::tick_count::interval_t elapsedTime = currentTime - this->StartTime;
    return static_cast<vtkm::Float64>(elapsedTime.seconds());
  }

private:
  ::tbb::tick_count StartTime;
};

}
} // namespace vtkm::cont

#endif //vtk_m_cont_tbb_internal_DeviceAdapterAlgorithmTBB_h