Commit 250888f7 authored by Thomas Otahal

CPU parallel radix sorting

Parallel radix sorting will be invoked in DeviceAdapterAlgorithmTBB.h when
the input is ArrayHandle<T, vtkm::cont::StorageTagBasic> where T is one of
the following basic C++ types:

unsigned int
unsigned short int
unsigned long int
unsigned long long int
unsigned char
char16_t
char32_t
wchar_t
char
short
int
long long
signed char
float
double

If a comparison operator is provided, it must be of type std::less<T> or std::greater<T>.

Radix sort implementation is Satish parallel radix sort as documented in the
following citation:

  Fast sort on CPUs and GPUs: a case for bandwidth oblivious SIMD sort.
    N. Satish, C. Kim, J. Chhugani, A. D. Nguyen, V. W. Lee, D. Kim, and P. Dubey.
    In Proc. SIGMOD, pages 351–362, 2010

Implementation is based on Takuya Akiba's GitHub source code with the following
changes:

   - Changed parallel threading from OpenMP to TBB tasks
   - Removed pair sorting
   - Added a minimum size threshold for the parallel sort; inputs below it are sorted with a serial radix sort (kxsort) instead
   - Added std::greater<T> and std::less<T> to interface for descending order sorts
   - Added can_use_parallel_radix_sort<T, F>() function to determine if parallel radix sorting
     is possible for type T and compare function F (fallback is std::sort() if not possible)
   - Added linear scaling of threads used by the algorithm for more stable performance
     on machines with lots of available threads (KNL and Haswell)

Added kxsort (serial MSD radix sort by Dinghua Li via GitHub) implementation without modification.
parent 8be1a71a
......@@ -74,6 +74,8 @@ CMake/FindTBB.cmake
CMake/FindGLEW.cmake
Utilities
vtkm/cont/tbb/internal/parallel_sort.h
vtkm/cont/tbb/internal/parallel_radix_sort_tbb.h
vtkm/cont/tbb/internal/kxsort.h
vtkm/testing/OptionParser.h
vtkm/internal/brigand.hpp
version.txt
......
......@@ -34,7 +34,11 @@ if (VTKm_ENABLE_TBB)
endif()
endif()
vtkm_declare_headers(parallel_sort.h TESTABLE OFF)
vtkm_declare_headers(parallel_sort.h
parallel_radix_sort_tbb.h
kxsort.h
TESTABLE OFF)
vtkm_declare_headers(${headers} TESTABLE ${VTKm_ENABLE_TBB})
#-----------------------------------------------------------------------------
......
......@@ -265,6 +265,21 @@ public:
::tbb::parallel_sort(iterators.GetBegin(), iterators.GetEnd(), wrappedCompare);
}
// Sort overload for array handles that use basic storage.
//
// When can_use_parallel_radix_sort<T, BinaryCompare>() reports that the
// value type / comparator pair is supported, the array behind `values` is
// sorted with the parallel radix sort. Otherwise the call is forwarded to
// the Sort overload named by the three explicit template arguments
// (presumably the storage-generic overload -- confirm against the class).
template <typename T, class BinaryCompare>
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>& values,
                           BinaryCompare binary_compare)
{
  // Unsupported combination: hand off to the other overload and stop here.
  if (!parallel_radix_sort_tbb::can_use_parallel_radix_sort<T, BinaryCompare>())
  {
    Sort<T, vtkm::cont::StorageTagBasic, BinaryCompare>(values, binary_compare);
    return;
  }

  // Supported combination: radix sort the underlying array directly.
  ::parallel_radix_sort_tbb::parallel_radix_sort(
    values.GetStorage().GetArray(), values.GetNumberOfValues(), binary_compare);
}
template <typename T, typename U, class StorageT, class StorageU>
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
vtkm::cont::ArrayHandle<U, StorageU>& values)
......
......@@ -66,6 +66,7 @@ VTKM_THIRDPARTY_PRE_INCLUDE
#include <tbb/parallel_scan.h>
#include <tbb/partitioner.h>
#include <tbb/tick_count.h>
#include <vtkm/cont/tbb/internal/parallel_radix_sort_tbb.h>
#if defined(VTKM_MSVC)
#pragma pop_macro("__TBB_NO_IMPLICITLINKAGE")
......
/* The MIT License
Copyright (c) 2016 Dinghua Li <voutcn@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#ifndef KXSORT_H__
#define KXSORT_H__
#include <algorithm>
#include <iterator>
// kx: serial, in-place, most-significant-byte-first radix sort.
//
// Keys are split into bytes by a "radix traits" object that provides
//   - nBytes               : number of key bytes,
//   - kth_byte(x, k)       : the k-th byte of x (k == 0 is least significant),
//   - compare(x, y)        : strict ordering used by the insertion-sort fallback.
// Ranges or buckets that are small enough are finished with insertion sort.
namespace kx
{

// Number of key bits consumed per radix pass.
static const int kRadixBits = 8;
// Ranges/buckets with at most this many elements are insertion-sorted.
static const size_t kInsertSortThreshold = 64;
// Mask selecting the low kRadixBits bits of a shifted key.
static const int kRadixMask = (1 << kRadixBits) - 1;
// Number of buckets per radix pass.
static const int kRadixBin = 1 << kRadixBits;

//================= HELPING FUNCTIONS ====================

// Radix traits for keys whose natural order equals their byte-wise order.
template <class T>
struct RadixTraitsUnsigned
{
  static const int nBytes = sizeof(T);
  // Returns byte k of x (k == 0 is the least significant byte).
  int kth_byte(const T& x, int k) { return (x >> (kRadixBits * k)) & kRadixMask; }
  bool compare(const T& x, const T& y) { return x < y; }
};

// Radix traits for signed integer keys. The sign bit is flipped before a
// byte is extracted so that negative keys land in lower buckets than
// non-negative ones.
template <class T>
struct RadixTraitsSigned
{
  static const int nBytes = sizeof(T);
  // Value with only the most significant bit of T set. Built by shifting
  // T(1) rather than T(0x80): for one-byte signed types T(0x80) is already
  // negative, and left-shifting a negative value is undefined behaviour
  // before C++20 (and therefore not usable in a constant initializer).
  static const T kMSB = T(T(1) << (sizeof(T) * 8 - 1));
  // Returns byte k of x after flipping the sign bit.
  int kth_byte(const T& x, int k) { return ((x ^ kMSB) >> (kRadixBits * k)) & kRadixMask; }
  bool compare(const T& x, const T& y) { return x < y; }
};

// Insertion sort of [s, e) ordered by radix_traits.compare.
template <class RandomIt, class ValueType, class RadixTraits>
inline void insert_sort_core_(RandomIt s, RandomIt e, RadixTraits radix_traits)
{
  // An empty range needs no work; returning here also avoids forming
  // `s + 1`, which would move the iterator beyond the end of the range.
  if (s == e)
  {
    return;
  }
  for (RandomIt i = s + 1; i < e; ++i)
  {
    if (radix_traits.compare(*i, *(i - 1)))
    {
      RandomIt j;
      ValueType tmp = *i;
      *i = *(i - 1);
      // Shift larger elements one slot to the right until tmp's place is found.
      for (j = i - 1; j > s && radix_traits.compare(tmp, *(j - 1)); --j)
      {
        *j = *(j - 1);
      }
      *j = tmp;
    }
  }
}

// One radix pass over [s, e) on byte kWhichByte, followed by recursion on
// the next lower byte (or insertion sort) inside each bucket.
template <class RandomIt, class ValueType, class RadixTraits, int kWhichByte>
inline void radix_sort_core_(RandomIt s, RandomIt e, RadixTraits radix_traits)
{
  // `last` is offset by one so that last[-1] is a valid slot (it holds s).
  RandomIt last_[kRadixBin + 1];
  RandomIt* last = last_ + 1;
  size_t count[kRadixBin] = { 0 };

  // Histogram of the current byte.
  for (RandomIt i = s; i < e; ++i)
  {
    ++count[radix_traits.kth_byte(*i, kWhichByte)];
  }

  // last[i] starts out as the first slot of bucket i.
  last_[0] = last_[1] = s;
  for (int i = 1; i < kRadixBin; ++i)
  {
    last[i] = last[i - 1] + count[i - 1];
  }

  // In-place permutation: fill bucket after bucket. While bucket i is being
  // filled, last[i] is its next unfilled slot; once filled, last[i] is the
  // bucket's end (and therefore the start of bucket i + 1).
  for (int i = 0; i < kRadixBin; ++i)
  {
    RandomIt end = last[i - 1] + count[i];
    if (end == e)
    {
      // Bucket i reaches the end of the range, so every later bucket is
      // empty and whatever remains here is already in place.
      last[i] = e;
      break;
    }
    while (last[i] != end)
    {
      ValueType swapper = *last[i];
      int tag = radix_traits.kth_byte(swapper, kWhichByte);
      if (tag != i)
      {
        // Follow the displacement cycle: drop the carried element into its
        // own bucket, pick up the element that was there, and repeat until
        // an element belonging to bucket i is in hand.
        do
        {
          std::swap(swapper, *last[tag]++);
        } while ((tag = radix_traits.kth_byte(swapper, kWhichByte)) != i);
        *last[i] = swapper;
      }
      ++last[i];
    }
  }

  // Sort inside each bucket [last[i - 1], last[i]) by the remaining bytes.
  if (kWhichByte > 0)
  {
    for (int i = 0; i < kRadixBin; ++i)
    {
      if (count[i] > kInsertSortThreshold)
      {
        // The conditional keeps the template argument non-negative, so the
        // recursive instantiation terminates at byte 0.
        radix_sort_core_<RandomIt, ValueType, RadixTraits, (kWhichByte > 0 ? (kWhichByte - 1) : 0)>(
          last[i - 1], last[i], radix_traits);
      }
      else if (count[i] > 1)
      {
        insert_sort_core_<RandomIt, ValueType, RadixTraits>(last[i - 1], last[i], radix_traits);
      }
    }
  }
}

// Dispatch on range size: insertion sort for short ranges, otherwise a radix
// pass starting at the most significant byte. The ValueType* argument only
// carries the element type.
template <class RandomIt, class ValueType, class RadixTraits>
inline void radix_sort_entry_(RandomIt s, RandomIt e, ValueType*, RadixTraits radix_traits)
{
  if (e - s <= static_cast<int>(kInsertSortThreshold))
    insert_sort_core_<RandomIt, ValueType, RadixTraits>(s, e, radix_traits);
  else
    radix_sort_core_<RandomIt, ValueType, RadixTraits, RadixTraits::nBytes - 1>(s, e, radix_traits);
}

// Pick the built-in traits: unsigned when ValueType(-1) wraps to a value
// above zero, signed otherwise.
template <class RandomIt, class ValueType>
inline void radix_sort_entry_(RandomIt s, RandomIt e, ValueType*)
{
  if (ValueType(-1) > ValueType(0))
  {
    radix_sort_entry_(s, e, static_cast<ValueType*>(0), RadixTraitsUnsigned<ValueType>());
  }
  else
  {
    radix_sort_entry_(s, e, static_cast<ValueType*>(0), RadixTraitsSigned<ValueType>());
  }
}

//================= INTERFACES ====================

// Sorts [s, e) in place using caller-supplied radix traits.
template <class RandomIt, class RadixTraits>
inline void radix_sort(RandomIt s, RandomIt e, RadixTraits radix_traits)
{
  typename std::iterator_traits<RandomIt>::value_type* dummy(0);
  radix_sort_entry_(s, e, dummy, radix_traits);
}

// Sorts [s, e) in place in ascending order using the built-in integer traits.
template <class RandomIt>
inline void radix_sort(RandomIt s, RandomIt e)
{
  typename std::iterator_traits<RandomIt>::value_type* dummy(0);
  radix_sort_entry_(s, e, dummy);
}
}
#endif
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment