Commit 546b2144 authored by Allison Vacanti, committed by Kitware Robot

Merge topic 'copy_speed_benchmarks'

4cd79193 Ensure that Pair and Vec are trivial classes.
d465d030 Add benchmark to print copy speeds.
b582b079 Modify Benchmarker to expose samples and reduce iterations.
d0fa70de Reduce overhead and fix bugs in device adapter benchmarks.
05419719 Add more options to the device adapter algorithm benchmark.
Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Robert Maynard <robert.maynard@kitware.com>
Merge-request: !972
parents 502787b1 4cd79193
......@@ -65,11 +65,7 @@ struct Pair
SecondType second;
VTKM_EXEC_CONT
Pair()
: first()
, second()
{
}
Pair() = default;
VTKM_EXEC_CONT
Pair(const FirstType& firstSrc, const SecondType& secondSrc)
......@@ -93,12 +89,8 @@ struct Pair
}
VTKM_EXEC_CONT
vtkm::Pair<FirstType, SecondType>& operator=(const vtkm::Pair<FirstType, SecondType>& src)
{
this->first = src.first;
this->second = src.second;
return *this;
}
vtkm::Pair<FirstType, SecondType>& operator=(const vtkm::Pair<FirstType, SecondType>& src) =
default;
VTKM_EXEC_CONT
bool operator==(const vtkm::Pair<FirstType, SecondType>& other) const
......
......@@ -438,7 +438,7 @@ public:
protected:
VTKM_EXEC_CONT
VecBaseCommon() {}
VecBaseCommon() = default;
VTKM_EXEC_CONT
const DerivedClass& Derived() const { return *static_cast<const DerivedClass*>(this); }
......@@ -666,7 +666,7 @@ public:
protected:
VTKM_EXEC_CONT
VecBase() {}
VecBase() = default;
VTKM_EXEC_CONT
explicit VecBase(const ComponentType& value)
......@@ -824,7 +824,7 @@ public:
static const vtkm::IdComponent NUM_COMPONENTS = Size;
#endif
VTKM_EXEC_CONT Vec() {}
VTKM_EXEC_CONT Vec() = default;
VTKM_EXEC_CONT explicit Vec(const T& value)
: Superclass(value)
{
......@@ -851,7 +851,7 @@ public:
using ComponentType = T;
static const vtkm::IdComponent NUM_COMPONENTS = 0;
VTKM_EXEC_CONT Vec() {}
VTKM_EXEC_CONT Vec() = default;
VTKM_EXEC_CONT explicit Vec(const ComponentType&) {}
template <typename OtherType>
......@@ -882,7 +882,7 @@ class VTKM_ALWAYS_EXPORT Vec<T, 1> : public detail::VecBase<T, 1, Vec<T, 1>>
using Superclass = detail::VecBase<T, 1, Vec<T, 1>>;
public:
VTKM_EXEC_CONT Vec() {}
VTKM_EXEC_CONT Vec() = default;
VTKM_EXEC_CONT explicit Vec(const T& value)
: Superclass(value)
{
......@@ -912,7 +912,7 @@ class VTKM_ALWAYS_EXPORT Vec<T, 2> : public detail::VecBase<T, 2, Vec<T, 2>>
using Superclass = detail::VecBase<T, 2, Vec<T, 2>>;
public:
VTKM_EXEC_CONT Vec() {}
VTKM_EXEC_CONT Vec() = default;
VTKM_EXEC_CONT explicit Vec(const T& value)
: Superclass(value)
{
......@@ -941,7 +941,7 @@ class VTKM_ALWAYS_EXPORT Vec<T, 3> : public detail::VecBase<T, 3, Vec<T, 3>>
using Superclass = detail::VecBase<T, 3, Vec<T, 3>>;
public:
VTKM_EXEC_CONT Vec() {}
VTKM_EXEC_CONT Vec() = default;
VTKM_EXEC_CONT explicit Vec(const T& value)
: Superclass(value)
{
......@@ -972,7 +972,7 @@ class VTKM_ALWAYS_EXPORT Vec<T, 4> : public detail::VecBase<T, 4, Vec<T, 4>>
using Superclass = detail::VecBase<T, 4, Vec<T, 4>>;
public:
VTKM_EXEC_CONT Vec() {}
VTKM_EXEC_CONT Vec() = default;
VTKM_EXEC_CONT explicit Vec(const T& value)
: Superclass(value)
{
......
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2017 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2017 UT-Battelle, LLC.
// Copyright 2017 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================
#include <vtkm/TypeTraits.h>
#include <vtkm/benchmarking/Benchmarker.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorBadAllocation.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
#include <vtkm/internal/Configure.h>
#include <vtkm/testing/Testing.h>
#include <iostream>
#include <sstream>
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
#include <tbb/task_scheduler_init.h>
#endif // TBB
// For the TBB implementation, the number of threads can be customized using a
// "NumThreads [numThreads]" argument.
namespace vtkm
{
namespace benchmarking
{
// Sweep parameters for the copy benchmark: BenchmarkValueType starts at
// COPY_SIZE_MIN bytes and left-shifts the size by COPY_SIZE_INC on every step
// for as long as the size does not exceed COPY_SIZE_MAX.
const vtkm::UInt64 COPY_SIZE_MIN = (1 << 10); // 1 KiB
const vtkm::UInt64 COPY_SIZE_MAX = (1 << 29); // 512 MiB
const vtkm::UInt64 COPY_SIZE_INC = 1; // Used as 'size <<= INC'
// Character width of each of the two columns in the printed results table.
const size_t COL_WIDTH = 32;
/// Benchmark functor that times a device copy between two array handles.
///
/// Source is sized once, at construction, to hold as many ValueType elements
/// as fit in the requested number of bytes. Every call to operator() copies
/// Source into Destination with the device adapter's Copy algorithm and
/// reports the time measured by a device timer.
template <typename ValueType, typename DeviceAdapter>
struct MeasureCopySpeed
{
  using Algo = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapter>;

  vtkm::cont::ArrayHandle<ValueType> Source;
  vtkm::cont::ArrayHandle<ValueType> Destination;
  vtkm::UInt64 NumBytes;

  /// Records the requested byte count and allocates Source with
  /// bytes / sizeof(ValueType) values (integer division, so the actual
  /// payload may be slightly smaller than requested).
  VTKM_CONT
  MeasureCopySpeed(vtkm::UInt64 bytes)
    : NumBytes(bytes)
  {
    this->Source.Allocate(static_cast<vtkm::Id>(bytes / sizeof(ValueType)));
  }

  /// Runs a single Source -> Destination copy and returns the elapsed time
  /// reported by the timer that was started just before the copy.
  VTKM_CONT vtkm::Float64 operator()()
  {
    vtkm::cont::Timer<DeviceAdapter> stopwatch;
    Algo::Copy(this->Source, this->Destination);
    const vtkm::Float64 elapsed = stopwatch.GetElapsedTime();
    return elapsed;
  }

  /// Returns a one-line label (terminated by a newline) giving the requested
  /// size, the size actually held by Source, and the value type's name.
  VTKM_CONT std::string Description() const
  {
    const vtkm::UInt64 copiedBytes =
      static_cast<vtkm::UInt64>(this->Source.GetNumberOfValues() * sizeof(ValueType));
    const std::string requested = HumanSize(static_cast<vtkm::Float64>(this->NumBytes));
    const std::string actual = HumanSize(static_cast<vtkm::Float64>(copiedBytes));

    std::ostringstream text;
    text << "Copying " << requested;
    text << " (actual=" << actual << ") of ";
    text << vtkm::testing::TypeName<ValueType>::Name() << "\n";
    return text.str();
  }
};
/// Writes one row of the two-column results table to `out`.
///
/// Both `label` and `data` are padded to COL_WIDTH characters with
/// std::setw, and the row is terminated with std::endl (which also flushes
/// the stream).
void PrintRow(std::ostream& out, const std::string& label, const std::string& data)
{
  out << "| " << std::setw(COL_WIDTH) << label;
  out << " | " << std::setw(COL_WIDTH) << data;
  out << " |" << std::endl;
}
/// Writes a horizontal rule matching the two-column layout produced by
/// PrintRow: each cell is COL_WIDTH dashes plus one dash of padding on
/// either side.
void PrintDivider(std::ostream& out)
{
  const std::string dashes(COL_WIDTH, '-');
  // One rendered cell including its padding dashes and the closing bar.
  const std::string cell = "-" + dashes + "-|";
  out << "|" << cell << cell << std::endl;
}
template <typename ValueType>
void BenchmarkValueType()
{
PrintRow(std::cout,
vtkm::testing::TypeName<ValueType>::Name(),
vtkm::cont::DeviceAdapterTraits<VTKM_DEFAULT_DEVICE_ADAPTER_TAG>::GetName());
PrintDivider(std::cout);
Benchmarker bench(15, 100);
for (vtkm::UInt64 size = COPY_SIZE_MIN; size <= COPY_SIZE_MAX; size <<= COPY_SIZE_INC)
{
MeasureCopySpeed<ValueType, VTKM_DEFAULT_DEVICE_ADAPTER_TAG> functor(size);
bench.Reset();
std::string speedStr;
try
{
bench.GatherSamples(functor);
vtkm::Float64 speed = static_cast<vtkm::Float64>(size) / stats::Mean(bench.GetSamples());
speedStr = HumanSize(speed) + std::string("/s");
}
catch (vtkm::cont::ErrorBadAllocation& e)
{
speedStr = "[allocation too large]";
}
PrintRow(std::cout, HumanSize(static_cast<vtkm::Float64>(size)), speedStr);
}
std::cout << "\n";
}
}
} // end namespace vtkm::benchmarking
/// Entry point: runs the copy-speed benchmark for a fixed list of value types.
///
/// Accepts an optional "NumThreads <n>" argument pair. When built for the TBB
/// device adapter, a valid positive <n> selects the number of TBB threads; an
/// unparsable or non-positive value is reported on std::cerr and the TBB
/// automatic setting is kept. On other device adapters the argument is
/// ignored with a warning.
int main(int argc, char* argv[])
{
  using namespace vtkm::benchmarking;

#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
  int numThreads = tbb::task_scheduler_init::automatic;
#endif // TBB

  if (argc == 3)
  {
    if (std::string(argv[1]) == "NumThreads")
    {
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
      // Parse into a temporary so that a failed extraction (which would store
      // 0) or a non-positive count never reaches the scheduler.
      std::istringstream parse(argv[2]);
      int requested = 0;
      if ((parse >> requested) && requested > 0)
      {
        numThreads = requested;
        std::cout << "Selected " << numThreads << " TBB threads." << std::endl;
      }
      else
      {
        std::cerr << "Invalid NumThreads value '" << argv[2] << "'. Using the TBB default."
                  << std::endl;
      }
#else
      std::cerr << "NumThreads valid only on TBB. Ignoring." << std::endl;
#endif // TBB
    }
  }

#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
  // Must not be destroyed as long as benchmarks are running:
  tbb::task_scheduler_init init(numThreads);
#endif // TBB

  BenchmarkValueType<vtkm::UInt8>();
  BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 2>>();
  BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 3>>();
  BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 4>>();

  BenchmarkValueType<vtkm::UInt32>();
  BenchmarkValueType<vtkm::Vec<vtkm::UInt32, 2>>();

  BenchmarkValueType<vtkm::UInt64>();
  BenchmarkValueType<vtkm::Vec<vtkm::UInt64, 2>>();

  BenchmarkValueType<vtkm::Float32>();
  BenchmarkValueType<vtkm::Vec<vtkm::Float32, 2>>();

  BenchmarkValueType<vtkm::Float64>();
  BenchmarkValueType<vtkm::Vec<vtkm::Float64, 2>>();

  BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float32>>();
  BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float64>>();
  BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float32>>();
  BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float64>>();

  return 0;
}
......@@ -223,46 +223,82 @@ vtkm::Float64 MedianAbsDeviation(const std::vector<vtkm::Float64>& samples)
* in seconds, this lets us avoid including any per-run setup time in the benchmark.
* However any one-time setup should be done in the functor's constructor
*/
struct Benchmarker
class Benchmarker
{
const vtkm::Float64 MAX_RUNTIME;
const size_t MAX_ITERATIONS;
std::vector<vtkm::Float64> Samples;
std::string BenchmarkName;
Benchmarker()
: MAX_RUNTIME(30)
, MAX_ITERATIONS(500)
const vtkm::Float64 MaxRuntime;
const size_t MaxIterations;
public:
VTKM_CONT
Benchmarker(vtkm::Float64 maxRuntime = 30, std::size_t maxIterations = 100)
: MaxRuntime(maxRuntime)
, MaxIterations(maxIterations)
{
}
template <typename Functor>
VTKM_CONT void operator()(Functor func) const
VTKM_CONT void GatherSamples(Functor func)
{
std::vector<vtkm::Float64> samples;
this->Samples.clear();
this->BenchmarkName = func.Description();
// Do a warm-up run. If the benchmark allocates any additional memory
// eg. storage for output results, this will let it do that and
// allow us to avoid measuring the allocation time in the actual benchmark run
func();
samples.reserve(MAX_ITERATIONS);
this->Samples.reserve(this->MaxIterations);
// Run each benchmark for MaxRuntime seconds or MaxIterations iterations, whichever
// takes less time. This kind of assumes that running for 500 iterations or 1.5s will give
// takes less time. This kind of assumes that running for 100 iterations or 30s will give
// good statistics, but if median abs dev and/or std dev are too high both these limits
// could be increased
size_t iter = 0;
for (vtkm::Float64 elapsed = 0.0; elapsed < MAX_RUNTIME && iter < MAX_ITERATIONS;
elapsed += samples.back(), ++iter)
for (vtkm::Float64 elapsed = 0.0; elapsed < this->MaxRuntime && iter < this->MaxIterations;
elapsed += this->Samples.back(), ++iter)
{
samples.push_back(func());
this->Samples.push_back(func());
}
std::sort(samples.begin(), samples.end());
stats::Winsorize(samples, 5.0);
std::cout << "Benchmark \'" << func.Description() << "\' results:\n"
<< "\tmedian = " << stats::PercentileValue(samples, 50.0) << "s\n"
<< "\tmedian abs dev = " << stats::MedianAbsDeviation(samples) << "s\n"
<< "\tmean = " << stats::Mean(samples) << "s\n"
<< "\tstd dev = " << stats::StandardDeviation(samples) << "s\n"
<< "\tmin = " << samples.front() << "s\n"
<< "\tmax = " << samples.back() << "s\n";
std::sort(this->Samples.begin(), this->Samples.end());
stats::Winsorize(this->Samples, 5.0);
}
VTKM_CONT void PrintSummary(std::ostream& out = std::cout)
{
out << "Benchmark \'" << this->BenchmarkName << "\' results:\n";
if (this->Samples.empty())
{
out << "\tNo samples gathered!\n";
return;
}
out << "\tnumSamples = " << this->Samples.size() << "\n"
<< "\tmedian = " << stats::PercentileValue(this->Samples, 50.0) << "s\n"
<< "\tmedian abs dev = " << stats::MedianAbsDeviation(this->Samples) << "s\n"
<< "\tmean = " << stats::Mean(this->Samples) << "s\n"
<< "\tstd dev = " << stats::StandardDeviation(this->Samples) << "s\n"
<< "\tmin = " << this->Samples.front() << "s\n"
<< "\tmax = " << this->Samples.back() << "s\n";
}
template <typename Functor>
VTKM_CONT void operator()(Functor func)
{
this->GatherSamples(func);
this->PrintSummary();
}
VTKM_CONT const std::vector<vtkm::Float64>& GetSamples() const { return this->Samples; }
VTKM_CONT void Reset()
{
this->Samples.clear();
this->BenchmarkName.clear();
}
};
......
......@@ -20,6 +20,7 @@
set(benchmark_srcs
BenchmarkArrayTransfer.cxx
BenchmarkCopySpeeds.cxx
BenchmarkDeviceAdapter.cxx
BenchmarkFieldAlgorithms.cxx
BenchmarkTopologyAlgorithms.cxx
......
......@@ -141,12 +141,12 @@ struct SwizzleTests
auto refPortal = this->RefArray.GetPortalConstControl();
auto testPortal = testArray.GetPortalConstControl();
SwizzleVectorType refVecSwizzle(vtkm::TypeTraits<SwizzleVectorType>::ZeroInitialization());
for (vtkm::Id i = 0; i < testArray.GetNumberOfValues(); ++i)
{
// Manually swizzle the reference vector using the runtime map information:
ReferenceVectorType refVec = refPortal.Get(i);
SwizzleVectorType refVecSwizzle;
// Manually swizzle the reference vector using the runtime map information:
for (size_t j = 0; j < map.size(); ++j)
{
refVecSwizzle[static_cast<vtkm::IdComponent>(j)] = refVec[map[j]];
......
......@@ -32,6 +32,7 @@
#include <vtkm/VecTraits.h>
#include <exception>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
......@@ -557,6 +558,12 @@ static inline VTKM_EXEC_CONT vtkm::Vec<T, N> TestValue(vtkm::Id index, vtkm::Vec
return value;
}
/// Builds the test value for a vtkm::Pair at the given index.
///
/// The first component is the U test value at index 2*index and the second
/// component is the V test value at index 2*index + 1. The unnamed Pair
/// parameter is used only for overload selection.
template <typename U, typename V>
static inline VTKM_EXEC_CONT vtkm::Pair<U, V> TestValue(vtkm::Id index, vtkm::Pair<U, V>)
{
  const vtkm::Id firstIndex = 2 * index;
  const vtkm::Id secondIndex = firstIndex + 1;
  return vtkm::Pair<U, V>(TestValue(firstIndex, U()), TestValue(secondIndex, V()));
}
static inline VTKM_CONT std::string TestValue(vtkm::Id index, std::string)
{
std::stringstream stream;
......@@ -622,4 +629,45 @@ static inline VTKM_CONT bool test_equal_portals(const PortalType1& portal1,
return true;
}
/// Format a byte count as a human readable string using binary (1024-based)
/// units, from "bytes" up to "PiB".
///
/// The value is divided by 1024 until it drops below 1024 or the largest unit
/// is reached, then printed in fixed notation with two decimals followed by a
/// space and the unit (e.g. "64.00 bytes", "1.44 MiB", "128.00 GiB").
static inline VTKM_CONT std::string HumanSize(vtkm::Float64 bytes)
{
  // Unit names in ascending order; adjacent entries differ by a factor of 1024.
  const char* const units[] = { "bytes", "KiB", "MiB", "GiB", "TiB", "PiB" };
  const int lastUnit = static_cast<int>(sizeof(units) / sizeof(units[0])) - 1;

  int unit = 0;
  while (unit < lastUnit && bytes >= 1024.)
  {
    bytes /= 1024.;
    ++unit;
  }

  std::ostringstream out;
  out << std::fixed << std::setprecision(2) << bytes << " " << units[unit];
  return out.str();
}
#endif //vtk_m_testing_Testing_h
......@@ -31,6 +31,16 @@ namespace
template <typename T, typename U>
void PairTest()
{
{
  using P = vtkm::Pair<T, U>;

  // Pair types should preserve the trivial properties of their components.
  // This ensures that algorithms like std::copy will optimize fully.
  // The parentheses are required: `==` binds tighter than `&&`, so without
  // them the check would read `trivial<T> && (trivial<U> == trivial<P>)` and
  // fail spuriously whenever T is not trivial.
  VTKM_TEST_ASSERT((std::is_trivial<T>::value && std::is_trivial<U>::value) ==
                     std::is_trivial<P>::value,
                   "PairType's triviality differs from ComponentTypes.");
}
//test that all the constructors work properly
{
vtkm::Pair<T, U> no_params_pair;
......
......@@ -323,6 +323,11 @@ void GeneralVecTypeTest(const vtkm::Vec<ComponentType, Size>&)
typedef vtkm::Vec<ComponentType, Size> T;
// Vector types should preserve the trivial properties of their components.
// This ensures that algorithms like std::copy will optimize fully.
VTKM_TEST_ASSERT(std::is_trivial<ComponentType>::value == std::is_trivial<T>::value,
"VectorType's triviality differs from ComponentType.");
VTKM_TEST_ASSERT(T::NUM_COMPONENTS == Size, "NUM_COMPONENTS is wrong size.");
//grab the number of elements of T
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment