//============================================================================
//  Copyright (c) Kitware, Inc.
//  All rights reserved.
//  See LICENSE.txt for details.
//  This software is distributed WITHOUT ANY WARRANTY; without even
//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
//  PURPOSE.  See the above copyright notice for more information.
//
//  Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
//  Copyright 2014 UT-Battelle, LLC.
//  Copyright 2014 Los Alamos National Security.
//
//  Under the terms of Contract DE-NA0003525 with NTESS,
//  the U.S. Government retains certain rights in this software.
//
//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
//  Laboratory (LANL), the U.S. Government retains certain rights in
//  this software.
//============================================================================

#ifndef vtk_m_benchmarking_Benchmarker_h
#define vtk_m_benchmarking_Benchmarker_h

#include <vtkm/ListTag.h>
#include <vtkm/Math.h>

#include <vtkm/cont/TryExecute.h>
#include <vtkm/cont/internal/DeviceAdapterTag.h>

#include <vtkm/cont/testing/Testing.h>

#include <algorithm>
#include <iostream>
#include <numeric>
#include <vector>

/*
 * Writing a Benchmark
 * -------------------
 * To write a benchmark you must provide a functor that will run the operations
 * you want to time and return the run time of those operations using the timer
 * for the device. The benchmark should also be templated on the value type being
 * operated on and the device adapter. Then use VTKM_MAKE_BENCHMARK to generate
 * a maker functor and VTKM_RUN_BENCHMARK to run the benchmark on a list of types.
 *
 * For Example:
 *
 * template<typename Value, typename DeviceAdapter>
 * struct BenchSilly {
 *   // Setup anything that doesn't need to change per run in the constructor
 *   VTKM_CONT BenchSilly(){}
 *
 *   // The overloaded call operator will run the operations being timed and
 *   // return the execution time
 *   VTKM_CONT
 *   vtkm::Float64 operator()(){
 *     return 0.05;
 *   }
 *
 *   // The benchmark must also provide a method describing itself, this is
 *   // used when printing out run time statistics
 *   VTKM_CONT
 *   std::string Description() const {
 *     return "A silly benchmark";
 *   }
 * };
 *
 * // Now use the VTKM_MAKE_BENCHMARK macro to generate a maker functor for
 * // your benchmark. This lets us generate the benchmark functor for each type
 * // we want to test
 * VTKM_MAKE_BENCHMARK(Silly, BenchSilly);
 *
 * // You can also optionally pass arguments to the constructor like so:
 * // VTKM_MAKE_BENCHMARK(Blah, BenchBlah, 1, 2, 3);
 * // Note that benchmark names (the first argument) must be unique so different
 * // parameters to the constructor should have different names
 *
 * // We can now run our benchmark using VTKM_RUN_BENCHMARK, passing the
 * // benchmark name, the type list to run on, and the device adapter id to
 * // run the benchmark with
 * int main(int, char**){
 *   VTKM_RUN_BENCHMARK(Silly, vtkm::ListTagBase<vtkm::Float32>(),
 *                      vtkm::cont::DeviceAdapterTagSerial{});
 *   return 0;
 * }
 *
 * Check out vtkm/benchmarking/BenchmarkDeviceAdapter.h for some example usage
 */

/*
 * Use the VTKM_MAKE_BENCHMARK macro to define a maker functor for your benchmark.
 * This is used to allow you to template the benchmark functor on the type being
 * benchmarked and the device adapter, so you can write init code in the
 * constructor. The maker then returns a constructed instance of your benchmark
 * for the type being benchmarked. The VA_ARGS are used to pass any extra
 * arguments needed by your benchmark's constructor.
 */
// Generates a `MakeBench<Name>` functor whose call operator constructs a
// `Bench<Value, DeviceAdapter>` for the (unused) value/device arguments it is
// given. Extra macro arguments are forwarded to the benchmark's constructor.
// NOTE: stray artifact lines that had been interleaved between the
// backslash-continued lines (breaking the macro) have been removed.
#define VTKM_MAKE_BENCHMARK(Name, Bench, ...)                                                      \
  struct MakeBench##Name                                                                           \
  {                                                                                                \
    template <typename Value, typename DeviceAdapter>                                              \
    VTKM_CONT Bench<Value, DeviceAdapter> operator()(const Value vtkmNotUsed(v),                   \
                                                     DeviceAdapter vtkmNotUsed(id)) const          \
    {                                                                                              \
      return Bench<Value, DeviceAdapter>(__VA_ARGS__);                                             \
    }                                                                                              \
  }

/*
 * Use the VTKM_RUN_BENCHMARK macro to run your benchmark on the type list passed.
 * You must have previously defined a maker functor with VTKM_MAKE_BENCHMARK that this
 * macro will look for and use
 */
// Runs the benchmark previously declared with VTKM_MAKE_BENCHMARK(Name, ...)
// for every type in `Types` on the device identified by `Id`.
#define VTKM_RUN_BENCHMARK(Name, Types, Id)                                                        \
  vtkm::benchmarking::BenchmarkTypes(MakeBench##Name(), (Types), (Id))

namespace vtkm
{
namespace benchmarking
{
namespace stats
{
// Checks that the sequence [first, last) is sorted in non-decreasing order;
// returns true if it is sorted, false otherwise. Empty and single-element
// ranges are considered sorted.
template <typename ForwardIt>
bool is_sorted(ForwardIt first, ForwardIt last)
{
  // Delegate to the standard library. Unlike the previous hand-rolled loop,
  // std::is_sorted is well-defined for an empty range; the old code
  // unconditionally incremented `first`'s successor, which incremented the
  // past-the-end iterator (undefined behavior) when first == last.
  return std::is_sorted(first, last);
}

// Get the value representing the `percent` percentile of the
// sorted samples using linear interpolation
136 137 138
vtkm::Float64 PercentileValue(const std::vector<vtkm::Float64>& samples,
                              const vtkm::Float64 percent)
{
139
  VTKM_ASSERT(!samples.empty());
140 141
  if (samples.size() == 1)
  {
142 143
    return samples.front();
  }
144 145
  VTKM_ASSERT(percent >= 0.0);
  VTKM_ASSERT(percent <= 100.0);
146 147 148
  VTKM_ASSERT(vtkm::benchmarking::stats::is_sorted(samples.begin(), samples.end()));
  if (percent == 100.0)
  {
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
    return samples.back();
  }
  // Find the two nearest percentile values and linearly
  // interpolate between them
  const vtkm::Float64 rank = percent / 100.0 * (static_cast<vtkm::Float64>(samples.size()) - 1.0);
  const vtkm::Float64 low_rank = vtkm::Floor(rank);
  const vtkm::Float64 dist = rank - low_rank;
  const size_t k = static_cast<size_t>(low_rank);
  const vtkm::Float64 low = samples[k];
  const vtkm::Float64 high = samples[k + 1];
  return low + (high - low) * dist;
}
// Winsorize the samples to clean up any very extreme outliers
// Will replace all samples below `percent` and above 100 - `percent` percentiles
// with the value at the percentile
// NOTE: Assumes the samples have been sorted, as we make use of PercentileValue
165 166
void Winsorize(std::vector<vtkm::Float64>& samples, const vtkm::Float64 percent)
{
167 168
  const vtkm::Float64 low_percentile = PercentileValue(samples, percent);
  const vtkm::Float64 high_percentile = PercentileValue(samples, 100.0 - percent);
169 170 171 172
  for (std::vector<vtkm::Float64>::iterator it = samples.begin(); it != samples.end(); ++it)
  {
    if (*it < low_percentile)
    {
173 174
      *it = low_percentile;
    }
175 176
    else if (*it > high_percentile)
    {
177 178 179 180 181
      *it = high_percentile;
    }
  }
}
// Compute the mean value of the dataset
182 183
vtkm::Float64 Mean(const std::vector<vtkm::Float64>& samples)
{
184
  vtkm::Float64 mean = 0;
185 186
  for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
  {
187 188 189 190 191
    mean += *it;
  }
  return mean / static_cast<vtkm::Float64>(samples.size());
}
// Compute the sample variance of the samples
192 193
vtkm::Float64 Variance(const std::vector<vtkm::Float64>& samples)
{
194 195
  vtkm::Float64 mean = Mean(samples);
  vtkm::Float64 square_deviations = 0;
196 197
  for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
  {
198 199 200 201 202
    square_deviations += vtkm::Pow(*it - mean, 2.0);
  }
  return square_deviations / (static_cast<vtkm::Float64>(samples.size()) - 1.0);
}
// Compute the standard deviation of the samples
203 204
vtkm::Float64 StandardDeviation(const std::vector<vtkm::Float64>& samples)
{
205 206 207
  return vtkm::Sqrt(Variance(samples));
}
// Compute the median absolute deviation of the dataset
208 209
vtkm::Float64 MedianAbsDeviation(const std::vector<vtkm::Float64>& samples)
{
210 211 212
  std::vector<vtkm::Float64> abs_deviations;
  abs_deviations.reserve(samples.size());
  const vtkm::Float64 median = PercentileValue(samples, 50.0);
213 214
  for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
  {
215 216
    abs_deviations.push_back(vtkm::Abs(*it - median));
  }
217
  std::sort(abs_deviations.begin(), abs_deviations.end());
218 219 220 221 222 223 224 225 226 227 228
  return PercentileValue(abs_deviations, 50.0);
}
} // stats

/*
 * The benchmarker takes a functor to benchmark and runs it multiple times,
 * printing out statistics of the run time at the end.
 * The functor passed should return the run time of the thing being benchmarked
 * in seconds, this lets us avoid including any per-run setup time in the benchmark.
 * However any one-time setup should be done in the functor's constructor
 */
229
struct Benchmarker
230
{
231 232
  std::vector<vtkm::Float64> Samples;
  std::string BenchmarkName;
233

234 235 236 237 238 239 240 241
  const vtkm::Float64 MaxRuntime;
  const size_t MaxIterations;

public:
  VTKM_CONT
  Benchmarker(vtkm::Float64 maxRuntime = 30, std::size_t maxIterations = 100)
    : MaxRuntime(maxRuntime)
    , MaxIterations(maxIterations)
242 243
  {
  }
244

245
  template <typename Functor>
246
  VTKM_CONT void GatherSamples(Functor func)
247
  {
248 249 250
    this->Samples.clear();
    this->BenchmarkName = func.Description();

251 252 253 254 255
    // Do a warm-up run. If the benchmark allocates any additional memory
    // eg. storage for output results, this will let it do that and
    // allow us to avoid measuring the allocation time in the actual benchmark run
    func();

256 257
    this->Samples.reserve(this->MaxIterations);

258
    // Run each benchmark for MAX_RUNTIME seconds or MAX_ITERATIONS iterations, whichever
259
    // takes less time. This kind of assumes that running for 500 iterations or 30s will give
260 261 262
    // good statistics, but if median abs dev and/or std dev are too high both these limits
    // could be increased
    size_t iter = 0;
263 264
    for (vtkm::Float64 elapsed = 0.0; elapsed < this->MaxRuntime && iter < this->MaxIterations;
         elapsed += this->Samples.back(), ++iter)
265
    {
266
      this->Samples.push_back(func());
267
    }
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291

    std::sort(this->Samples.begin(), this->Samples.end());
    stats::Winsorize(this->Samples, 5.0);
  }

  VTKM_CONT void PrintSummary(std::ostream& out = std::cout)
  {
    out << "Benchmark \'" << this->BenchmarkName << "\' results:\n";

    if (this->Samples.empty())
    {
      out << "\tNo samples gathered!\n";
      return;
    }

    out << "\tnumSamples = " << this->Samples.size() << "\n"
        << "\tmedian = " << stats::PercentileValue(this->Samples, 50.0) << "s\n"
        << "\tmedian abs dev = " << stats::MedianAbsDeviation(this->Samples) << "s\n"
        << "\tmean = " << stats::Mean(this->Samples) << "s\n"
        << "\tstd dev = " << stats::StandardDeviation(this->Samples) << "s\n"
        << "\tmin = " << this->Samples.front() << "s\n"
        << "\tmax = " << this->Samples.back() << "s\n";
  }

292 293
  template <typename DeviceAdapter, typename MakerFunctor, typename T>
  VTKM_CONT bool operator()(DeviceAdapter id, MakerFunctor&& makerFunctor, T t)
294
  {
295
    auto func = makerFunctor(t, id);
296 297
    this->GatherSamples(func);
    this->PrintSummary();
298
    return true;
299 300 301 302 303 304 305 306
  }

  VTKM_CONT const std::vector<vtkm::Float64>& GetSamples() const { return this->Samples; }

  VTKM_CONT void Reset()
  {
    this->Samples.clear();
    this->BenchmarkName.clear();
307 308 309
  }
};

310 311 312
template <typename MakerFunctor>
class InternalPrintTypeAndBench
{
313 314 315
  MakerFunctor Maker;

public:
316
  VTKM_CONT
317 318 319 320
  InternalPrintTypeAndBench(MakerFunctor maker)
    : Maker(maker)
  {
  }
321

322
  template <typename T>
323
  VTKM_CONT void operator()(T t, vtkm::cont::DeviceAdapterId id) const
324
  {
325 326
    std::cout << "*** " << vtkm::testing::TypeName<T>::Name() << " on device " << id.GetName()
              << " ***************" << std::endl;
327
    Benchmarker bench;
328 329
    try
    {
330
      vtkm::cont::TryExecuteOnDevice(id, bench, Maker, t);
331 332 333 334
    }
    catch (std::exception& e)
    {
      std::cout << "\n"
luz.paz's avatar
luz.paz committed
335
                << "An exception occurring during a benchmark:\n\t" << e.what() << "\n"
336 337
                << "Attempting to continue with remaining benchmarks...\n\n";
    }
338 339 340
  }
};

341
template <class MakerFunctor, class TypeList>
342
VTKM_CONT void BenchmarkTypes(MakerFunctor&& maker, TypeList, vtkm::cont::DeviceAdapterId id)
343
{
344 345
  vtkm::ListForEach(
    InternalPrintTypeAndBench<MakerFunctor>(std::forward<MakerFunctor>(maker)), TypeList(), id);
346 347 348 349 350
}
}
}

#endif