//============================================================================
//  Copyright (c) Kitware, Inc.
//  All rights reserved.
//  See LICENSE.txt for details.
//  This software is distributed WITHOUT ANY WARRANTY; without even
//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
//  PURPOSE.  See the above copyright notice for more information.
//
//  Copyright 2017 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
//  Copyright 2017 UT-Battelle, LLC.
//  Copyright 2017 Los Alamos National Security.
//
//  Under the terms of Contract DE-NA0003525 with NTESS,
//  the U.S. Government retains certain rights in this software.
//
//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
//  Laboratory (LANL), the U.S. Government retains certain rights in
//  this software.
//============================================================================

#include <vtkm/TypeTraits.h>

#include "Benchmarker.h"

#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorBadAllocation.h>
#include <vtkm/cont/Timer.h>

#include <vtkm/cont/serial/DeviceAdapterSerial.h>

#include <vtkm/internal/Configure.h>

#include <vtkm/testing/Testing.h>

#include <iomanip>
#include <iostream>
#include <sstream>

#ifdef VTKM_ENABLE_TBB
#include <tbb/task_scheduler_init.h>
#endif // TBB

// For the TBB implementation, the number of threads can be customized using a
// "NumThreads [numThreads]" argument.

namespace vtkm
{
namespace benchmarking
{

const vtkm::UInt64 COPY_SIZE_MIN = (1 << 10); // 1 KiB
const vtkm::UInt64 COPY_SIZE_MAX = (1 << 29); // 512 MiB
const vtkm::UInt64 COPY_SIZE_INC = 1;         // Used as 'size <<= INC'

const size_t COL_WIDTH = 32;

// Benchmark functor that times one Algorithm::Copy between two ArrayHandles.
//
// ValueType is the element type held by both arrays. DeviceAdapter is the
// device tag type; an instance of it is handed to the timer on every call.
// The source array is allocated once in the constructor (its contents are not
// initialized here); the destination array is left for Copy to fill.
template <typename ValueType, typename DeviceAdapter>
struct MeasureCopySpeed
{
  using Algo = vtkm::cont::Algorithm;

  vtkm::cont::ArrayHandle<ValueType> Source;
  vtkm::cont::ArrayHandle<ValueType> Destination;
  vtkm::UInt64 NumBytes; // Requested payload size, exactly as passed by the caller.

  // Allocates a source array holding as many whole ValueType elements as fit
  // in `bytes`. Any exception raised by Allocate propagates to the caller.
  VTKM_CONT
  MeasureCopySpeed(vtkm::UInt64 bytes)
    : NumBytes(bytes)
  {
    // Integer division: a trailing partial element is dropped, so the array
    // may be slightly smaller than the requested byte count.
    vtkm::Id numValues = static_cast<vtkm::Id>(bytes / sizeof(ValueType));
    this->Source.Allocate(numValues);
  }

  // Performs one timed copy of Source into Destination and returns the
  // elapsed time reported by the timer.
  VTKM_CONT vtkm::Float64 operator()()
  {
    vtkm::cont::Timer timer{ DeviceAdapter() };
    timer.Start();
    Algo::Copy(this->Source, this->Destination);

    return timer.GetElapsedTime();
  }

  // Returns a one-line, newline-terminated summary containing the requested
  // size, the size actually allocated, and the element type name.
  VTKM_CONT std::string Description() const
  {
    // Recompute the real payload from the allocated element count, since the
    // constructor rounds down to a whole number of elements.
    vtkm::UInt64 actualSize = sizeof(ValueType);
    actualSize *= static_cast<vtkm::UInt64>(this->Source.GetNumberOfValues());
    std::ostringstream out;
    out << "Copying " << vtkm::cont::GetHumanReadableSize(this->NumBytes)
        << " (actual=" << vtkm::cont::GetHumanReadableSize(actualSize) << ") of "
        << vtkm::testing::TypeName<ValueType>::Name() << "\n";
    return out.str();
  }
};

// Writes one two-column table row to `out`, followed by a newline and flush.
// Each cell is padded to COL_WIDTH characters using the stream's current
// fill and alignment settings; cells longer than COL_WIDTH are not truncated.
void PrintRow(std::ostream& out, const std::string& label, const std::string& data)
{
  // First cell.
  out << "| " << std::setw(COL_WIDTH) << label;
  // Second cell.
  out << " | " << std::setw(COL_WIDTH) << data;
  // Closing border.
  out << " |" << std::endl;
}

// Writes a horizontal rule to `out` whose cell borders line up with the rows
// produced by PrintRow, followed by a newline and flush.
void PrintDivider(std::ostream& out)
{
  // One run of dashes per column, as wide as a data cell.
  const std::string dashes(COL_WIDTH, '-');

  out << "|-" << dashes;
  out << "-|-" << dashes;
  out << "-|" << std::endl;
}

109 110
template <typename ValueType, typename DeviceAdapter>
void BenchmarkValueType(vtkm::cont::DeviceAdapterId id)
111
{
112
  PrintRow(std::cout, vtkm::testing::TypeName<ValueType>::Name(), id.GetName());
113 114 115 116 117 118

  PrintDivider(std::cout);

  Benchmarker bench(15, 100);
  for (vtkm::UInt64 size = COPY_SIZE_MIN; size <= COPY_SIZE_MAX; size <<= COPY_SIZE_INC)
  {
119
    MeasureCopySpeed<ValueType, DeviceAdapter> functor(size);
120 121 122 123 124 125 126
    bench.Reset();

    std::string speedStr;

    try
    {
      bench.GatherSamples(functor);
127
      vtkm::Float64 speed = static_cast<Float64>(size) / stats::Mean(bench.GetSamples());
128
      speedStr = vtkm::cont::GetHumanReadableSize(static_cast<UInt64>(speed)) + std::string("/s");
129
    }
130
    catch (vtkm::cont::ErrorBadAllocation&)
131 132 133 134
    {
      speedStr = "[allocation too large]";
    }

135
    PrintRow(std::cout, vtkm::cont::GetHumanReadableSize(size), speedStr);
136 137 138 139 140 141 142
  }

  std::cout << "\n";
}
}
} // end namespace vtkm::benchmarking

143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178
namespace
{
using namespace vtkm::benchmarking;

struct BenchmarkValueTypeFunctor
{
  template <typename DeviceAdapter>
  bool operator()(DeviceAdapter id)
  {
    BenchmarkValueType<vtkm::UInt8, DeviceAdapter>(id);
    BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 2>, DeviceAdapter>(id);
    BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 3>, DeviceAdapter>(id);
    BenchmarkValueType<vtkm::Vec<vtkm::UInt8, 4>, DeviceAdapter>(id);

    BenchmarkValueType<vtkm::UInt32, DeviceAdapter>(id);
    BenchmarkValueType<vtkm::Vec<vtkm::UInt32, 2>, DeviceAdapter>(id);

    BenchmarkValueType<vtkm::UInt64, DeviceAdapter>(id);
    BenchmarkValueType<vtkm::Vec<vtkm::UInt64, 2>, DeviceAdapter>(id);

    BenchmarkValueType<vtkm::Float32, DeviceAdapter>(id);
    BenchmarkValueType<vtkm::Vec<vtkm::Float32, 2>, DeviceAdapter>(id);

    BenchmarkValueType<vtkm::Float64, DeviceAdapter>(id);
    BenchmarkValueType<vtkm::Vec<vtkm::Float64, 2>, DeviceAdapter>(id);

    BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float32>, DeviceAdapter>(id);
    BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float64>, DeviceAdapter>(id);
    BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float32>, DeviceAdapter>(id);
    BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float64>, DeviceAdapter>(id);

    return true;
  }
};
}

179 180
int main(int argc, char* argv[])
{
181 182
  auto opts = vtkm::cont::InitializeOptions::RequireDevice |
    vtkm::cont::InitializeOptions::ErrorOnBadOption | vtkm::cont::InitializeOptions::AddHelp;
183
  auto config = vtkm::cont::Initialize(argc, argv, opts);
184

185

186
#ifdef VTKM_ENABLE_TBB
187 188 189
  int numThreads = tbb::task_scheduler_init::automatic;
#endif // TBB

190
  if (argc == 3)
191
  {
192
    if (std::string(argv[1]) == "NumThreads")
193
    {
194
#ifdef VTKM_ENABLE_TBB
195
      std::istringstream parse(argv[2]);
196 197 198 199 200 201 202 203
      parse >> numThreads;
      std::cout << "Selected " << numThreads << " TBB threads." << std::endl;
#else
      std::cerr << "NumThreads valid only on TBB. Ignoring." << std::endl;
#endif // TBB
    }
  }

204
#ifdef VTKM_ENABLE_TBB
205 206 207 208
  // Must not be destroyed as long as benchmarks are running:
  tbb::task_scheduler_init init(numThreads);
#endif // TBB

209 210
  BenchmarkValueTypeFunctor functor;
  vtkm::cont::TryExecuteOnDevice(config.Device, functor);
211
}