Commit 1d5a40e5 authored by Allison Vacanti, committed by Kitware Robot

Merge topic '288_atomic_array_benchmarks'

f16e1011 Cleanup / expand the benchmark parser scripts.
39f42552 Add benchmark for atomic arrays.
Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !1389
parents 65d96ae8 f16e1011
...@@ -3,14 +3,11 @@ ...@@ -3,14 +3,11 @@
# Compares the output from BenchmarkDeviceAdapter from the serial # Compares the output from BenchmarkDeviceAdapter from the serial
# device to a parallel device and prints a table containing the results. # device to a parallel device and prints a table containing the results.
# #
# While this was written for the device adapter algorithms, it could be used
# to compare any VTKM benchmark output.
#
# Example usage: # Example usage:
# #
# $ BenchmarkDeviceAdapter_SERIAL > serial.out # $ BenchmarkDeviceAdapter_SERIAL > serial.out
# $ BenchmarkDeviceAdapter_TBB > tbb.out # $ BenchmarkDeviceAdapter_TBB > tbb.out
# $ devAlgoBenchSummary.py serial.out tbb.out # $ benchCompare.py serial.out tbb.out
# #
# #
# The number of threads (optional -- only used to generate the "Warn" column) # The number of threads (optional -- only used to generate the "Warn" column)
......
#!/usr/bin/env python
#
# Prints a concise summary of a benchmark output as a TSV blob.
#
# Example usage:
#
# $ BenchmarkXXX_DEVICE > bench.out
# $ benchSummary.py bench.out
#
# Options SortByType, SortByName, or SortByMean may be passed after the
# filename to sort the output by the indicated quantity. If no sort option
# is provided, the output order matches the input. If multiple options are
# specified, the list will be sorted repeatedly in the order requested.
import re
import sys
# The benchmark output file is a required first argument.
assert len(sys.argv) >= 2

# Type banner, e.g. "*** vtkm::Float64 ***************" --> "vtkm::Float64"
# (three stars, the type name, then at least fifteen stars).
typeParser = re.compile("\\*{3} ([^*]+) \\*{15}")

# Result header, e.g. "Benchmark 'Benchmark name' results:" --> "Benchmark name".
# The captured name may not contain a '-' character.
nameParser = re.compile("Benchmark '([^-]+)' results:")

# Indented mean line, e.g. "  mean = 0.0125s" --> "0.0125"
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")

# Indented standard deviation line, e.g. "  std dev = 0.0125s" --> "0.0125".
# The character class also admits the letters needed to spell "nan"/"NaN".
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")

# Input file named on the command line; it is consumed further down the script.
filename = sys.argv[1]
benchFile = open(filename, 'r')

# Everything after the filename is treated as a sort option.
# None means that no sorting was requested.
sortOpt = sys.argv[2:] if len(sys.argv) > 2 else None
class BenchKey:
    """Dictionary key identifying one benchmark result.

    A key is the pair (benchmark name, value type name). Keys compare equal
    when both fields match, order by name first and type second, and hash
    consistently with equality so they can be used as dict keys.
    """

    def __init__(self, name_, type_):
        self.name = name_
        self.type = type_

    def __eq__(self, other):
        # Operands without name/type attributes are "not comparable" rather
        # than an error, so `key == 1` evaluates to False instead of raising.
        try:
            return self.name == other.name and self.type == other.type
        except AttributeError:
            return NotImplemented

    def __lt__(self, other):
        # Order by name, breaking ties with the type.
        try:
            return (self.name, self.type) < (other.name, other.type)
        except AttributeError:
            return NotImplemented

    def __hash__(self):
        # Equal keys have equal name and type, hence equal concatenations.
        return hash(self.name + self.type)
class BenchData:
    """Plain record holding the mean and standard deviation of one benchmark."""

    def __init__(self, mean_, stdDev_):
        # Both values are stored exactly as given; no validation is performed.
        self.mean, self.stdDev = mean_, stdDev_
def parseFile(f, benchmarks):
    """Parse benchmark output and record one BenchData per (name, type).

    f          -- iterable of text lines (e.g. an open file).
    benchmarks -- dict that is filled in place, mapping BenchKey -> BenchData.

    Each line is tried against the type, name, mean and std dev patterns in
    that order. The most recently seen type and name stay in effect until the
    next matching line replaces them. An entry is stored when a std dev line
    is seen, using the mean parsed since the previous entry (or -1. if none).
    """
    typeName = ""
    # Initialized so that a std dev line appearing before any name line
    # yields an entry with an empty name instead of raising UnboundLocalError.
    name = ""
    mean = -1.
    stdDev = -1.

    for line in f:
        typeRes = typeParser.match(line)
        if typeRes:
            typeName = typeRes.group(1)
            continue

        nameRes = nameParser.match(line)
        if nameRes:
            name = nameRes.group(1)
            continue

        meanRes = meanParser.match(line)
        if meanRes:
            mean = float(meanRes.group(1))
            continue

        stdDevRes = stdDevParser.match(line)
        if stdDevRes:
            stdDev = float(stdDevRes.group(1))

            # stdDev is always the last parse for a given benchmark, add entry now
            benchmarks[BenchKey(name, typeName)] = BenchData(mean, stdDev)

            # Reset so a stale value cannot leak into the next benchmark.
            mean = -1.
            stdDev = -1.
# Collect every benchmark in the input file as {BenchKey: BenchData}.
benchmarks = {}
parseFile(benchFile, benchmarks)

# Apply the requested sorts one after another, in command-line order.
# Option names are case-insensitive; unrecognized options are ignored.
keys = benchmarks.keys()
sortKeyFuncs = {
    "sortbytype": lambda k: k.type,
    "sortbyname": lambda k: k.name,
    "sortbymean": lambda k: benchmarks[k].mean,
}
for opt in (sortOpt or []):
    keyFunc = sortKeyFuncs.get(opt.lower())
    if keyFunc is not None:
        keys = sorted(keys, key=keyFunc)

# Emit the tab-separated summary: header lines first, then one row per benchmark.
print("# Summary: (%s)"%filename)
print("%-9s\t%-9s\t%-s"%("Mean", "Stdev", "Benchmark (type)"))
for key in keys:
    data = benchmarks[key]
    print("%9.6f\t%9.6f\t%s (%s)"%(data.mean, data.stdDev, key.name, key.type))
#!/usr/bin/env python
#
# Prints a concise summary of a benchmark output as a TSV blob. Benchmarks are
# expected to have "Baseline" in the name, and a matching benchmark with the
# same name but Baseline replaced with something else. For example,
#
# Baseline benchmark name: "Some benchmark: Baseline, Size=4"
# Test benchmark name: "Some benchmark: Blahblah, Size=4"
#
# The output will print the baseline, test, and overhead times for the
# benchmarks.
#
# Example usage:
#
# $ BenchmarkXXX_DEVICE > bench.out
# $ benchSummaryWithBaselines.py bench.out
#
# Options SortByType, SortByName, SortByOverhead, or SortByRatio
# (testtime/baseline) may be passed after the filename to sort the output by
# the indicated quantity. If no sort option is provided, the output order
# matches the input. If multiple options are specified, the list will be sorted
# repeatedly in the order requested.
import re
import sys
# The benchmark output file is a required first argument.
assert len(sys.argv) >= 2

# Type banner, e.g. "*** vtkm::Float64 ***************" --> "vtkm::Float64"
# (three stars, the type name, then at least fifteen stars).
typeParser = re.compile("\\*{3} ([^*]+) \\*{15}")

# Result header, e.g. "Benchmark 'Benchmark name' results:" --> "Benchmark name".
# The captured name may not contain a '-' character.
nameParser = re.compile("Benchmark '([^-]+)' results:")

# Indented mean line, e.g. "  mean = 0.0125s" --> "0.0125"
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")

# Indented standard deviation line, e.g. "  std dev = 0.0125s" --> "0.0125".
# The character class also admits the letters needed to spell "nan"/"NaN".
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")

# Splits a name around the word "Baseline":
# "SomeText Baseline Other Text" --> ("SomeText ", " Other Text").
# The first group is greedy, so the split happens at the last "Baseline".
baselineParser = re.compile("(.*)Baseline(.*)")

# Input file named on the command line; it is consumed further down the script.
filename = sys.argv[1]
benchFile = open(filename, 'r')

# Everything after the filename is treated as a sort option.
# None means that no sorting was requested.
sortOpt = sys.argv[2:] if len(sys.argv) > 2 else None
class BenchKey:
    """Dictionary key identifying one benchmark result.

    A key is the pair (benchmark name, value type name). Keys compare equal
    when both fields match, order by name first and type second, and hash
    consistently with equality so they can be used as dict keys.
    """

    def __init__(self, name_, type_):
        self.name = name_
        self.type = type_

    def __eq__(self, other):
        # Operands without name/type attributes are "not comparable" rather
        # than an error, so `key == 1` evaluates to False instead of raising.
        try:
            return self.name == other.name and self.type == other.type
        except AttributeError:
            return NotImplemented

    def __lt__(self, other):
        # Order by name, breaking ties with the type.
        try:
            return (self.name, self.type) < (other.name, other.type)
        except AttributeError:
            return NotImplemented

    def __hash__(self):
        # Equal keys have equal name and type, hence equal concatenations.
        return hash(self.name + self.type)
class BenchData:
    """Plain record holding the mean and standard deviation of one benchmark."""

    def __init__(self, mean_, stdDev_):
        # Both values are stored exactly as given; no validation is performed.
        self.mean, self.stdDev = mean_, stdDev_
def parseFile(f, benchmarks):
    """Parse benchmark output and record one BenchData per (name, type).

    f          -- iterable of text lines (e.g. an open file).
    benchmarks -- dict that is filled in place, mapping BenchKey -> BenchData.

    Each line is tried against the type, name, mean and std dev patterns in
    that order. The most recently seen type and name stay in effect until the
    next matching line replaces them. An entry is stored when a std dev line
    is seen, using the mean parsed since the previous entry (or -1. if none).
    """
    typeName = ""
    # Initialized so that a std dev line appearing before any name line
    # yields an entry with an empty name instead of raising UnboundLocalError.
    name = ""
    mean = -1.
    stdDev = -1.

    for line in f:
        typeRes = typeParser.match(line)
        if typeRes:
            typeName = typeRes.group(1)
            continue

        nameRes = nameParser.match(line)
        if nameRes:
            name = nameRes.group(1)
            continue

        meanRes = meanParser.match(line)
        if meanRes:
            mean = float(meanRes.group(1))
            continue

        stdDevRes = stdDevParser.match(line)
        if stdDevRes:
            stdDev = float(stdDevRes.group(1))

            # stdDev is always the last parse for a given benchmark, add entry now
            benchmarks[BenchKey(name, typeName)] = BenchData(mean, stdDev)

            # Reset so a stale value cannot leak into the next benchmark.
            mean = -1.
            stdDev = -1.
class BaselinedBenchData:
    """Mean times of a baseline/test benchmark pair plus their difference."""

    def __init__(self, baseline, test):
        # Only the `mean` attribute of each argument is read.
        baseMean = baseline.mean
        testMean = test.mean
        self.baseline = baseMean
        self.test = testMean
        # Positive when the test benchmark took longer than its baseline.
        self.overhead = testMean - baseMean
def findBaselines(benchmarks):
    """Pair every "Baseline" benchmark with its matching test benchmark(s).

    benchmarks -- dict mapping BenchKey -> BenchData.

    A benchmark whose name contains "Baseline" is split into the text before
    and after that word. Any other benchmark of the same type whose name
    starts with that prefix and ends with that suffix is treated as its test
    counterpart. Returns a new dict mapping BenchKey(prefix + suffix, type)
    -> BaselinedBenchData. If several tests match one baseline, later matches
    overwrite earlier ones because they share the same key.
    """
    paired = {}
    for baseKey in benchmarks.keys():
        found = baselineParser.match(baseKey.name)
        if not found:
            # Not a baseline entry.
            continue

        head = found.group(1)
        tail = found.group(2)
        # Dropping "Baseline" from "a, Baseline, b" leaves "a, , b"; tidy that up.
        mergedName = (head + tail).replace(", ,", ",")

        for candidate in benchmarks.keys():
            if candidate.type != baseKey.type:
                continue  # must be measured on the same value type
            if candidate.name == baseKey.name:
                continue  # the baseline itself
            if not (candidate.name.startswith(head) and candidate.name.endswith(tail)):
                continue
            paired[BenchKey(mergedName, candidate.type)] = BaselinedBenchData(
                benchmarks[baseKey], benchmarks[candidate])
    return paired
# Parse the raw results, then reduce them to baseline/test pairs.
benchmarks = {}
parseFile(benchFile, benchmarks)
benchmarks = findBaselines(benchmarks)

# Apply the requested sorts one after another, in command-line order.
# Option names are case-insensitive; unrecognized options are ignored.
keys = benchmarks.keys()
sortKeyFuncs = {
    "sortbytype": lambda k: k.type,
    "sortbyname": lambda k: k.name,
    "sortbyoverhead": lambda k: benchmarks[k].overhead,
    "sortbyratio": lambda k: benchmarks[k].overhead / benchmarks[k].baseline,
}
for opt in (sortOpt or []):
    keyFunc = sortKeyFuncs.get(opt.lower())
    if keyFunc is not None:
        keys = sorted(keys, key=keyFunc)

# Emit the tab-separated summary: header lines first, then one row per pair.
print("# Summary: (%s)"%filename)
print("%-9s\t%-9s\t%-9s\t%-9s\t%-s"%("Baseline", "TestTime", "Overhead", "Test/Base", "Benchmark (type)"))
for key in keys:
    data = benchmarks[key]
    ratio = data.test / data.baseline
    print("%9.6f\t%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.baseline, data.test,
                                                   data.overhead, ratio, key.name, key.type))
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include "Benchmarker.h"
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/AtomicArray.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/exec/FunctorBase.h>
#include <iomanip>
#include <sstream>
#include <string>
namespace vtkm
{
namespace benchmarking
{
// Number of work items passed to Algo::Schedule by the benchmarks below.
// This is 32x larger than the largest array size (1048576 = 2^20), so the
// work index is always reduced modulo the array size before it is used.
static constexpr vtkm::Id NumWrites = 33554432; // 2^25
// Invokes VTKM_MAKE_BENCHMARK once per array size, passing the size as the
// benchmark argument and appending it to Name (Name##1, Name##8, ...).
// Sizes covered: 1, 8, 32, 512, 2048, 32768, 1048576.
// No trailing semicolon is emitted after the last entry; the use site adds it.
// NOTE(review): VTKM_MAKE_BENCHMARK is presumably provided by Benchmarker.h.
#define MAKE_ATOMIC_BENCHMARKS(Name, Class) \
VTKM_MAKE_BENCHMARK(Name##1, Class, 1); \
VTKM_MAKE_BENCHMARK(Name##8, Class, 8); \
VTKM_MAKE_BENCHMARK(Name##32, Class, 32); \
VTKM_MAKE_BENCHMARK(Name##512, Class, 512); \
VTKM_MAKE_BENCHMARK(Name##2048, Class, 2048); \
VTKM_MAKE_BENCHMARK(Name##32768, Class, 32768); \
VTKM_MAKE_BENCHMARK(Name##1048576, Class, 1048576)
// Invokes VTKM_RUN_BENCHMARK for every Name##<size> produced by
// MAKE_ATOMIC_BENCHMARKS(Name, ...), each with the type list
// vtkm::cont::AtomicArrayTypeListTag. The size suffixes here must stay in
// sync with the ones listed in MAKE_ATOMIC_BENCHMARKS.
// No trailing semicolon is emitted after the last entry; the use site adds it.
#define RUN_ATOMIC_BENCHMARKS(Name) \
VTKM_RUN_BENCHMARK(Name##1, vtkm::cont::AtomicArrayTypeListTag{}); \
VTKM_RUN_BENCHMARK(Name##8, vtkm::cont::AtomicArrayTypeListTag{}); \
VTKM_RUN_BENCHMARK(Name##32, vtkm::cont::AtomicArrayTypeListTag{}); \
VTKM_RUN_BENCHMARK(Name##512, vtkm::cont::AtomicArrayTypeListTag{}); \
VTKM_RUN_BENCHMARK(Name##2048, vtkm::cont::AtomicArrayTypeListTag{}); \
VTKM_RUN_BENCHMARK(Name##32768, vtkm::cont::AtomicArrayTypeListTag{}); \
VTKM_RUN_BENCHMARK(Name##1048576, vtkm::cont::AtomicArrayTypeListTag{})
template <class Device>
class BenchmarkAtomicArray
{
public:
using Algo = vtkm::cont::DeviceAdapterAlgorithm<Device>;
using Timer = vtkm::cont::Timer<Device>;
// Benchmarks AtomicArray::Add where work index i adds 1 to element
// (i % ArraySize), so consecutive work indices write to adjacent elements.
template <typename ValueType>
struct BenchAddSeq
{
  vtkm::Id ArraySize;
  vtkm::cont::ArrayHandle<ValueType> Data;

  // Functor handed to Algo::Schedule; invoked once per work index.
  template <typename PortalType>
  struct Worker : public vtkm::exec::FunctorBase
  {
    vtkm::Id ArraySize;
    PortalType Portal;

    VTKM_CONT
    Worker(vtkm::Id size, PortalType execPortal)
      : ArraySize(size)
      , Portal(execPortal)
    {
    }

    VTKM_EXEC
    void operator()(vtkm::Id workIndex) const
    {
      // Wrap the work index into the array.
      const vtkm::Id target = workIndex % this->ArraySize;
      this->Portal.Add(target, 1);
    }
  };

  // Sizes Data to arraySize entries on Device.
  // NOTE(review): nothing visible here writes initial values into Data
  // before the benchmark runs -- confirm that is intended.
  BenchAddSeq(vtkm::Id arraySize)
    : ArraySize(arraySize)
  {
    this->Data.PrepareForOutput(this->ArraySize, Device{});
  }

  // Schedules NumWrites invocations of Worker and returns the time the
  // Timer reports for that call. Portal setup happens before timing starts.
  VTKM_CONT
  vtkm::Float64 operator()()
  {
    vtkm::cont::AtomicArray<ValueType> atomicData(this->Data);
    auto execPortal = atomicData.PrepareForExecution(Device{});
    using WorkerType = Worker<decltype(execPortal)>;
    WorkerType functor(this->ArraySize, execPortal);

    Timer stopwatch;
    Algo::Schedule(functor, NumWrites);
    return stopwatch.GetElapsedTime();
  }

  // Benchmark label; the array size is zero-padded to 7 digits.
  VTKM_CONT
  std::string Description() const
  {
    std::ostringstream label;
    label << "Add (Seq, Atomic, ";
    label << std::setfill('0') << std::setw(7) << this->ArraySize;
    label << ")";
    return label.str();
  }
};
MAKE_ATOMIC_BENCHMARKS(AddSeq, BenchAddSeq);
// Non-atomic counterpart of BenchAddSeq: work index i reads element
// (i % ArraySize) through a regular array portal, adds 1 and writes it back.
template <typename ValueType>
struct BenchAddSeqBaseline
{
  vtkm::Id ArraySize;
  vtkm::cont::ArrayHandle<ValueType> Data;

  // Functor handed to Algo::Schedule; invoked once per work index.
  template <typename PortalType>
  struct Worker : public vtkm::exec::FunctorBase
  {
    vtkm::Id ArraySize;
    PortalType Portal;

    VTKM_CONT
    Worker(vtkm::Id size, PortalType execPortal)
      : ArraySize(size)
      , Portal(execPortal)
    {
    }

    VTKM_EXEC
    void operator()(vtkm::Id workIndex) const
    {
      const vtkm::Id target = workIndex % this->ArraySize;
      // Plain Get followed by Set; no atomic operation is involved.
      const auto bumped = this->Portal.Get(target) + 1;
      this->Portal.Set(target, bumped);
    }
  };

  // Only records the size; the array is prepared in operator()().
  BenchAddSeqBaseline(vtkm::Id arraySize)
    : ArraySize(arraySize)
  {
  }

  // Schedules NumWrites invocations of Worker and returns the time the
  // Timer reports for that call. Portal setup happens before timing starts.
  VTKM_CONT
  vtkm::Float64 operator()()
  {
    auto execPortal = this->Data.PrepareForOutput(this->ArraySize, Device{});
    using WorkerType = Worker<decltype(execPortal)>;
    WorkerType functor(this->ArraySize, execPortal);

    Timer stopwatch;
    Algo::Schedule(functor, NumWrites);
    return stopwatch.GetElapsedTime();
  }

  // Benchmark label; the array size is zero-padded to 7 digits.
  VTKM_CONT
  std::string Description() const
  {
    std::ostringstream label;
    label << "Add (Seq, Baseline, ";
    label << std::setfill('0') << std::setw(7) << this->ArraySize;
    label << ")";
    return label.str();
  }
};
MAKE_ATOMIC_BENCHMARKS(AddSeqBase, BenchAddSeqBaseline);
// Benchmarks AtomicArray::Add where work index i adds 1 to the element at
//   ( floor(i / stride) + stride * (i % stride) ) % ArraySize
template <typename ValueType>
struct BenchAddStride
{
  vtkm::Id ArraySize;
  vtkm::Id Stride;
  vtkm::cont::ArrayHandle<ValueType> Data;

  // Functor handed to Algo::Schedule; invoked once per work index.
  template <typename PortalType>
  struct Worker : public vtkm::exec::FunctorBase
  {
    vtkm::Id ArraySize;
    vtkm::Id Stride;
    PortalType Portal;

    VTKM_CONT
    Worker(vtkm::Id arraySize, vtkm::Id stride, PortalType portal)
      : ArraySize(arraySize)
      , Stride(stride)
      , Portal(portal)
    {
    }

    VTKM_EXEC
    void operator()(vtkm::Id i) const
    {
      // idx is already reduced modulo ArraySize here. It is not reduced a
      // second time, so the index arithmetic matches BenchAddStrideBaseline
      // and the measured difference reflects only the atomic operation.
      vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % this->ArraySize;
      this->Portal.Add(idx, 1);
    }
  };

  // Sizes Data to arraySize entries on Device; stride defaults to 32.
  // NOTE(review): nothing visible here writes initial values into Data
  // before the benchmark runs -- confirm that is intended.
  BenchAddStride(vtkm::Id arraySize, vtkm::Id stride = 32)
    : ArraySize(arraySize)
    , Stride(stride)
  {
    this->Data.PrepareForOutput(this->ArraySize, Device{});
  }

  // Schedules NumWrites invocations of Worker and returns the time the
  // Timer reports for that call. Portal setup happens before timing starts.
  VTKM_CONT
  vtkm::Float64 operator()()
  {
    vtkm::cont::AtomicArray<ValueType> array(this->Data);
    auto portal = array.PrepareForExecution(Device{});
    Worker<decltype(portal)> worker{ this->ArraySize, this->Stride, portal };

    Timer timer;
    Algo::Schedule(worker, NumWrites);
    return timer.GetElapsedTime();
  }

  // Benchmark label; the array size is zero-padded to 7 digits.
  VTKM_CONT
  std::string Description() const
  {
    std::ostringstream desc;
    desc << "Add (Stride=" << this->Stride << ", Atomic, " << std::setw(7) << std::setfill('0')
         << this->ArraySize << ")";
    return desc.str();
  }
};
MAKE_ATOMIC_BENCHMARKS(AddStride, BenchAddStride);
// Non-atomic counterpart of BenchAddStride: work index i reads the element at
//   ( floor(i / stride) + stride * (i % stride) ) % ArraySize
// through a regular array portal, adds 1 and writes it back.
template <typename ValueType>
struct BenchAddStrideBaseline
{
  vtkm::Id ArraySize;
  vtkm::Id Stride;
  vtkm::cont::ArrayHandle<ValueType> Data;

  // Functor handed to Algo::Schedule; invoked once per work index.
  template <typename PortalType>
  struct Worker : public vtkm::exec::FunctorBase
  {
    vtkm::Id ArraySize;
    vtkm::Id Stride;
    PortalType Portal;

    VTKM_CONT
    Worker(vtkm::Id size, vtkm::Id step, PortalType execPortal)
      : ArraySize(size)
      , Stride(step)
      , Portal(execPortal)
    {
    }

    VTKM_EXEC
    void operator()(vtkm::Id workIndex) const
    {
      const vtkm::Id quotient = workIndex / this->Stride;
      const vtkm::Id remainder = workIndex % this->Stride;
      const vtkm::Id target = (quotient + this->Stride * remainder) % this->ArraySize;
      // Plain Get followed by Set; no atomic operation is involved.
      const auto bumped = this->Portal.Get(target) + 1;
      this->Portal.Set(target, bumped);
    }
  };

  // Only records the size and stride (default 32); the array is prepared in
  // operator()().
  BenchAddStrideBaseline(vtkm::Id arraySize, vtkm::Id stride = 32)
    : ArraySize(arraySize)
    , Stride(stride)
  {
  }

  // Schedules NumWrites invocations of Worker and returns the time the
  // Timer reports for that call. Portal setup happens before timing starts.
  VTKM_CONT
  vtkm::Float64 operator()()
  {
    auto execPortal = this->Data.PrepareForOutput(this->ArraySize, Device{});
    using WorkerType = Worker<decltype(execPortal)>;
    WorkerType functor(this->ArraySize, this->Stride, execPortal);

    Timer stopwatch;
    Algo::Schedule(functor, NumWrites);
    return stopwatch.GetElapsedTime();
  }

  // Benchmark label; the array size is zero-padded to 7 digits.
  VTKM_CONT
  std::string Description() const
  {
    std::ostringstream label;
    label << "Add (Stride=" << this->Stride << ", Baseline, ";
    label << std::setfill('0') << std::setw(7) << this->ArraySize;
    label << ")";
    return label.str();
  }
};
MAKE_ATOMIC_BENCHMARKS(AddStrideBase, BenchAddStrideBaseline);
// Benchmarks AtomicArray::CompareAndSwap where work index i targets element
// (i % ArraySize), so consecutive work indices write to adjacent elements.
// Each invocation adds its own work index to the element via a CAS retry loop.
template <typename ValueType>
struct BenchCASSeq
{
  vtkm::Id ArraySize;
  vtkm::cont::ArrayHandle<ValueType> Data;

  // Functor handed to Algo::Schedule; invoked once per work index.
  template <typename PortalType>
  struct Worker : public vtkm::exec::FunctorBase
  {
    vtkm::Id ArraySize;
    PortalType Portal;

    VTKM_CONT
    Worker(vtkm::Id size, PortalType execPortal)
      : ArraySize(size)
      , Portal(execPortal)
    {
    }

    VTKM_EXEC
    void operator()(vtkm::Id workIndex) const
    {
      const vtkm::Id target = workIndex % this->ArraySize;
      const ValueType increment = static_cast<ValueType>(workIndex);

      // Adding zero is a no-op that is used to obtain the element's old value.
      ValueType observed = this->Portal.Add(target, static_cast<ValueType>(0));
      ValueType expected = static_cast<ValueType>(0);
      // Retry until the value returned by CompareAndSwap equals the value we
      // expected to find. Presumably this means the swap was applied
      // (TODO confirm against the AtomicArray portal documentation).
      do
      {
        expected = observed;
        observed = this->Portal.CompareAndSwap(target, expected + increment, expected);
      } while (expected != observed);
    }
  };

  // Sizes Data to arraySize entries on Device.
  // NOTE(review): nothing visible here writes initial values into Data
  // before the benchmark runs -- confirm that is intended.
  BenchCASSeq(vtkm::Id arraySize)
    : ArraySize(arraySize)
  {
    this->Data.PrepareForOutput(this->ArraySize, Device{});
  }

  // Schedules NumWrites invocations of Worker and returns the time the
  // Timer reports for that call. Portal setup happens before timing starts.
  VTKM_CONT
  vtkm::Float64 operator()()
  {
    vtkm::cont::AtomicArray<ValueType> atomicData(this->Data);
    auto execPortal = atomicData.PrepareForExecution(Device{});
    using WorkerType = Worker<decltype(execPortal)>;
    WorkerType functor(this->ArraySize, execPortal);

    Timer stopwatch;
    Algo::Schedule(functor, NumWrites);
    return stopwatch.GetElapsedTime();
  }

  // Benchmark label; the array size is zero-padded to 7 digits.
  VTKM_CONT
  std::string Description() const
  {
    std::ostringstream label;
    label << "CAS (Seq, Atomic, ";
    label << std::setfill('0') << std::setw(7) << this->ArraySize;
    label << ")";
    return label.str();
  }
};
MAKE_ATOMIC_BENCHMARKS(CASSeq, BenchCASSeq);
// Non-atomic counterpart of BenchCASSeq: work index i reads element
// (i % ArraySize) through a regular array portal, adds i and writes it back.
template <typename ValueType>
struct BenchCASSeqBaseline
{
  vtkm::Id ArraySize;
  vtkm::cont::ArrayHandle<ValueType> Data;

  // Functor handed to Algo::Schedule; invoked once per work index.
  template <typename PortalType>
  struct Worker : public vtkm::exec::FunctorBase
  {
    vtkm::Id ArraySize;
    PortalType Portal;

    VTKM_CONT
    Worker(vtkm::Id size, PortalType execPortal)
      : ArraySize(size)
      , Portal(execPortal)
    {
    }

    VTKM_EXEC
    void operator()(vtkm::Id workIndex) const
    {
      const vtkm::Id target = workIndex % this->ArraySize;
      const ValueType increment = static_cast<ValueType>(workIndex);
      // Plain Get followed by Set; no atomic operation is involved.
      const ValueType previous = this->Portal.Get(target);
      this->Portal.Set(target, previous + increment);
    }
  };

  // Only records the size; the array is prepared in operator()().
  BenchCASSeqBaseline(vtkm::Id arraySize)
    : ArraySize(arraySize)
  {
  }

  // Schedules NumWrites invocations of Worker and returns the time the
  // Timer reports for that call. Portal setup happens before timing starts.
  VTKM_CONT
  vtkm::Float64 operator()()
  {
    auto execPortal = this->Data.PrepareForOutput(this->ArraySize, Device{});
    using WorkerType = Worker<decltype(execPortal)>;
    WorkerType functor(this->ArraySize, execPortal);

    Timer stopwatch;
    Algo::Schedule(functor, NumWrites);
    return stopwatch.GetElapsedTime();
  }

  // Benchmark label; the array size is zero-padded to 7 digits.
  VTKM_CONT
  std::string Description() const
  {
    std::ostringstream label;
    label << "CAS (Seq, Baseline, ";
    label << std::setfill('0') << std::setw(7) << this->ArraySize;
    label << ")";
    return label.str();
  }
};
MAKE_ATOMIC_BENCHMARKS(CASSeqBase, BenchCASSeqBaseline);
// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to
// a strided index:
// ( floor(i / stride) + stride * (i % stride) )
template <typename ValueType>
struct BenchCASStride
{
vtkm::Id ArraySize;
vtkm::Id Stride;
vtkm::cont::ArrayHandle<ValueType> Data;
template <typename PortalType>
struct Worker : public vtkm::exec::FunctorBase
{
vtkm::Id ArraySize;
vtkm::Id Stride;
PortalType Portal;
VTKM_CONT
Worker(vtkm::Id arraySize, vtkm::Id stride, PortalType portal)
: ArraySize(arraySize)