Commit b168c200 authored by Nickolas Davis 🥑, committed by Kitware Robot
Browse files

Merge topic 'openmp-runtime-device-config'

277c3360 implement openmp runtime device configuration testing
1c4d6810 implement openmp runtime device configuration and update code to use it
Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Kenneth Moreland <morelandkd@ornl.gov>
Merge-request: !2573
parents ff719757 277c3360
......@@ -937,9 +937,6 @@ void InitDataSet(int& argc, char** argv)
tbb::task_scheduler_init init((numThreads > 0) ? numThreads
: tbb::task_scheduler_init::automatic);
#endif
#endif
#ifdef VTKM_ENABLE_OPENMP
omp_set_num_threads((numThreads > 0) ? numThreads : omp_get_max_threads());
#endif
// Now go back through the arg list and remove anything that is not in the list of
......
......@@ -97,16 +97,10 @@ public:
auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(outputPortal);
CopyIfHelper helper;
helper.Initialize(inSize, sizeof(T));
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
{
VTKM_OPENMP_DIRECTIVE(single)
{
// Calls omp_get_num_threads, thus must be used inside a parallel section.
helper.Initialize(inSize, sizeof(T));
}
VTKM_OPENMP_DIRECTIVE(for schedule(static))
for (vtkm::Id i = 0; i < helper.NumChunks; ++i)
{
......
......@@ -10,6 +10,7 @@
#ifndef vtk_m_cont_openmp_internal_FunctorsOpenMP_h
#define vtk_m_cont_openmp_internal_FunctorsOpenMP_h
#include <vtkm/cont/RuntimeDeviceInformation.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
......@@ -165,8 +166,11 @@ static void CopyHelper(InPortalT inPortal,
// Evenly distribute full pages to all threads. We manually chunk the
// data here so that we can exploit std::copy's memmove optimizations.
vtkm::Id numChunks;
ComputeChunkSize(
numVals, omp_get_num_threads(), 8, sizeof(InValueT), numChunks, valuesPerChunk);
vtkm::Id numThreads;
vtkm::cont::RuntimeDeviceInformation{}
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP())
.GetThreads(numThreads);
ComputeChunkSize(numVals, numThreads, 8, sizeof(InValueT), numChunks, valuesPerChunk);
}
VTKM_OPENMP_DIRECTIVE(for schedule(static))
......@@ -193,7 +197,9 @@ struct CopyIfHelper
void Initialize(vtkm::Id numValues, vtkm::Id valueSize)
{
this->NumValues = numValues;
this->NumThreads = static_cast<vtkm::Id>(omp_get_num_threads());
vtkm::cont::RuntimeDeviceInformation{}
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP())
.GetThreads(this->NumThreads);
this->ValueSize = valueSize;
// Evenly distribute pages across the threads. We manually chunk the
......@@ -326,18 +332,19 @@ struct ReduceHelper
auto data = vtkm::cont::ArrayPortalToIteratorBegin(portal);
bool doParallel = false;
int numThreads = 0;
vtkm::Id numThreads = 0;
std::unique_ptr<ReturnType[]> threadData;
VTKM_OPENMP_DIRECTIVE(parallel default(none) firstprivate(f) shared(
data, doParallel, numThreads, threadData) VTKM_OPENMP_SHARED_CONST(numVals))
{
int tid = omp_get_thread_num();
VTKM_OPENMP_DIRECTIVE(single)
{
numThreads = omp_get_num_threads();
vtkm::cont::RuntimeDeviceInformation{}
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP())
.GetThreads(numThreads);
if (numVals >= numThreads * 2)
{
doParallel = true;
......@@ -532,7 +539,10 @@ void ReduceByKeyHelper(KeysInArray keysInArray,
shared(outIdx) VTKM_OPENMP_SHARED_CONST(numValues))
{
int tid = omp_get_thread_num();
int numThreads = omp_get_num_threads();
vtkm::Id numThreads = 0;
vtkm::cont::RuntimeDeviceInformation{}
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP())
.GetThreads(numThreads);
// Determine bounds for this thread's scan operation:
vtkm::Id chunkSize = (numValues + numThreads - 1) / numThreads;
......@@ -677,7 +687,11 @@ private:
void Prepare()
{
// Figure out how many values each thread should handle:
int numThreads = omp_get_num_threads();
vtkm::Id numThreads = 0;
vtkm::cont::RuntimeDeviceInformation{}
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP())
.GetThreads(numThreads);
vtkm::Id chunksPerThread = 8;
vtkm::Id numChunks;
ComputeChunkSize(
......
......@@ -10,6 +10,9 @@
#include <vtkm/cont/internal/ParallelRadixSort.h>
#include <vtkm/cont/RuntimeDeviceInformation.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <omp.h>
namespace vtkm
......@@ -27,20 +30,11 @@ struct RadixThreaderOpenMP
{
size_t GetAvailableCores() const
{
size_t result;
if (omp_in_parallel())
{
result = static_cast<size_t>(omp_get_num_threads());
}
else
{
#pragma omp parallel
{
result = static_cast<size_t>(omp_get_num_threads());
}
}
return result;
vtkm::Id result;
vtkm::cont::RuntimeDeviceInformation{}
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP())
.GetThreads(result);
return static_cast<size_t>(result);
}
template <typename TaskType>
......
......@@ -8,6 +8,7 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/RuntimeDeviceInformation.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
......@@ -153,7 +154,10 @@ private:
{
// Figure out how many values each thread should handle:
vtkm::Id numVals = range[1] - range[0];
int numThreads = omp_get_num_threads();
vtkm::Id numThreads = 0;
vtkm::cont::RuntimeDeviceInformation{}
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagOpenMP())
.GetThreads(numThreads);
vtkm::Id chunksPerThread = 8;
vtkm::Id numChunks;
ComputeChunkSize(
......
......@@ -13,6 +13,12 @@
#include <vtkm/cont/internal/RuntimeDeviceConfiguration.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/Logging.h>
VTKM_THIRDPARTY_PRE_INCLUDE
#include <omp.h>
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm
{
namespace cont
......@@ -24,39 +30,72 @@ template <>
class RuntimeDeviceConfiguration<vtkm::cont::DeviceAdapterTagOpenMP>
: public vtkm::cont::internal::RuntimeDeviceConfigurationBase
{
VTKM_CONT vtkm::cont::DeviceAdapterId GetDevice() const override final
public:
RuntimeDeviceConfiguration<vtkm::cont::DeviceAdapterTagOpenMP>()
: HardwareMaxThreads(InitializeHardwareMaxThreads())
, CurrentNumThreads(this->HardwareMaxThreads)
{
return vtkm::cont::DeviceAdapterTagOpenMP{};
}
VTKM_CONT virtual RuntimeDeviceConfigReturnCode SetThreads(const vtkm::Id&) override final
VTKM_CONT vtkm::cont::DeviceAdapterId GetDevice() const override final
{
// TODO: Set the threads in OpenMP
return RuntimeDeviceConfigReturnCode::SUCCESS;
return vtkm::cont::DeviceAdapterTagOpenMP{};
}
VTKM_CONT virtual RuntimeDeviceConfigReturnCode SetNumaRegions(const vtkm::Id&) override final
VTKM_CONT virtual RuntimeDeviceConfigReturnCode SetThreads(const vtkm::Id& value) override final
{
// TODO: Set the numa regions in OpenMP
if (omp_in_parallel())
{
VTKM_LOG_S(vtkm::cont::LogLevel::Error, "OpenMP SetThreads: Error, currently in parallel");
return RuntimeDeviceConfigReturnCode::NOT_APPLIED;
}
if (value > 0)
{
if (value > this->HardwareMaxThreads)
{
VTKM_LOG_S(vtkm::cont::LogLevel::Warn,
"OpenMP: You may be oversubscribing your CPU cores: "
<< "process threads available: " << this->HardwareMaxThreads
<< ", requested threads: " << value);
}
this->CurrentNumThreads = value;
omp_set_num_threads(this->CurrentNumThreads);
}
else
{
this->CurrentNumThreads = this->HardwareMaxThreads;
omp_set_num_threads(this->CurrentNumThreads);
}
return RuntimeDeviceConfigReturnCode::SUCCESS;
}
VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetThreads(vtkm::Id&) const override final
VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetThreads(vtkm::Id& value) const override final
{
// TODO: Get the number of OpenMP threads
value = this->CurrentNumThreads;
return RuntimeDeviceConfigReturnCode::SUCCESS;
}
VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetNumaRegions(vtkm::Id&) const override final
VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetMaxThreads(
vtkm::Id& value) const override final
{
// TODO: Get the number of OpenMP NumaRegions
value = this->HardwareMaxThreads;
return RuntimeDeviceConfigReturnCode::SUCCESS;
}
VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetMaxThreads(vtkm::Id&) const override final
private:
VTKM_CONT vtkm::Id InitializeHardwareMaxThreads() const
{
return RuntimeDeviceConfigReturnCode::SUCCESS;
vtkm::Id count = 0;
VTKM_OPENMP_DIRECTIVE(parallel)
{
VTKM_OPENMP_DIRECTIVE(atomic)
++count;
}
return count;
}
vtkm::Id HardwareMaxThreads;
vtkm::Id CurrentNumThreads;
};
} // namespace vtkm::cont::internal
} // namespace vtkm::cont
......
......@@ -23,6 +23,7 @@ set(unit_tests
UnitTestOpenMPDeviceAdapter.cxx
UnitTestOpenMPImplicitFunction.cxx
UnitTestOpenMPPointLocatorSparseGrid.cxx
UnitTestOpenMPRuntimeDeviceConfiguration.cxx
)
if (NOT VTKm_NO_DEPRECATED_VIRTUAL)
......
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingRuntimeDeviceConfiguration.h>
namespace internal = vtkm::cont::internal;
namespace vtkm
{
namespace cont
{
namespace testing
{
/// OpenMP specialization of the runtime device configuration test.
///
/// The test proceeds in three steps:
///   1. Query OpenMP directly for its default max-thread count and team size
///      and check that the two agree.
///   2. Request half of that team size through the `VTKmNumThreads` option and
///      obtain the OpenMP runtime configuration built from those options.
///   3. Check that the configuration's `GetThreads` / `GetMaxThreads` results
///      match what OpenMP itself reports afterwards.
template <>
VTKM_CONT void
TestingRuntimeDeviceConfiguration<vtkm::cont::DeviceAdapterTagOpenMP>::TestRuntimeConfig()
{
  auto deviceOptions = TestingRuntimeDeviceConfiguration::DefaultInitializeConfigOptions();

  vtkm::Id maxThreads = 0;
  vtkm::Id numThreads = 0;
  VTKM_OPENMP_DIRECTIVE(parallel)
  {
    // omp_get_num_threads() has to be called from inside the parallel region
    // to report the team size. Only one thread performs the stores so the
    // shared variables are not written concurrently by the whole team.
    VTKM_OPENMP_DIRECTIVE(single)
    {
      maxThreads = omp_get_max_threads();
      numThreads = omp_get_num_threads();
    }
  }
  VTKM_TEST_ASSERT(maxThreads == numThreads,
                   "openMP by default maxthreads should == numthreads " +
                     std::to_string(maxThreads) + " != " + std::to_string(numThreads));

  // Ask for half of the default team size.
  // NOTE(review): with a single default thread this requests 0 threads; the
  // test then relies on how the configuration treats a non-positive request
  // -- confirm that this is the intended behavior on single-core runners.
  numThreads = numThreads / 2;
  deviceOptions.VTKmNumThreads.SetOption(numThreads);

  auto& config =
    RuntimeDeviceInformation{}.GetRuntimeConfiguration(DeviceAdapterTagOpenMP(), deviceOptions);

  // Zero-initialized so a failed getter cannot leave an indeterminate value
  // to be read when the assertion messages below are built.
  vtkm::Id setNumThreads = 0;
  vtkm::Id setMaxThreads = 0;

  // Re-query the team size OpenMP actually uses now that the options have
  // been handed to the runtime configuration.
  VTKM_OPENMP_DIRECTIVE(parallel)
  {
    VTKM_OPENMP_DIRECTIVE(single)
    {
      numThreads = omp_get_num_threads();
    }
  }

  VTKM_TEST_ASSERT(config.GetThreads(setNumThreads) ==
                     internal::RuntimeDeviceConfigReturnCode::SUCCESS,
                   "Failed to get num threads");
  VTKM_TEST_ASSERT(setNumThreads == numThreads,
                   "RTC's numThreads != numThreads openmp direct! " +
                     std::to_string(setNumThreads) + " != " + std::to_string(numThreads));

  VTKM_TEST_ASSERT(config.GetMaxThreads(setMaxThreads) ==
                     internal::RuntimeDeviceConfigReturnCode::SUCCESS,
                   "Failed to get max threads");
  VTKM_TEST_ASSERT(setMaxThreads == maxThreads,
                   "RTC's maxThreads != maxThreads openmp direct! " +
                     std::to_string(setMaxThreads) + " != " + std::to_string(maxThreads));
}
} // namespace vtkm::cont::testing
} // namespace vtkm::cont
} // namespace vtkm
int UnitTestOpenMPRuntimeDeviceConfiguration(int argc, char* argv[])
{
return vtkm::cont::testing::TestingRuntimeDeviceConfiguration<
vtkm::cont::DeviceAdapterTagOpenMP>::Run(argc, argv);
}
......@@ -13,7 +13,9 @@
#include <vtkm/cont/CellLocatorBoundingIntervalHierarchy.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/cont/RuntimeDeviceInformation.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/exec/CellInterpolate.h>
#include <vtkm/exec/ParametricCoordinates.h>
......@@ -93,7 +95,11 @@ void RunTest()
//cpu usage it will fail, so we limit the number of threads
//to avoid the test timing out
#ifdef VTKM_ENABLE_OPENMP
omp_set_num_threads(std::min(4, omp_get_max_threads()));
auto& runtimeConfig = vtkm::cont::RuntimeDeviceInformation{}.GetRuntimeConfiguration(
vtkm::cont::DeviceAdapterTagOpenMP());
vtkm::Id maxThreads = 0;
runtimeConfig.GetMaxThreads(maxThreads);
runtimeConfig.SetThreads(std::min(static_cast<vtkm::Id>(4), maxThreads));
#endif
TestBoundingIntervalHierarchy(ConstructDataSet(8), 3);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment