Commit e040a9fa authored by Nghia Truong's avatar Nghia Truong
Browse files

REFAC: Move parallel helper functions into ParallelUtils namespace, add more...

REFAC: Move parallel helper functions into ParallelUtils namespace, add more documentation and change some function names
parent 2a8cf73a
......@@ -27,6 +27,8 @@
namespace imstk
{
namespace ParallelUtils
{
///
/// \brief Perform an atomic operation: target = f(target, operand)
///
......@@ -76,7 +78,7 @@ void imstk_atomic_multiply(T& target, const T operand)
/// \brief Atomic division for scalar numbers: target = target / operand
///
template<class T>
void imstk_atomic_divide(T& target, T operand)
void imstk_atomic_divide(T& target, const T operand)
{
imstk_atomic_op(target, operand, [](T a, T b) { return a / b; });
}
......@@ -128,4 +130,5 @@ void imstk_atomic_divide(Eigen::Matrix<T, N, 1>& target, const T operand)
imstk_atomic_divide(target[i], operand);
}
}
}// end namespace ParallelUtils
}// end namespace imstk
......@@ -25,18 +25,20 @@
namespace imstk
{
namespace ParallelUtils
{
///
/// \brief Execute a function in parallel over a range [beginIdx, endIdx) of indices
///
template<class IndexType, class Function>
void imstk_parallel_for(IndexType beginIdx, IndexType endIdx, Function&& function)
void parallelFor(const IndexType beginIdx, const IndexType endIdx, Function&& function)
{
tbb::parallel_for(tbb::blocked_range<IndexType>(beginIdx, endIdx),
[&](const tbb::blocked_range<IndexType>& r) {
for(IndexType i = r.begin(), iEnd = r.end(); i < iEnd; ++i)
{
function(i);
}
for(IndexType i = r.begin(), iEnd = r.end(); i < iEnd; ++i)
{
function(i);
}
});
}
......@@ -44,110 +46,111 @@ void imstk_parallel_for(IndexType beginIdx, IndexType endIdx, Function&& functio
/// \brief Execute a function in parallel over a range [0, endIdx) of indices
///
template<class IndexType, class Function>
void imstk_parallel_for(IndexType endIdx, Function&& function)
void parallelFor(const IndexType endIdx, Function&& function)
{
imstk_parallel_for(IndexType(0), endIdx, std::forward<Function>(function));
parallelFor(IndexType(0), endIdx, std::forward<Function>(function));
}
///
/// \brief Execute a 2D function in parallel over a range of indices in the x dimension,
/// while indices in the y dimension are scanned sequentially
/// indices in the y dimension are scanned sequentially
///
template<class IndexType, class Function>
void imstk_parallel_for_2Dx(IndexType beginX, IndexType endX,
IndexType beginY, IndexType endY,
Function&& function)
void parallelFor2Dx(const IndexType beginX, const IndexType endX,
const IndexType beginY, const IndexType endY,
Function&& function)
{
imstk_parallel_for(beginX, endX,
parallelFor(beginX, endX,
[&](IndexType i) {
for(IndexType j = beginY; j < endY; ++j)
{
function(i, j);
}
for(IndexType j = beginY; j < endY; ++j)
{
function(i, j);
}
});
}
///
/// \brief Execute a 2D function in parallel over a range of indices in the y dimension,
/// while indices in the x dimension are scanned sequentially
/// indices in the x dimension are scanned sequentially
///
template<class IndexType, class Function>
void imstk_parallel_for_2Dy(IndexType beginX, IndexType endX,
IndexType beginY, IndexType endY,
Function&& function)
void parallelFor2Dy(const IndexType beginX, const IndexType endX,
const IndexType beginY, const IndexType endY,
Function&& function)
{
imstk_parallel_for(beginY, endY,
parallelFor(beginY, endY,
[&](IndexType j) {
for(IndexType i = beginX; i < endX; ++i)
{
function(i, j);
}
for(IndexType i = beginX; i < endX; ++i)
{
function(i, j);
}
});
}
///
/// \brief Execute a 3D function in parallel over a range of indices in the x dimension,
/// while indices in the y and z dimensions are scanned sequentially
/// indices in the y and z dimensions are scanned sequentially
///
template<class IndexType, class Function>
void imstk_parallel_for_3Dx(IndexType beginX, IndexType endX,
IndexType beginY, IndexType endY,
IndexType beginZ, IndexType endZ,
Function&& function)
void parallelFor3Dx(const IndexType beginX, const IndexType endX,
const IndexType beginY, const IndexType endY,
const IndexType beginZ, const IndexType endZ,
Function&& function)
{
imstk_parallel_for(beginX, endX,
parallelFor(beginX, endX,
[&](IndexType i) {
for(IndexType j = beginY; j < endY; ++j)
{
for(IndexType k = beginZ; k < endZ; ++k)
for(IndexType j = beginY; j < endY; ++j)
{
function(i, j, k);
for(IndexType k = beginZ; k < endZ; ++k)
{
function(i, j, k);
}
}
}
});
}
///
/// \brief Execute a 3D function in parallel over a range of indices in the y dimension,
/// while indices in the x and z dimensions are scanned sequentially
/// indices in the x and z dimensions are scanned sequentially
///
template<class IndexType, class Function>
void imstk_parallel_for_3Dy(IndexType beginX, IndexType endX,
IndexType beginY, IndexType endY,
IndexType beginZ, IndexType endZ,
Function&& function)
void parallelFor3Dy(const IndexType beginX, const IndexType endX,
const IndexType beginY, const IndexType endY,
const IndexType beginZ, const IndexType endZ,
Function&& function)
{
imstk_parallel_for(beginY, endY,
parallelFor(beginY, endY,
[&](IndexType j) {
for(IndexType i = beginX; i < endX; ++i)
{
for(IndexType k = beginZ; k < endZ; ++k)
for(IndexType i = beginX; i < endX; ++i)
{
function(i, j, k);
for(IndexType k = beginZ; k < endZ; ++k)
{
function(i, j, k);
}
}
}
});
}
///
/// \brief Execute a 3D function in parallel over a range of indices in the z dimension,
/// while indices in the x and y dimensions are scanned sequentially
/// indices in the x and y dimensions are scanned sequentially
///
template<class IndexType, class Function>
void imstk_parallel_for_3Dz(IndexType beginX, IndexType endX,
IndexType beginY, IndexType endY,
IndexType beginZ, IndexType endZ,
Function&& function)
void parallelFor3Dz(const IndexType beginX, const IndexType endX,
const IndexType beginY, const IndexType endY,
const IndexType beginZ, const IndexType endZ,
Function&& function)
{
imstk_parallel_for(beginX, endX,
parallelFor(beginX, endX,
[&](IndexType i) {
for(IndexType j = beginY; j < endY; ++j)
{
for(IndexType k = beginZ; k < endZ; ++k)
for(IndexType j = beginY; j < endY; ++j)
{
function(i, j, k);
for(IndexType k = beginZ; k < endZ; ++k)
{
function(i, j, k);
}
}
}
});
}
} // end namespace ParallelUtils
} // end namespace imstk
......@@ -30,22 +30,22 @@
namespace imstk
{
namespace ParallelUtils
{
///
/// \brief The ParallelReduce class
/// \brief A class for executing reduce operations in parallel
///
class ParallelReduce
{
using Vec3r = Eigen::Matrix<Real, 3, 1>;
using StdVT_Vec3r = std::vector<Vec3r, Eigen::aligned_allocator<Vec3r>>;
///
/// \brief Private helper class for finding max norm
/// \brief Private helper class, providing the operator() used by tbb::parallel_reduce
/// for finding the max L2 norm of an array of Vec3r
///
class ParallelMaxNorm
{
public:
ParallelMaxNorm(const StdVT_Vec3r& data) : m_Data(data) {}
ParallelMaxNorm(const StdVectorOfVec3r& data) : m_Data(data) {}
ParallelMaxNorm(ParallelMaxNorm& pObj, tbb::split) : m_Data(pObj.m_Data) {}
void operator()(const tbb::blocked_range<size_t>& r)
......@@ -62,16 +62,17 @@ public:
private:
Real m_Result = 0;
const StdVT_Vec3r& m_Data;
const StdVectorOfVec3r& m_Data;
};
///
/// \brief Private helper class for finding AABB of a point set
/// \brief Private helper class, providing the operator() used by tbb::parallel_reduce
/// for finding the axis-aligned bounding box of a point set
///
class ParallelAABB
{
public:
ParallelAABB(const StdVT_Vec3r& data) : m_Data(data) { if(data.size() > 0) { m_UpperCorner = data[0]; } }
ParallelAABB(const StdVectorOfVec3r& data) : m_Data(data) { if(data.size() > 0) { m_UpperCorner = data[0]; } }
ParallelAABB(ParallelAABB& pObj, tbb::split) : m_Data(pObj.m_Data) {}
void operator()(const tbb::blocked_range<size_t>& r)
......@@ -106,14 +107,14 @@ private:
Vec3r m_UpperCorner = Vec3r(-std::numeric_limits<Real>::max(),
-std::numeric_limits<Real>::max(),
-std::numeric_limits<Real>::max());
const StdVT_Vec3r& m_Data;
const StdVectorOfVec3r& m_Data;
};
public:
///
/// \brief Find the maximum value of vector magnitude (|| v ||) for each vector v in the input data array
/// \brief Find the maximum L2 norm over all vectors v of type Vec3r in the input data array
///
static Real getMaxNorm(const StdVT_Vec3r& data)
static Real getMaxL2Norm(const StdVectorOfVec3r& data)
{
ParallelMaxNorm pObj(data);
tbb::parallel_reduce(tbb::blocked_range<size_t>(0, data.size()), pObj);
......@@ -123,7 +124,7 @@ public:
///
/// \brief Find the bounding box of a point set
///
static void getAABB(const StdVT_Vec3r& points, Vec3r& lowerCorner, Vec3r& upperCorner)
static void findAABB(const StdVectorOfVec3r& points, Vec3r& lowerCorner, Vec3r& upperCorner)
{
ParallelAABB pObj(points);
tbb::parallel_reduce(tbb::blocked_range<size_t>(0, points.size()), pObj);
......@@ -131,4 +132,5 @@ public:
upperCorner = pObj.getUpperCorner();
}
};
} // end namespace ParallelUtils
} // end namespace imstk
......@@ -26,17 +26,39 @@
namespace imstk
{
class ParallelSpinLock {
namespace ParallelUtils
{
///
/// \brief The ParallelSpinLock class
/// \brief A SpinLock is a lightweight mutex,
/// which can be safely locked and unlocked, and is held exclusively by only one thread at a time
///
class ParallelSpinLock
{
public:
///
/// \brief Constructor
///
ParallelSpinLock() = default;
///
/// \brief Copy constructor, must be implemented as an empty function
/// because the member variable of type std::atomic_flag has its copy constructor deleted
///
ParallelSpinLock(const ParallelSpinLock&) {}
ParallelSpinLock& operator=(const ParallelSpinLock&) { return *this; }
///
/// \brief Start a thread-safe region, where only one thread can execute at a time until
/// a call to the unlock function
///
void lock()
{
while(m_Lock.test_and_set(std::memory_order_acquire)) {}
}
///
/// \brief End a thread-safe region
///
void unlock()
{
m_Lock.clear(std::memory_order_release);
......@@ -45,4 +67,5 @@ public:
private:
std::atomic_flag m_Lock = ATOMIC_FLAG_INIT;
};
} // end namespace ParallelUtils
} // end namespace imstk
......@@ -24,14 +24,14 @@
namespace imstk
{
namespace ParallelUtils
{
std::unique_ptr<tbb::global_control> ThreadManager::s_tbbGlobalControl;
void ThreadManager::setNumberThreads(int nThreads)
void ThreadManager::setThreadPoolSize(const size_t nThreads)
{
if(nThreads <= 0)
{
LOG(FATAL) << "Invalid number of threads";
}
LOG_IF(FATAL, (nThreads == 0)) << "Invalid number of threads";
LOG(INFO) << "Set number of worker threads to " << nThreads;
if(s_tbbGlobalControl)
{
......@@ -40,12 +40,11 @@ void ThreadManager::setNumberThreads(int nThreads)
s_tbbGlobalControl = std::unique_ptr<tbb::global_control>(
new tbb::global_control(tbb::global_control::max_allowed_parallelism,
static_cast<size_t>(nThreads)));
LOG(INFO) << "Set number of worker threads: " << nThreads;
}
void ThreadManager::setMaximumParallelism()
{
setNumberThreads(tbb::task_scheduler_init::default_num_threads());
setThreadPoolSize(static_cast<size_t>(tbb::task_scheduler_init::default_num_threads()));
}
} // end namespace ParallelUtils
} // end namespace imstk
......@@ -30,6 +30,8 @@
namespace imstk
{
namespace ParallelUtils
{
class ThreadManager
{
public:
......@@ -37,7 +39,7 @@ public:
///
/// \brief Set system-wide number of threads for parallel computation
///
static void setNumberThreads(int nThreads);
static void setThreadPoolSize(const size_t nThreads);
///
/// \brief Set system-wide number of threads to system default (use all logical cores)
......@@ -50,4 +52,5 @@ private:
///
static std::unique_ptr<tbb::global_control> s_tbbGlobalControl;
};
} // end namespace ParallelUtils
} // end namespace imstk
......@@ -20,7 +20,7 @@
=========================================================================*/
#include "imstkGridBasedNeighborSearch.h"
#include "imstkParallelHelpers.h"
#include "imstkParallelUtils.h"
#include <g3log/g3log.hpp>
namespace imstk
......@@ -51,7 +51,7 @@ void GridBasedNeighborSearch::getNeighbors(std::vector<std::vector<size_t>>& res
// firstly compute the bounding box of points in setB
Vec3r lowerCorner;
Vec3r upperCorner;
ParallelReduce::getAABB(setB, lowerCorner, upperCorner);
ParallelUtils::ParallelReduce::findAABB(setB, lowerCorner, upperCorner);
// the upper corner needs to be expanded a bit, to avoid round-off error during computation
upperCorner += Vec3d(m_SearchRadius, m_SearchRadius, m_SearchRadius) * Real(0.1);
......@@ -60,14 +60,14 @@ void GridBasedNeighborSearch::getNeighbors(std::vector<std::vector<size_t>>& res
m_Grid.initialize(lowerCorner, upperCorner, m_SearchRadius);
// clear all particle lists in each grid cell
imstk_parallel_for(m_Grid.getAllCellData().size(),
ParallelUtils::parallelFor(m_Grid.getAllCellData().size(),
[&](const size_t cellIdx)
{
m_Grid.getCellData(cellIdx).particleIndices.resize(0);
});
// collect particle indices of points in setB into their corresponding cells
imstk_parallel_for(setB.size(),
ParallelUtils::parallelFor(setB.size(),
[&](const size_t p)
{
auto& cellData = m_Grid.getCellData(setB[p]);
......@@ -78,7 +78,7 @@ void GridBasedNeighborSearch::getNeighbors(std::vector<std::vector<size_t>>& res
// for each point in setA, collect setB neighbors within the search radius
result.resize(setA.size());
imstk_parallel_for(setA.size(),
ParallelUtils::parallelFor(setA.size(),
[&](const size_t p)
{
auto& pneighbors = result[p];
......
......@@ -82,7 +82,7 @@ private:
struct CellData
{
std::vector<size_t> particleIndices; // Store list of particles
ParallelSpinLock lock; // An atomic lock for thread-safe writing
ParallelUtils::ParallelSpinLock lock; // An atomic lock for thread-safe writing
};
UniformSpatialGrid<CellData> m_Grid;
};
......
......@@ -23,7 +23,7 @@
#include "imstkGridBasedNeighborSearch.h"
#include "imstkSpatialHashTableSeparateChaining.h"
#include "imstkNeighborSearch.h"
#include "imstkParallelHelpers.h"
#include "imstkParallelUtils.h"
namespace imstk
{
......@@ -79,7 +79,7 @@ void NeighborSearch::getNeighbors(std::vector<std::vector<size_t>>& result, cons
m_SpatialHashSearcher->clear();
m_SpatialHashSearcher->insertPoints(setB);
imstk_parallel_for(setA.size(),
ParallelUtils::parallelFor(setA.size(),
[&](const size_t p) {
// For each point in setA, find neighbors in setB
m_SpatialHashSearcher->getPointsInSphere(result[p], setA[p], m_SearchRadius);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment