Commit 9b544471 authored by Utkarsh Ayachit's avatar Utkarsh Ayachit
Browse files

Add `diy`

parent 1f989b1f
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2017 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
## Copyright 2017 UT-Battelle, LLC.
## Copyright 2017 Los Alamos National Security.
##
## Under the terms of Contract DE-NA0003525 with NTESS,
## the U.S. Government retains certain rights in this software.
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##
##=============================================================================
#==============================================================================
# See License.txt
#==============================================================================
add_library(diy INTERFACE)
# diy needs C++11
target_compile_features(diy INTERFACE cxx_auto_type)
target_include_directories(diy INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${VTKm_INSTALL_INCLUDE_DIR}>)
# presently, this dependency is required. Make it optional in the future.
set(arg)
foreach(apath IN LISTS MPI_C_INCLUDE_PATH MPI_CXX_INCLUDE_PATH)
list(APPEND arg $<BUILD_INTERFACE:${apath}>)
endforeach()
target_include_directories(diy INTERFACE ${arg})
target_link_libraries(diy INTERFACE
$<BUILD_INTERFACE:${MPI_C_LIBRARIES}>
$<BUILD_INTERFACE:${MPI_CXX_LIBRARIES}>)
if(MPI_C_COMPILE_DEFINITIONS)
target_compile_definitions(diy INTERFACE
$<$<COMPILE_LANGUAGE:C>:${MPI_C_COMPILE_DEFINITIONS}>)
endif()
if(MPI_CXX_COMPILE_DEFNITIONS)
target_compile_definitions(diy INTERFACE
$<$<COMPILE_LANGUAGE:CXX>:${MPI_CXX_COMPILE_DEFNITIONS>)
endif()
install(TARGETS diy
EXPORT ${VTKm_EXPORT_NAME})
# Install headers
install(DIRECTORY include/diy
DESTINATION ${VTKm_INSTALL_INCLUDE_DIR})
# Install other files.
install(FILES LEGAL.txt LICENSE.txt
DESTINATION ${VTKm_INSTALL_INCLUDE_DIR}/diy
)
Copyright Notice
DIY2, Copyright (c) 2015, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from the U.S. Dept. of Energy). All rights reserved.
If you have questions about your rights to use or distribute this software,
please contact Berkeley Lab's Technology Transfer Department at TTD@lbl.gov.
NOTICE. This software is owned by the U.S. Department of Energy. As such, the
U.S. Government has been granted for itself and others acting on its behalf a
paid-up, nonexclusive, irrevocable, worldwide license in the Software to
reproduce, prepare derivative works, and perform publicly and display publicly.
Beginning five (5) years after the date permission to assert copyright is
obtained from the U.S. Department of Energy, and subject to any subsequent five
(5) year renewals, the U.S. Government is granted for itself and others acting
on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the
Software to reproduce, prepare derivative works, distribute copies to the
public, perform publicly and display publicly, and to permit others to do so.
License Agreement
"DIY2, Copyright (c) 2015, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from the U.S. Dept. of Energy). All rights reserved."
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
(1) Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
(2) Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
(3) Neither the name of the University of California, Lawrence Berkeley National
Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used
to endorse or promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You are under no obligation whatsoever to provide any bug fixes, patches, or
upgrades to the features, functionality or performance of the source code
("Enhancements") to anyone; however, if you choose to make your Enhancements
available either publicly, or directly to Lawrence Berkeley National Laboratory,
without imposing a separate written license agreement for such Enhancements,
then you hereby grant the following license: a non-exclusive, royalty-free
perpetual license to install, use, modify, prepare derivative works, incorporate
into other computer software, distribute, and sublicense such enhancements or
derivative works thereof, in binary and source code form.
#ifndef DIY_ALGORITHMS_HPP
#define DIY_ALGORITHMS_HPP
#include <vector>
#include "master.hpp"
#include "assigner.hpp"
#include "reduce.hpp"
#include "reduce-operations.hpp"
#include "partners/swap.hpp"
#include "detail/algorithms/sort.hpp"
#include "detail/algorithms/kdtree.hpp"
#include "detail/algorithms/kdtree-sampling.hpp"
#include "log.hpp"
namespace diy
{
/**
* \ingroup Algorithms
* \brief sample sort `values` of each block, store the boundaries between blocks in `samples`
*/
template<class Block, class T, class Cmp>
void sort(Master& master, //!< master object
const Assigner& assigner, //!< assigner object
std::vector<T> Block::* values, //!< all values to sort
std::vector<T> Block::* samples, //!< (output) boundaries of blocks
size_t num_samples, //!< desired number of samples
const Cmp& cmp, //!< comparison function
int k = 2, //!< k-ary reduction will be used
bool samples_only = false) //!< false: results will be all_to_all exchanged; true: only sort but don't exchange results
{
bool immediate = master.immediate();
master.set_immediate(false);
// NB: although sorter will go out of scope, its member functions sample()
// and exchange() will return functors whose copies get saved inside reduce
detail::SampleSort<Block,T,Cmp> sorter(values, samples, cmp, num_samples);
// swap-reduce to all-gather samples
RegularDecomposer<DiscreteBounds> decomposer(1, interval(0,assigner.nblocks()), assigner.nblocks());
RegularSwapPartners partners(decomposer, k);
reduce(master, assigner, partners, sorter.sample(), detail::SkipIntermediate(partners.rounds()));
// all_to_all to exchange the values
if (!samples_only)
all_to_all(master, assigner, sorter.exchange(), k);
master.set_immediate(immediate);
}
/**
* \ingroup Algorithms
* \brief sample sort `values` of each block, store the boundaries between blocks in `samples`
* shorter version of above sort algorithm with the default less-than comparator used for T
* and all_to_all exchange included
*/
template<class Block, class T>
void sort(Master& master, //!< master object
const Assigner& assigner, //!< assigner object
std::vector<T> Block::* values, //!< all values to sort
std::vector<T> Block::* samples, //!< (output) boundaries of blocks
size_t num_samples, //!< desired number of samples
int k = 2) //!< k-ary reduction will be used
{
sort(master, assigner, values, samples, num_samples, std::less<T>(), k);
}
/**
* \ingroup Algorithms
* \brief build a kd-tree and sort a set of points into it (use histograms to determine split values)
*/
template<class Block, class Point>
void kdtree(Master& master, //!< master object
const Assigner& assigner, //!< assigner object
int dim, //!< dimensionality
const ContinuousBounds& domain, //!< global data extents
std::vector<Point> Block::* points, //!< input points to sort into kd-tree
size_t bins, //!< number of histogram bins for splitting a dimension
bool wrap = false)//!< periodic boundaries in all dimensions
{
if (assigner.nblocks() & (assigner.nblocks() - 1))
throw std::runtime_error(fmt::format("KD-tree requires a number of blocks that's a power of 2, got {}", assigner.nblocks()));
typedef diy::RegularContinuousLink RCLink;
for (size_t i = 0; i < master.size(); ++i)
{
RCLink* link = static_cast<RCLink*>(master.link(i));
*link = RCLink(dim, domain, domain);
if (wrap) // set up the links to self
{
diy::BlockID self = { master.gid(i), master.communicator().rank() };
for (int j = 0; j < dim; ++j)
{
diy::Direction dir, wrap_dir;
// left
dir[j] = -1; wrap_dir[j] = -1;
link->add_neighbor(self);
link->add_bounds(domain);
link->add_direction(dir);
link->add_wrap(wrap_dir);
// right
dir[j] = 1; wrap_dir[j] = 1;
link->add_neighbor(self);
link->add_bounds(domain);
link->add_direction(dir);
link->add_wrap(wrap_dir);
}
}
}
detail::KDTreePartition<Block,Point> kdtree_partition(dim, points, bins);
detail::KDTreePartners partners(dim, assigner.nblocks(), wrap, domain);
reduce(master, assigner, partners, kdtree_partition);
// update master.expected to match the links
int expected = 0;
for (size_t i = 0; i < master.size(); ++i)
expected += master.link(i)->size_unique();
master.set_expected(expected);
}
/**
* \ingroup Algorithms
* \brief build a kd-tree and sort a set of points into it (use sampling to determine split values)
*/
template<class Block, class Point>
void kdtree_sampling
(Master& master, //!< master object
const Assigner& assigner, //!< assigner object
int dim, //!< dimensionality
const ContinuousBounds& domain, //!< global data extents
std::vector<Point> Block::* points, //!< input points to sort into kd-tree
size_t samples, //!< number of samples to take in each block
bool wrap = false)//!< periodic boundaries in all dimensions
{
if (assigner.nblocks() & (assigner.nblocks() - 1))
throw std::runtime_error(fmt::format("KD-tree requires a number of blocks that's a power of 2, got {}", assigner.nblocks()));
typedef diy::RegularContinuousLink RCLink;
for (size_t i = 0; i < master.size(); ++i)
{
RCLink* link = static_cast<RCLink*>(master.link(i));
*link = RCLink(dim, domain, domain);
if (wrap) // set up the links to self
{
diy::BlockID self = { master.gid(i), master.communicator().rank() };
for (int j = 0; j < dim; ++j)
{
diy::Direction dir, wrap_dir;
// left
dir[j] = -1; wrap_dir[j] = -1;
link->add_neighbor(self);
link->add_bounds(domain);
link->add_direction(dir);
link->add_wrap(wrap_dir);
// right
dir[j] = 1; wrap_dir[j] = 1;
link->add_neighbor(self);
link->add_bounds(domain);
link->add_direction(dir);
link->add_wrap(wrap_dir);
}
}
}
detail::KDTreeSamplingPartition<Block,Point> kdtree_partition(dim, points, samples);
detail::KDTreePartners partners(dim, assigner.nblocks(), wrap, domain);
reduce(master, assigner, partners, kdtree_partition);
// update master.expected to match the links
int expected = 0;
for (size_t i = 0; i < master.size(); ++i)
expected += master.link(i)->size_unique();
master.set_expected(expected);
}
}
#endif
#ifndef DIY_ASSIGNER_HPP
#define DIY_ASSIGNER_HPP
#include <vector>
namespace diy
{
// Derived types should define
// int rank(int gid) const
// that converts a global block id to a rank that it's assigned to.
class Assigner
{
public:
/**
* \ingroup Assignment
* \brief Manages how blocks are assigned to processes
*/
Assigner(int size, //!< total number of processes
int nblocks //!< total (global) number of blocks
):
size_(size), nblocks_(nblocks) {}
//! returns the total number of process ranks
int size() const { return size_; }
//! returns the total number of global blocks
int nblocks() const { return nblocks_; }
//! sets the total number of global blocks
void set_nblocks(int nblocks) { nblocks_ = nblocks; }
//! gets the local gids for a given process rank
virtual void local_gids(int rank, std::vector<int>& gids) const =0;
//! returns the process rank of the block with global id gid (need not be local)
virtual int rank(int gid) const =0;
private:
int size_; // total number of ranks
int nblocks_; // total number of blocks
};
class ContiguousAssigner: public Assigner
{
public:
/**
* \ingroup Assignment
* \brief Assigns blocks to processes in contiguous gid (block global id) order
*/
ContiguousAssigner(int size, //!< total number of processes
int nblocks //!< total (global) number of blocks
):
Assigner(size, nblocks) {}
using Assigner::size;
using Assigner::nblocks;
int rank(int gid) const override
{
int div = nblocks() / size();
int mod = nblocks() % size();
int r = gid / (div + 1);
if (r < mod)
{
return r;
} else
{
return mod + (gid - (div + 1)*mod)/div;
}
}
inline
void local_gids(int rank, std::vector<int>& gids) const override;
};
class RoundRobinAssigner: public Assigner
{
public:
/**
* \ingroup Assignment
* \brief Assigns blocks to processes in cyclic or round-robin gid (block global id) order
*/
RoundRobinAssigner(int size, //!< total number of processes
int nblocks //!< total (global) number of blocks
):
Assigner(size, nblocks) {}
using Assigner::size;
using Assigner::nblocks;
int rank(int gid) const override { return gid % size(); }
inline
void local_gids(int rank, std::vector<int>& gids) const override;
};
}
void
diy::ContiguousAssigner::
local_gids(int rank, std::vector<int>& gids) const
{
int div = nblocks() / size();
int mod = nblocks() % size();
int from, to;
if (rank < mod)
from = rank * (div + 1);
else
from = mod * (div + 1) + (rank - mod) * div;
if (rank + 1 < mod)
to = (rank + 1) * (div + 1);
else
to = mod * (div + 1) + (rank + 1 - mod) * div;
for (int gid = from; gid < to; ++gid)
gids.push_back(gid);
}
void
diy::RoundRobinAssigner::
local_gids(int rank, std::vector<int>& gids) const
{
int cur = rank;
while (cur < nblocks())
{
gids.push_back(cur);
cur += size();
}
}
#endif
#ifndef DIY_COLLECTION_HPP
#define DIY_COLLECTION_HPP
#include <vector>
#include "serialization.hpp"
#include "storage.hpp"
#include "thread.hpp"
namespace diy
{
class Collection
{
public:
typedef void* Element;
typedef std::vector<Element> Elements;
typedef critical_resource<int, recursive_mutex> CInt;
typedef void* (*Create)();
typedef void (*Destroy)(void*);
typedef detail::Save Save;
typedef detail::Load Load;
public:
Collection(Create create,
Destroy destroy,
ExternalStorage* storage,
Save save,
Load load):
create_(create),
destroy_(destroy),
storage_(storage),
save_(save),
load_(load),
in_memory_(0) {}
size_t size() const { return elements_.size(); }
const CInt& in_memory() const { return in_memory_; }
inline void clear();
int add(Element e) { elements_.push_back(e); external_.push_back(-1); ++(*in_memory_.access()); return elements_.size() - 1; }
void* release(int i) { void* e = get(i); elements_[i] = 0; return e; }
void* find(int i) const { return elements_[i]; } // possibly returns 0, if the element is unloaded
void* get(int i) { if (!find(i)) load(i); return find(i); } // loads the element first, and then returns its address
int available() const { int i = 0; for (; i < (int)size(); ++i) if (find(i) != 0) break; return i; }
inline void load(int i);
inline void unload(int i);
Create creator() const { return create_; }
Destroy destroyer() const { return destroy_; }
Load loader() const { return load_; }
Save saver() const { return save_; }
void* create() const { return create_(); }
void destroy(int i) { if (find(i)) { destroy_(find(i)); elements_[i] = 0; } else if (external_[i] != -1) storage_->destroy(external_[i]); }
bool own() const { return destroy_ != 0; }
ExternalStorage* storage() const { return storage_; }
private:
Create create_;
Destroy destroy_;
ExternalStorage* storage_;
Save save_;
Load load_;
Elements elements_;
std::vector<int> external_;
CInt in_memory_;
};
}
void
diy::Collection::
clear()
{
if (own())
for (size_t i = 0; i < size(); ++i)
destroy(i);
elements_.clear();
external_.clear();
*in_memory_.access() = 0;
}
void
diy::Collection::
unload(int i)
{
//BinaryBuffer bb;
void* e = find(i);
//save_(e, bb);
//external_[i] = storage_->put(bb);
external_[i] = storage_->put(e, save_);
destroy_(e);
elements_[i] = 0;
--(*in_memory_.access());
}
void
diy::Collection::
load(int i)
{
//BinaryBuffer bb;
//storage_->get(external_[i], bb);
void* e = create_();
//load_(e, bb);
storage_->get(external_[i], e, load_);
elements_[i] = e;
external_[i] = -1;
++(*in_memory_.access());
}
#endif
#ifndef DIY_COMMUNICATOR_HPP
#define DIY_COMMUNICATOR_HPP
#warning "diy::Communicator (in diy/communicator.hpp) is deprecated, use diy::mpi::communicator directly"
#include "mpi.hpp"
namespace diy
{
typedef mpi::communicator Communicator;
}
#endif
#ifndef DIY_CONSTANTS_H
#define DIY_CONSTANTS_H
// Default DIY_MAX_DIM to 4, unless provided by the user
// (used for static min/max size in various Bounds)
#ifndef DIY_MAX_DIM
#define DIY_MAX_DIM 4
#endif
enum
{
DIY_X0 = 0x01, /* minimum-side x (left) neighbor */
DIY_X1 = 0x02, /* maximum-side x (right) neighbor */
DIY_Y0 = 0x04, /* minimum-side y (bottom) neighbor */
DIY_Y1 = 0x08, /* maximum-side y (top) neighbor */
DIY_Z0 = 0x10, /* minimum-side z (back) neighbor */
DIY_Z1 = 0x20, /* maximum-side z (front)neighbor */
DIY_T0 = 0x40, /* minimum-side t (earlier) neighbor */
DIY_T1 = 0x80 /* maximum-side t (later) neighbor */
};
#endif
#ifndef DIY_CRITICAL_RESOURCE_HPP
#define DIY_CRITICAL_RESOURCE_HPP