IOSS  2.0
Ioss_ParallelUtils.h
Go to the documentation of this file.
1 // Copyright(C) 1999-2017 National Technology & Engineering Solutions
2 // of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
3 // NTESS, the U.S. Government retains certain rights in this software.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following
14 // disclaimer in the documentation and/or other materials provided
15 // with the distribution.
16 //
17 // * Neither the name of NTESS nor the names of its
18 // contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 #ifndef IOSS_Ioss_ParallelUtils_h
34 #define IOSS_Ioss_ParallelUtils_h
35 
#include <Ioss_CodeTypes.h> // for Int64Vector, IntVector
#include <Ioss_Utils.h>
#include <algorithm> // for std::copy
#include <cassert>
#include <cstddef> // for size_t
#include <cstdint> // for int64_t
#include <cstdlib> // for exit, EXIT_FAILURE
#include <iostream> // for std::cerr
#include <sstream> // for std::ostringstream
#include <string> // for string
#include <vector> // for vector
42 
43 #ifdef SEACAS_HAVE_MPI
44 #include <Ioss_SerializeIO.h>
45 #endif
46 
47 namespace Ioss {
48 
50  {
51  public:
52  explicit ParallelUtils(MPI_Comm the_communicator);
53  ~ParallelUtils() = default;
54 
55  // Assignment operator
56  // Copy constructor
57 
58  enum MinMax { DO_MAX, DO_MIN, DO_SUM };
59 
60  /*!
61  * Returns 'true' if 'name' is defined in the environment.
62  * The value of the environment variable is returned in 'value'.
63  * getenv system call is only done on processor 0.
64  * If '!sync_parallel', then don't push to other processors.
65  */
66  bool get_environment(const std::string &name, std::string &value, bool sync_parallel) const;
67 
68  /*!
69  * See if any external properties specified via the
70  * IOSS_PROPERTIES environment variable. If any found, add to
71  * `properties`.
72  */
74 
75  /*!
76  * Returns 'true' if 'name' is defined in the environment.
77  * The value of the environment variable is converted to an
78  * integer via the atoi library call and returned in 'value'.
79  * No checking is done to ensure that the environment variable
80  * points to a valid integer.
81  * getenv system call is only done on processor 0.
82  * If '!sync_parallel', then don't push to other processors.
83  */
84  bool get_environment(const std::string &name, int &value, bool sync_parallel) const;
85 
86  /*!
87  * Returns 'true' if 'name' is defined in the environment no
88  * matter what the value. Returns false otherwise.
89  * getenv system call is only done on processor 0.
90  * If '!sync_parallel', then don't push to other processors.
91  */
92  bool get_environment(const std::string &name, bool sync_parallel) const;
93 
94  std::string decode_filename(const std::string &filename, bool is_parallel) const;
95 
96  MPI_Comm communicator() const { return communicator_; }
97  int parallel_size() const;
98  int parallel_rank() const;
99 
100  void barrier() const;
101 
102  /*!
103  * Global OR of attribute strings, the processors which have no
104  * knowledge of the value should initialize to '0' and the
105  * processors with knowledge set the appropriate values.
106  */
107  void attribute_reduction(int length, char buffer[]) const;
108 
109  /*!
110  * Generate a "globally unique id" which is unique over all entities
111  * of a specific type over all processors.
112  * Used by some applications for uniquely identifying an entity.
113  * If `rank` == -1, then use parallel_rank; otherwise use rank
114  */
115  int64_t generate_guid(size_t id, int rank = -1) const;
116 
117  /*! Return min, max, average memory used by any process */
118  void memory_stats(int64_t &min, int64_t &max, int64_t &avg) const;
119 
120  /*! Return high-water-mark min, max, average memory used by any process */
121  /* May be inaccurate unless system maintains this information */
122  void hwm_memory_stats(int64_t &min, int64_t &max, int64_t &avg) const;
123 
124  /*! Vector 'local_counts' contains the number of objects
125  * local to this processor. On exit, global_counts
126  * contains the total number of objects on all processors.
127  * Assumes that ordering is the same on all processors
128  */
129  void global_count(const IntVector &local_counts, IntVector &global_counts) const;
130  void global_count(const Int64Vector &local_counts, Int64Vector &global_counts) const;
131 
132  template <typename T> T global_minmax(T local_minmax, MinMax which) const;
133 
134  template <typename T>
135  void global_array_minmax(std::vector<T> &local_minmax, MinMax which) const;
136 
137  template <typename T> void gather(T my_value, std::vector<T> &result) const;
138  template <typename T> void all_gather(T my_value, std::vector<T> &result) const;
139  template <typename T> void gather(std::vector<T> &my_values, std::vector<T> &result) const;
140  template <typename T> void all_gather(std::vector<T> &my_values, std::vector<T> &result) const;
141  template <typename T>
142  int gather(int vals_count, int size_per_val, std::vector<T> &my_values,
143  std::vector<T> &result) const;
144 
145  void progress(const std::string &output) const;
146 
147  private:
149  };
150 
151 #ifdef SEACAS_HAVE_MPI
152  inline MPI_Datatype mpi_type(double /*dummy*/) { return MPI_DOUBLE; }
153  inline MPI_Datatype mpi_type(float /*dummy*/) { return MPI_FLOAT; }
154  inline MPI_Datatype mpi_type(int /*dummy*/) { return MPI_INT; }
155  inline MPI_Datatype mpi_type(char /*dummy*/) { return MPI_CHAR; }
156  inline MPI_Datatype mpi_type(long int /*dummy*/) { return MPI_LONG_LONG_INT; }
157  inline MPI_Datatype mpi_type(long long int /*dummy*/) { return MPI_LONG_LONG_INT; }
158  inline MPI_Datatype mpi_type(unsigned int /*dummy*/) { return MPI_UNSIGNED; }
159  inline MPI_Datatype mpi_type(unsigned long int /*dummy*/) { return MPI_UNSIGNED_LONG; }
160 
161  template <typename T>
162  int MY_Alltoallv64(const std::vector<T> &sendbuf, const std::vector<int64_t> &sendcounts,
163  const std::vector<int64_t> &senddisp, std::vector<T> &recvbuf,
164  const std::vector<int64_t> &recvcounts, const std::vector<int64_t> &recvdisp,
165  MPI_Comm comm)
166  {
167  int processor_count = 0;
168  int my_processor = 0;
169  MPI_Comm_size(comm, &processor_count);
170  MPI_Comm_rank(comm, &my_processor);
171 
172  // Verify that all 'counts' can fit in an integer. Symmetric
173  // communication, so recvcounts are sendcounts on another processor.
174  for (int i = 0; i < processor_count; i++) {
175  int snd_cnt = static_cast<int>(sendcounts[i]);
176  if (static_cast<int64_t>(snd_cnt) != sendcounts[i]) {
177  std::ostringstream errmsg;
178  errmsg << "ERROR: The number of items that must be communicated via MPI calls from\n"
179  << " processor " << my_processor << " to processor " << i << " is "
180  << sendcounts[i]
181  << "\n which exceeds the storage capacity of the integers "
182  "used by MPI functions.\n";
183  std::cerr << errmsg.str();
184  exit(EXIT_FAILURE);
185  }
186  }
187 
188  size_t pow_2 = Ioss::Utils::power_2(processor_count);
189 
190  for (size_t i = 1; i < pow_2; i++) {
191  MPI_Status status{};
192 
193  int tag = 24713;
194  size_t exchange_proc = i ^ my_processor;
195  if (exchange_proc < static_cast<size_t>(processor_count)) {
196  int snd_cnt = static_cast<int>(
197  sendcounts[exchange_proc]); // Converts from int64_t to int as needed by mpi
198  int rcv_cnt = static_cast<int>(recvcounts[exchange_proc]);
199  if (static_cast<size_t>(my_processor) < exchange_proc) {
200  MPI_Send((void *)&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)),
201  exchange_proc, tag, comm);
202  MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag,
203  comm, &status);
204  }
205  else {
206  MPI_Recv(&recvbuf[recvdisp[exchange_proc]], rcv_cnt, mpi_type(T(0)), exchange_proc, tag,
207  comm, &status);
208  MPI_Send((void *)&sendbuf[senddisp[exchange_proc]], snd_cnt, mpi_type(T(0)),
209  exchange_proc, tag, comm);
210  }
211  }
212  }
213 
214  // Take care of this processor's data movement...
215  std::copy(&sendbuf[senddisp[my_processor]],
216  &sendbuf[senddisp[my_processor] + sendcounts[my_processor]],
217  &recvbuf[recvdisp[my_processor]]);
218  return 0;
219  }
220 
221  template <typename T>
222  int MY_Alltoallv(const std::vector<T> &sendbuf, const std::vector<int64_t> &sendcnts,
223  const std::vector<int64_t> &senddisp, std::vector<T> &recvbuf,
224  const std::vector<int64_t> &recvcnts, const std::vector<int64_t> &recvdisp,
225  MPI_Comm comm)
226  {
227 // Wrapper to handle case where send/recv counts and displacements are 64-bit integers.
228 // Two cases:
229 // 1) They are of type 64-bit integers, but only storing data in the 32-bit integer range.
230 // -- if (sendcnts[#proc-1] + senddisp[#proc-1] < 2^31, then we are ok
231 // 2) They are of type 64-bit integers, and storing data in the 64-bit integer range.
232 // -- call special alltoallv which does point-to-point sends
233 #if 1
234  int processor_count = 0;
235  MPI_Comm_size(comm, &processor_count);
236  size_t max_comm = sendcnts[processor_count - 1] + senddisp[processor_count - 1];
237  size_t one = 1;
238  if (max_comm < one << 31) {
239  // count and displacement data in range, need to copy to integer vector.
240  std::vector<int> send_cnt(sendcnts.begin(), sendcnts.end());
241  std::vector<int> send_dis(senddisp.begin(), senddisp.end());
242  std::vector<int> recv_cnt(recvcnts.begin(), recvcnts.end());
243  std::vector<int> recv_dis(recvdisp.begin(), recvdisp.end());
244  return MPI_Alltoallv((void *)sendbuf.data(), send_cnt.data(), send_dis.data(), mpi_type(T(0)),
245  (void *)recvbuf.data(), recv_cnt.data(), recv_dis.data(), mpi_type(T(0)),
246  comm);
247  }
248  else {
249 #endif
250  // Same as if each processor sent a message to every other process with:
251  // MPI_Send(sendbuf+senddisp[i]*sizeof(sendtype),sendcnts[i], sendtype, i, tag, comm);
252  // And received a message from each processor with a call to:
253  // MPI_Recv(recvbuf+recvdisp[i]*sizeof(recvtype),recvcnts[i], recvtype, i, tag, comm);
254  return MY_Alltoallv64(sendbuf, sendcnts, senddisp, recvbuf, recvcnts, recvdisp, comm);
255 #if 1
256  }
257 #endif
258  }
259 
260  template <typename T>
261  int MY_Alltoallv(const std::vector<T> &sendbuf, const std::vector<int> &sendcnts,
262  const std::vector<int> &senddisp, std::vector<T> &recvbuf,
263  const std::vector<int> &recvcnts, const std::vector<int> &recvdisp,
264  MPI_Comm comm)
265  {
266  return MPI_Alltoallv((void *)sendbuf.data(), const_cast<int *>(sendcnts.data()),
267  const_cast<int *>(senddisp.data()), mpi_type(T(0)), recvbuf.data(),
268  const_cast<int *>(recvcnts.data()), const_cast<int *>(recvdisp.data()),
269  mpi_type(T(0)), comm);
270  }
271 #endif
272 
273  template <typename T>
274  void ParallelUtils::global_array_minmax(std::vector<T> &local_minmax, MinMax which) const
275  {
276  PAR_UNUSED(local_minmax);
277  PAR_UNUSED(which);
278 #ifdef SEACAS_HAVE_MPI
279  if (parallel_size() > 1 && !local_minmax.empty()) {
281  std::ostringstream errmsg;
282  errmsg << "Attempting mpi while in barrier owned by " << Ioss::SerializeIO::getOwner();
283  IOSS_ERROR(errmsg);
284  }
285 
286  std::vector<T> maxout(local_minmax.size());
287  MPI_Op oper = MPI_MAX;
288  if (which == Ioss::ParallelUtils::DO_MAX) {
289  oper = MPI_MAX;
290  }
291  else if (which == Ioss::ParallelUtils::DO_MIN) {
292  oper = MPI_MIN;
293  }
294  else if (which == Ioss::ParallelUtils::DO_SUM) {
295  oper = MPI_SUM;
296  }
297 
298  const int success =
299  MPI_Allreduce((void *)(local_minmax.data()), maxout.data(),
300  static_cast<int>(local_minmax.size()), mpi_type(T()), oper, communicator_);
301  if (success != MPI_SUCCESS) {
302  std::ostringstream errmsg;
303  errmsg << "Ioss::ParallelUtils::global_array_minmax - MPI_Allreduce failed";
304  IOSS_ERROR(errmsg);
305  }
306  // Now copy back into passed in array...
307  for (size_t i = 0; i < local_minmax.size(); i++) {
308  local_minmax[i] = maxout[i];
309  }
310  }
311 #endif
312  }
313 
314 } // namespace Ioss
315 #endif
Ioss::ParallelUtils::hwm_memory_stats
void hwm_memory_stats(int64_t &min, int64_t &max, int64_t &avg) const
Definition: Ioss_ParallelUtils.C:270
Ioss::IntVector
std::vector< int > IntVector
Definition: Ioss_CodeTypes.h:51
Ioss::ParallelUtils::attribute_reduction
void attribute_reduction(int length, char buffer[]) const
Definition: Ioss_ParallelUtils.C:304
Ioss::ParallelUtils::global_array_minmax
void global_array_minmax(std::vector< T > &local_minmax, MinMax which) const
Definition: Ioss_ParallelUtils.h:274
Ioss::ParallelUtils::decode_filename
std::string decode_filename(const std::string &filename, bool is_parallel) const
Definition: Ioss_ParallelUtils.C:218
Ioss::ParallelUtils::DO_MIN
Definition: Ioss_ParallelUtils.h:58
Ioss::ParallelUtils::parallel_rank
int parallel_rank() const
Definition: Ioss_ParallelUtils.C:245
Ioss
The main namespace for the Ioss library.
Definition: Ioad_DatabaseIO.C:66
Ioss::ParallelUtils::DO_SUM
Definition: Ioss_ParallelUtils.h:58
Ioss::ParallelUtils::add_environment_properties
void add_environment_properties(Ioss::PropertyManager &properties)
Definition: Ioss_ParallelUtils.C:78
Ioss::ParallelUtils::~ParallelUtils
~ParallelUtils()=default
Ioss::ParallelUtils::global_minmax
T global_minmax(T local_minmax, MinMax which) const
Definition: Ioss_ParallelUtils.C:409
Ioss::SerializeIO::inBarrier
static bool inBarrier()
Definition: Ioss_SerializeIO.h:108
Ioss::PropertyManager
A collection of Ioss::Property objects.
Definition: Ioss_PropertyManager.h:49
anonymous_namespace{Iocgns_DecompositionData.C}::rank
int rank
Definition: Iocgns_DecompositionData.C:55
Ioss::SerializeIO::isEnabled
static bool isEnabled()
Definition: Ioss_SerializeIO.h:106
Ioss::ParallelUtils::memory_stats
void memory_stats(int64_t &min, int64_t &max, int64_t &avg) const
Definition: Ioss_ParallelUtils.C:256
IOSS_ERROR
#define IOSS_ERROR(errmsg)
Definition: Ioss_Utils.h:65
Ioss::ParallelUtils::generate_guid
int64_t generate_guid(size_t id, int rank=-1) const
Definition: Ioss_ParallelUtils.C:287
Ioss::Int64Vector
std::vector< int64_t > Int64Vector
Definition: Ioss_CodeTypes.h:52
Ioss_Utils.h
Ioss::Utils::power_2
static int power_2(int count)
Definition: Ioss_Utils.h:213
Ioss::ParallelUtils::DO_MAX
Definition: Ioss_ParallelUtils.h:58
Ioss_SerializeIO.h
Ioss::ParallelUtils::all_gather
void all_gather(T my_value, std::vector< T > &result) const
Definition: Ioss_ParallelUtils.C:478
Ioss::ParallelUtils::parallel_size
int parallel_size() const
Definition: Ioss_ParallelUtils.C:234
Ioss::ParallelUtils::global_count
void global_count(const IntVector &local_counts, IntVector &global_counts) const
Definition: Ioss_ParallelUtils.C:333
Ioss::ParallelUtils::get_environment
bool get_environment(const std::string &name, std::string &value, bool sync_parallel) const
Definition: Ioss_ParallelUtils.C:126
Ioss::ParallelUtils::gather
void gather(T my_value, std::vector< T > &result) const
Definition: Ioss_ParallelUtils.C:455
Ioss::ParallelUtils::communicator
MPI_Comm communicator() const
Definition: Ioss_ParallelUtils.h:96
Ioss::ParallelUtils
Definition: Ioss_ParallelUtils.h:49
Ioss::ParallelUtils::barrier
void barrier() const
Definition: Ioss_ParallelUtils.C:326
anonymous_namespace{io_info.C}::name
std::string name(const Ioss::GroupingEntity *entity)
Definition: io_info.C:89
MPI_Comm
int MPI_Comm
Definition: Ioss_CodeTypes.h:96
Ioss::ParallelUtils::communicator_
MPI_Comm communicator_
Definition: Ioss_ParallelUtils.h:148
Ioss::ParallelUtils::ParallelUtils
ParallelUtils(MPI_Comm the_communicator)
Definition: Ioss_ParallelUtils.C:76
Ioss_CodeTypes.h
Ioss::ParallelUtils::MinMax
MinMax
Definition: Ioss_ParallelUtils.h:58
PAR_UNUSED
#define PAR_UNUSED(x)
Definition: Ioss_CodeTypes.h:89
Ioss::ParallelUtils::progress
void progress(const std::string &output) const
Definition: Ioss_ParallelUtils.C:522
Ioss::SerializeIO::getOwner
static int getOwner()
Definition: Ioss_SerializeIO.h:94