DIY  3.0
data-parallel out-of-core C++ library
 All Classes Namespaces Functions Typedefs Groups Pages
block.hpp
1 #ifndef DIY_IO_BLOCK_HPP
2 #define DIY_IO_BLOCK_HPP
3 
4 #include <string>
5 #include <algorithm>
6 #include <stdexcept>
7 
8 #include <unistd.h>
9 #include <sys/stat.h>
10 #include <dirent.h>
11 
12 #include "../mpi.hpp"
13 #include "../assigner.hpp"
14 #include "../master.hpp"
15 #include "../storage.hpp"
16 #include "../log.hpp"
17 
18 // Read and write collections of blocks using MPI-IO
19 namespace diy
20 {
21 namespace io
22 {
23  namespace detail
24  {
25  typedef mpi::io::offset offset_t;
26 
28  {
29  GidOffsetCount(): // need to initialize a vector of given size
30  gid(-1), offset(0), count(0) {}
31 
32  GidOffsetCount(int gid_, offset_t offset_, offset_t count_):
33  gid(gid_), offset(offset_), count(count_) {}
34 
35  bool operator<(const GidOffsetCount& other) const { return gid < other.gid; }
36 
37  int gid;
38  offset_t offset;
39  offset_t count;
40  };
41  }
42 }
43 
44 // Serialize GidOffsetCount explicitly, to avoid alignment and unitialized data issues
45 // (to get identical output files given the same block input)
46 template<>
47 struct Serialization<io::detail::GidOffsetCount>
48 {
50 
51  static void save(BinaryBuffer& bb, const GidOffsetCount& x)
52  {
53  diy::save(bb, x.gid);
54  diy::save(bb, x.offset);
55  diy::save(bb, x.count);
56  }
57 
58  static void load(BinaryBuffer& bb, GidOffsetCount& x)
59  {
60  diy::load(bb, x.gid);
61  diy::load(bb, x.offset);
62  diy::load(bb, x.count);
63  }
64 };
65 
66 namespace io
67 {
72  inline
73  void
74  write_blocks(const std::string& outfilename,
75  const mpi::communicator& comm,
76  Master& master,
77  const MemoryBuffer& extra = MemoryBuffer(),
78  Master::SaveBlock save = 0)
79  {
80  if (!save) save = master.saver(); // save is likely to be different from master.save()
81 
82  typedef detail::offset_t offset_t;
83  typedef detail::GidOffsetCount GidOffsetCount;
84 
85  unsigned size = master.size(),
86  max_size, min_size;
87  mpi::all_reduce(comm, size, max_size, mpi::maximum<unsigned>());
88  mpi::all_reduce(comm, size, min_size, mpi::minimum<unsigned>());
89 
90  // truncate the file
91  if (comm.rank() == 0)
92  truncate(outfilename.c_str(), 0);
93 
94  mpi::io::file f(comm, outfilename, mpi::io::file::wronly | mpi::io::file::create);
95 
96  offset_t start = 0, shift;
97  std::vector<GidOffsetCount> offset_counts;
98  unsigned i;
99  for (i = 0; i < max_size; ++i)
100  {
101  offset_t count = 0,
102  offset;
103  if (i < size)
104  {
105  // get the block from master and serialize it
106  const void* block = master.get(i);
107  MemoryBuffer bb;
108  LinkFactory::save(bb, master.link(i));
109  save(block, bb);
110  count = bb.buffer.size();
111  mpi::scan(comm, count, offset, std::plus<offset_t>());
112  offset += start - count;
113  mpi::all_reduce(comm, count, shift, std::plus<offset_t>());
114  start += shift;
115 
116  if (i < min_size) // up to min_size, we can do collective IO
117  f.write_at_all(offset, bb.buffer);
118  else
119  f.write_at(offset, bb.buffer);
120 
121  offset_counts.push_back(GidOffsetCount(master.gid(i), offset, count));
122  } else
123  {
124  // matching global operations
125  mpi::scan(comm, count, offset, std::plus<offset_t>());
126  mpi::all_reduce(comm, count, shift, std::plus<offset_t>());
127 
128  // -1 indicates that there is no block written here from this rank
129  offset_counts.push_back(GidOffsetCount(-1, offset, count));
130  }
131  }
132 
133  if (comm.rank() == 0)
134  {
135  // round-about way of gather vector of vectors of GidOffsetCount to avoid registering a new mpi datatype
136  std::vector< std::vector<char> > gathered_offset_count_buffers;
137  MemoryBuffer oc_buffer; diy::save(oc_buffer, offset_counts);
138  mpi::gather(comm, oc_buffer.buffer, gathered_offset_count_buffers, 0);
139 
140  std::vector<GidOffsetCount> all_offset_counts;
141  for (unsigned i = 0; i < gathered_offset_count_buffers.size(); ++i)
142  {
143  MemoryBuffer oc_buffer; oc_buffer.buffer.swap(gathered_offset_count_buffers[i]);
144  std::vector<GidOffsetCount> offset_counts;
145  diy::load(oc_buffer, offset_counts);
146  for (unsigned j = 0; j < offset_counts.size(); ++j)
147  if (offset_counts[j].gid != -1)
148  all_offset_counts.push_back(offset_counts[j]);
149  }
150  std::sort(all_offset_counts.begin(), all_offset_counts.end()); // sorts by gid
151 
152  MemoryBuffer bb;
153  diy::save(bb, all_offset_counts);
154  diy::save(bb, extra);
155  size_t footer_size = bb.size();
156  diy::save(bb, footer_size);
157 
158  // find footer_offset as the max of (offset + count)
159  offset_t footer_offset = 0;
160  for (unsigned i = 0; i < all_offset_counts.size(); ++i)
161  {
162  offset_t end = all_offset_counts[i].offset + all_offset_counts[i].count;
163  if (end > footer_offset)
164  footer_offset = end;
165  }
166  f.write_at(footer_offset, bb.buffer);
167  } else
168  {
169  MemoryBuffer oc_buffer; diy::save(oc_buffer, offset_counts);
170  mpi::gather(comm, oc_buffer.buffer, 0);
171  }
172  }
173 
178  inline
179  void
180  read_blocks(const std::string& infilename,
181  const mpi::communicator& comm,
182  Assigner& assigner,
183  Master& master,
184  MemoryBuffer& extra,
185  Master::LoadBlock load = 0)
186  {
187  if (!load) load = master.loader(); // load is likely to be different from master.load()
188 
189  typedef detail::offset_t offset_t;
190  typedef detail::GidOffsetCount GidOffsetCount;
191 
192  mpi::io::file f(comm, infilename, mpi::io::file::rdonly);
193 
194  offset_t footer_offset = f.size() - sizeof(size_t);
195  size_t footer_size;
196 
197  // Read the size
198  f.read_at_all(footer_offset, (char*) &footer_size, sizeof(footer_size));
199 
200  // Read all_offset_counts
201  footer_offset -= footer_size;
202  MemoryBuffer footer;
203  footer.buffer.resize(footer_size);
204  f.read_at_all(footer_offset, footer.buffer);
205 
206  std::vector<GidOffsetCount> all_offset_counts;
207  diy::load(footer, all_offset_counts);
208  diy::load(footer, extra);
209  extra.reset();
210 
211  // Get local gids from assigner
212  size_t size = all_offset_counts.size();
213  assigner.set_nblocks(size);
214  std::vector<int> gids;
215  assigner.local_gids(comm.rank(), gids);
216 
217  for (unsigned i = 0; i < gids.size(); ++i)
218  {
219  if (gids[i] != all_offset_counts[gids[i]].gid)
220  get_logger()->warn("gids don't match in diy::io::read_blocks(), {} vs {}",
221  gids[i], all_offset_counts[gids[i]].gid);
222 
223  offset_t offset = all_offset_counts[gids[i]].offset,
224  count = all_offset_counts[gids[i]].count;
225  MemoryBuffer bb;
226  bb.buffer.resize(count);
227  f.read_at(offset, bb.buffer);
228  Link* l = LinkFactory::load(bb);
229  l->fix(assigner);
230  void* b = master.create();
231  load(b, bb);
232  master.add(gids[i], b, l);
233  }
234  }
235 
236 
237  // Functions without the extra buffer, for compatibility with the old code
238  inline
239  void
240  write_blocks(const std::string& outfilename,
241  const mpi::communicator& comm,
242  Master& master,
243  Master::SaveBlock save)
244  {
245  MemoryBuffer extra;
246  write_blocks(outfilename, comm, master, extra, save);
247  }
248 
249  inline
250  void
251  read_blocks(const std::string& infilename,
252  const mpi::communicator& comm,
253  Assigner& assigner,
254  Master& master,
255  Master::LoadBlock load = 0)
256  {
257  MemoryBuffer extra; // dummy
258  read_blocks(infilename, comm, assigner, master, extra, load);
259  }
260 
261 namespace split
262 {
267  inline
268  void
269  write_blocks(const std::string& outfilename,
270  const mpi::communicator& comm,
271  Master& master,
272  const MemoryBuffer& extra = MemoryBuffer(),
273  Master::SaveBlock save = 0)
274  {
275  if (!save) save = master.saver(); // save is likely to be different from master.save()
276 
277  bool proceed = false;
278  size_t size = 0;
279  if (comm.rank() == 0)
280  {
281  struct stat s;
282  if (stat(outfilename.c_str(), &s) == 0)
283  {
284  if (S_ISDIR(s.st_mode))
285  proceed = true;
286  } else if (mkdir(outfilename.c_str(), 0755) == 0)
287  proceed = true;
288  mpi::broadcast(comm, proceed, 0);
289  mpi::reduce(comm, (size_t) master.size(), size, 0, std::plus<size_t>());
290  } else
291  {
292  mpi::broadcast(comm, proceed, 0);
293  mpi::reduce(comm, (size_t) master.size(), 0, std::plus<size_t>());
294  }
295 
296  if (!proceed)
297  throw std::runtime_error("Cannot access or create directory: " + outfilename);
298 
299  for (int i = 0; i < (int)master.size(); ++i)
300  {
301  const void* block = master.get(i);
302 
303  std::string filename = fmt::format("{}/{}", outfilename, master.gid(i));
304 
305  ::diy::detail::FileBuffer bb(fopen(filename.c_str(), "w"));
306 
307  LinkFactory::save(bb, master.link(i));
308  save(block, bb);
309 
310  fclose(bb.file);
311  }
312 
313  if (comm.rank() == 0)
314  {
315  // save the extra buffer
316  std::string filename = outfilename + "/extra";
317  ::diy::detail::FileBuffer bb(fopen(filename.c_str(), "w"));
318  ::diy::save(bb, size);
319  ::diy::save(bb, extra);
320  fclose(bb.file);
321  }
322  }
323 
328  inline
329  void
330  read_blocks(const std::string& infilename,
331  const mpi::communicator& comm,
332  Assigner& assigner,
333  Master& master,
334  MemoryBuffer& extra,
335  Master::LoadBlock load = 0)
336  {
337  if (!load) load = master.loader(); // load is likely to be different from master.load()
338 
339  // load the extra buffer and size
340  size_t size;
341  std::string filename = infilename + "/extra";
342  ::diy::detail::FileBuffer bb(fopen(filename.c_str(), "r"));
343  ::diy::load(bb, size);
344  ::diy::load(bb, extra);
345  extra.reset();
346  fclose(bb.file);
347 
348  // Get local gids from assigner
349  assigner.set_nblocks(size);
350  std::vector<int> gids;
351  assigner.local_gids(comm.rank(), gids);
352 
353  // Read our blocks;
354  for (unsigned i = 0; i < gids.size(); ++i)
355  {
356  std::string filename = fmt::format("{}/{}", infilename, gids[i]);
357 
358  ::diy::detail::FileBuffer bb(fopen(filename.c_str(), "r"));
359  Link* l = LinkFactory::load(bb);
360  l->fix(assigner);
361  void* b = master.create();
362  load(b, bb);
363  master.add(gids[i], b, l);
364 
365  fclose(bb.file);
366  }
367  }
368 
369  // Functions without the extra buffer, for compatibility with the old code
370  inline
371  void
372  write_blocks(const std::string& outfilename,
373  const mpi::communicator& comm,
374  Master& master,
375  Master::SaveBlock save)
376  {
377  MemoryBuffer extra;
378  write_blocks(outfilename, comm, master, extra, save);
379  }
380 
381  inline
382  void
383  read_blocks(const std::string& infilename,
384  const mpi::communicator& comm,
385  Assigner& assigner,
386  Master& master,
387  Master::LoadBlock load = 0)
388  {
389  MemoryBuffer extra; // dummy
390  read_blocks(infilename, comm, assigner, master, extra, load);
391  }
392 } // split
393 } // io
394 } // diy
395 
396 #endif
Definition: operations.hpp:10
void read_blocks(const std::string &infilename, const mpi::communicator &comm, Assigner &assigner, Master &master, MemoryBuffer &extra, Master::LoadBlock load=0)
Read blocks from storage independently from one file per process.
Definition: block.hpp:330
void load(BinaryBuffer &bb, T &x)
Loads x from bb by calling diy::Serialization<T>::load(bb,x).
Definition: serialization.hpp:106
void * get(int i)
return the i-th block, loading it if necessary
Definition: master.hpp:217
void set_nblocks(int nblocks)
sets the total number of global blocks
Definition: assigner.hpp:28
void save(BinaryBuffer &bb, const T &x)
Saves x to bb by calling diy::Serialization<T>::save(bb,x).
Definition: serialization.hpp:102
int gid(int i) const
return gid of the i-th block
Definition: master.hpp:219
Definition: operations.hpp:12
void reduce(const communicator &comm, const T &in, T &out, int root, const Op &op)
reduce
Definition: collectives.hpp:286
Definition: block.hpp:27
Definition: assigner.hpp:11
void scan(const communicator &comm, const T &in, T &out, const Op &op)
scan
Definition: collectives.hpp:314
int add(int gid, void *b, Link *l)
add a block
Definition: master.hpp:658
Definition: master.hpp:35
Wraps MPI file IO.
Definition: io.hpp:16
Simple wrapper around MPI_Comm.
Definition: communicator.hpp:8
Definition: serialization.hpp:26
virtual void local_gids(int rank, std::vector< int > &gids) const =0
gets the local gids for a given process rank
Main interface to serialization, meant to be specialized for the types that require special handling...
Definition: serialization.hpp:90
void all_reduce(const communicator &comm, const T &in, T &out, const Op &op)
all_reduce
Definition: collectives.hpp:300
void write_blocks(const std::string &outfilename, const mpi::communicator &comm, Master &master, const MemoryBuffer &extra=MemoryBuffer(), Master::SaveBlock save=0)
Write blocks to storage independently in one file per process.
Definition: block.hpp:269
void broadcast(const communicator &comm, T &x, int root)
Broadcast to all processes in comm.
Definition: collectives.hpp:219
A serialization buffer.
Definition: serialization.hpp:19
void gather(const communicator &comm, const T &in, std::vector< T > &out, int root)
Gather from all processes in comm. On root process, out is resized to comm.size() and filled with ele...
Definition: collectives.hpp:242
unsigned size() const
return the number of local blocks
Definition: master.hpp:233
void sort(Master &master, const Assigner &assigner, std::vector< T > Block::*values, std::vector< T > Block::*samples, size_t num_samples, const Cmp &cmp, int k=2, bool samples_only=false)
sample sort values of each block, store the boundaries between blocks in samples
Definition: algorithms.hpp:25