DIY  3.0
data-parallel out-of-core C++ library
 All Classes Namespaces Functions Typedefs Groups Pages
kdtree.hpp
1 #ifndef DIY_DETAIL_ALGORITHMS_KDTREE_HPP
2 #define DIY_DETAIL_ALGORITHMS_KDTREE_HPP
3 
4 #include <vector>
5 #include <cassert>
6 #include "../../partners/all-reduce.hpp"
7 #include "../../log.hpp"
8 
9 namespace diy
10 {
11 namespace detail
12 {
13 
14 struct KDTreePartners;
15 
16 template<class Block, class Point>
17 struct KDTreePartition
18 {
19  typedef diy::RegularContinuousLink RCLink;
20  typedef diy::ContinuousBounds Bounds;
21 
22  typedef std::vector<size_t> Histogram;
23 
24  KDTreePartition(int dim,
25  std::vector<Point> Block::* points,
26  size_t bins):
27  dim_(dim), points_(points), bins_(bins) {}
28 
29  void operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners) const;
30 
31  int divide_gid(int gid, bool lower, int round, int rounds) const;
32  void update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const;
33  void split_to_neighbors(Block* b, const diy::ReduceProxy& srp, int dim) const;
35  find_wrap(const Bounds& bounds, const Bounds& nbr_bounds, const Bounds& domain) const;
36 
37  void compute_local_histogram(Block* b, const diy::ReduceProxy& srp, int dim) const;
38  void add_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) const;
39  void receive_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) const;
40  void forward_histogram(Block* b, const diy::ReduceProxy& srp, const Histogram& histogram) const;
41 
42  void enqueue_exchange(Block* b, const diy::ReduceProxy& srp, int dim, const Histogram& histogram) const;
43  void dequeue_exchange(Block* b, const diy::ReduceProxy& srp, int dim) const;
44 
45  void update_neighbor_bounds(Bounds& bounds, float split, int dim, bool lower) const;
46  bool intersects(const Bounds& x, const Bounds& y, int dim, bool wrap, const Bounds& domain) const;
47  float find_split(const Bounds& changed, const Bounds& original) const;
48 
49  int dim_;
50  std::vector<Point> Block::* points_;
51  size_t bins_;
52 };
53 
54 }
55 }
56 
57 struct diy::detail::KDTreePartners
58 {
59  // bool = are we in a swap (vs histogram) round
60  // int = round within that partner
61  typedef std::pair<bool, int> RoundType;
62  typedef diy::ContinuousBounds Bounds;
63 
64  KDTreePartners(int dim, int nblocks, bool wrap_, const Bounds& domain_):
65  decomposer(1, interval(0,nblocks-1), nblocks),
66  histogram(decomposer, 2),
67  swap(decomposer, 2, false),
68  wrap(wrap_),
69  domain(domain_)
70  {
71  for (unsigned i = 0; i < swap.rounds(); ++i)
72  {
73  // fill histogram rounds
74  for (unsigned j = 0; j < histogram.rounds(); ++j)
75  {
76  rounds_.push_back(std::make_pair(false, j));
77  dim_.push_back(i % dim);
78  if (j == histogram.rounds() / 2 - 1 - i)
79  j += 2*i;
80  }
81 
82  // fill swap round
83  rounds_.push_back(std::make_pair(true, i));
84  dim_.push_back(i % dim);
85 
86  // fill link round
87  rounds_.push_back(std::make_pair(true, -1)); // (true, -1) signals link round
88  dim_.push_back(i % dim);
89  }
90  }
91 
92  size_t rounds() const { return rounds_.size(); }
93  size_t swap_rounds() const { return swap.rounds(); }
94 
95  int dim(int round) const { return dim_[round]; }
96  bool swap_round(int round) const { return rounds_[round].first; }
97  int sub_round(int round) const { return rounds_[round].second; }
98 
99  inline bool active(int round, int gid, const diy::Master& m) const
100  {
101  if (round == (int) rounds())
102  return true;
103  else if (swap_round(round) && sub_round(round) < 0) // link round
104  return true;
105  else if (swap_round(round))
106  return swap.active(sub_round(round), gid, m);
107  else
108  return histogram.active(sub_round(round), gid, m);
109  }
110 
111  inline void incoming(int round, int gid, std::vector<int>& partners, const diy::Master& m) const
112  {
113  if (round == (int) rounds())
114  link_neighbors(-1, gid, partners, m);
115  else if (swap_round(round) && sub_round(round) < 0) // link round
116  swap.incoming(sub_round(round - 1) + 1, gid, partners, m);
117  else if (swap_round(round))
118  histogram.incoming(histogram.rounds(), gid, partners, m);
119  else
120  {
121  if (round > 0 && sub_round(round) == 0)
122  link_neighbors(-1, gid, partners, m);
123  else if (round > 0 && sub_round(round - 1) != sub_round(round) - 1) // jump through the histogram rounds
124  histogram.incoming(sub_round(round - 1) + 1, gid, partners, m);
125  else
126  histogram.incoming(sub_round(round), gid, partners, m);
127  }
128  }
129 
130  inline void outgoing(int round, int gid, std::vector<int>& partners, const diy::Master& m) const
131  {
132  if (round == (int) rounds())
133  swap.outgoing(sub_round(round-1) + 1, gid, partners, m);
134  else if (swap_round(round) && sub_round(round) < 0) // link round
135  link_neighbors(-1, gid, partners, m);
136  else if (swap_round(round))
137  swap.outgoing(sub_round(round), gid, partners, m);
138  else
139  histogram.outgoing(sub_round(round), gid, partners, m);
140  }
141 
142  inline void link_neighbors(int, int gid, std::vector<int>& partners, const diy::Master& m) const
143  {
144  int lid = m.lid(gid);
145  diy::Link* link = m.link(lid);
146 
147  std::set<int> result; // partners must be unique
148  for (int i = 0; i < link->size(); ++i)
149  result.insert(link->target(i).gid);
150 
151  for (std::set<int>::const_iterator it = result.begin(); it != result.end(); ++it)
152  partners.push_back(*it);
153  }
154 
155  // 1-D domain to feed into histogram and swap
157 
160 
161  std::vector<RoundType> rounds_;
162  std::vector<int> dim_;
163 
164  bool wrap;
165  Bounds domain;
166 };
167 
168 template<class Block, class Point>
169 void
170 diy::detail::KDTreePartition<Block,Point>::
171 operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners) const
172 {
173  int dim;
174  if (srp.round() < partners.rounds())
175  dim = partners.dim(srp.round());
176  else
177  dim = partners.dim(srp.round() - 1);
178 
179  if (srp.round() == partners.rounds())
180  update_links(b, srp, dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
181  else if (partners.swap_round(srp.round()) && partners.sub_round(srp.round()) < 0) // link round
182  {
183  dequeue_exchange(b, srp, dim); // from the swap round
184  split_to_neighbors(b, srp, dim);
185  }
186  else if (partners.swap_round(srp.round()))
187  {
188  Histogram histogram;
189  receive_histogram(b, srp, histogram);
190  enqueue_exchange(b, srp, dim, histogram);
191  } else if (partners.sub_round(srp.round()) == 0)
192  {
193  if (srp.round() > 0)
194  {
195  int prev_dim = dim - 1;
196  if (prev_dim < 0)
197  prev_dim += dim_;
198  update_links(b, srp, prev_dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
199  }
200 
201  compute_local_histogram(b, srp, dim);
202  } else if (partners.sub_round(srp.round()) < (int) partners.histogram.rounds()/2)
203  {
204  Histogram histogram(bins_);
205  add_histogram(b, srp, histogram);
206  srp.enqueue(srp.out_link().target(0), histogram);
207  }
208  else
209  {
210  Histogram histogram(bins_);
211  add_histogram(b, srp, histogram);
212  forward_histogram(b, srp, histogram);
213  }
214 }
215 
216 template<class Block, class Point>
217 int
218 diy::detail::KDTreePartition<Block,Point>::
219 divide_gid(int gid, bool lower, int round, int rounds) const
220 {
221  if (lower)
222  gid &= ~(1 << (rounds - 1 - round));
223  else
224  gid |= (1 << (rounds - 1 - round));
225  return gid;
226 }
227 
228 // round here is the outer iteration of the algorithm
229 template<class Block, class Point>
230 void
231 diy::detail::KDTreePartition<Block,Point>::
232 update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const
233 {
234  int gid = srp.gid();
235  int lid = srp.master()->lid(gid);
236  RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
237 
238  // (gid, dir) -> i
239  std::map<std::pair<int,diy::Direction>, int> link_map;
240  for (int i = 0; i < link->size(); ++i)
241  link_map[std::make_pair(link->target(i).gid, link->direction(i))] = i;
242 
243  // NB: srp.enqueue(..., ...) should match the link
244  std::vector<float> splits(link->size());
245  for (int i = 0; i < link->size(); ++i)
246  {
247  float split; diy::Direction dir;
248 
249  int in_gid = link->target(i).gid;
250  while(srp.incoming(in_gid))
251  {
252  srp.dequeue(in_gid, split);
253  srp.dequeue(in_gid, dir);
254 
255  // reverse dir
256  for (int j = 0; j < dim_; ++j)
257  dir[j] = -dir[j];
258 
259  int k = link_map[std::make_pair(in_gid, dir)];
260  splits[k] = split;
261  }
262  }
263 
264  RCLink new_link(dim_, link->core(), link->core());
265 
266  bool lower = !(gid & (1 << (rounds - 1 - round)));
267 
268  // fill out the new link
269  for (int i = 0; i < link->size(); ++i)
270  {
271  diy::Direction dir = link->direction(i);
272  //diy::Direction wrap_dir = link->wrap(i); // we don't use existing wrap, but restore it from scratch
273  if (dir[dim] != 0)
274  {
275  if ((dir[dim] < 0 && lower) || (dir[dim] > 0 && !lower))
276  {
277  int nbr_gid = divide_gid(link->target(i).gid, !lower, round, rounds);
278  diy::BlockID nbr = { nbr_gid, srp.assigner().rank(nbr_gid) };
279  new_link.add_neighbor(nbr);
280 
281  new_link.add_direction(dir);
282 
283  Bounds bounds = link->bounds(i);
284  update_neighbor_bounds(bounds, splits[i], dim, !lower);
285  new_link.add_bounds(bounds);
286 
287  if (wrap)
288  new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
289  else
290  new_link.add_wrap(diy::Direction());
291  }
292  } else // non-aligned side
293  {
294  for (int j = 0; j < 2; ++j)
295  {
296  int nbr_gid = divide_gid(link->target(i).gid, j == 0, round, rounds);
297 
298  Bounds bounds = link->bounds(i);
299  update_neighbor_bounds(bounds, splits[i], dim, j == 0);
300 
301  if (intersects(bounds, new_link.bounds(), dim, wrap, domain))
302  {
303  diy::BlockID nbr = { nbr_gid, srp.assigner().rank(nbr_gid) };
304  new_link.add_neighbor(nbr);
305  new_link.add_direction(dir);
306  new_link.add_bounds(bounds);
307 
308  if (wrap)
309  new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
310  else
311  new_link.add_wrap(diy::Direction());
312  }
313  }
314  }
315  }
316 
317  // add link to the dual block
318  int dual_gid = divide_gid(gid, !lower, round, rounds);
319  diy::BlockID dual = { dual_gid, srp.assigner().rank(dual_gid) };
320  new_link.add_neighbor(dual);
321 
322  Bounds nbr_bounds = link->bounds(); // old block bounds
323  update_neighbor_bounds(nbr_bounds, find_split(new_link.bounds(), nbr_bounds), dim, !lower);
324  new_link.add_bounds(nbr_bounds);
325 
326  new_link.add_wrap(diy::Direction()); // dual block cannot be wrapped
327 
328  if (lower)
329  {
330  diy::Direction right;
331  right[dim] = 1;
332  new_link.add_direction(right);
333  } else
334  {
335  diy::Direction left;
336  left[dim] = -1;
337  new_link.add_direction(left);
338  }
339 
340  // update the link; notice that this won't conflict with anything since
341  // reduce is using its own notion of the link constructed through the
342  // partners
343  link->swap(new_link);
344 }
345 
346 template<class Block, class Point>
347 void
348 diy::detail::KDTreePartition<Block,Point>::
349 split_to_neighbors(Block* b, const diy::ReduceProxy& srp, int dim) const
350 {
351  int lid = srp.master()->lid(srp.gid());
352  RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
353 
354  // determine split
355  float split = find_split(link->core(), link->bounds());
356 
357  for (int i = 0; i < link->size(); ++i)
358  {
359  srp.enqueue(link->target(i), split);
360  srp.enqueue(link->target(i), link->direction(i));
361  }
362 }
363 
364 template<class Block, class Point>
365 void
366 diy::detail::KDTreePartition<Block,Point>::
367 compute_local_histogram(Block* b, const diy::ReduceProxy& srp, int dim) const
368 {
369  int lid = srp.master()->lid(srp.gid());
370  RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
371 
372  // compute and enqueue local histogram
373  Histogram histogram(bins_);
374 
375  float width = (link->core().max[dim] - link->core().min[dim])/bins_;
376  for (size_t i = 0; i < (b->*points_).size(); ++i)
377  {
378  float x = (b->*points_)[i][dim];
379  int loc = (x - link->core().min[dim]) / width;
380  if (loc < 0)
381  throw std::runtime_error(fmt::format("{} {} {}", loc, x, link->core().min[dim]));
382  if (loc >= (int) bins_)
383  loc = bins_ - 1;
384  ++(histogram[loc]);
385  }
386 
387  srp.enqueue(srp.out_link().target(0), histogram);
388 }
389 
390 template<class Block, class Point>
391 void
392 diy::detail::KDTreePartition<Block,Point>::
393 add_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) const
394 {
395  // dequeue and add up the histograms
396  for (int i = 0; i < srp.in_link().size(); ++i)
397  {
398  int nbr_gid = srp.in_link().target(i).gid;
399 
400  Histogram hist;
401  srp.dequeue(nbr_gid, hist);
402  for (size_t i = 0; i < hist.size(); ++i)
403  histogram[i] += hist[i];
404  }
405 }
406 
407 template<class Block, class Point>
408 void
409 diy::detail::KDTreePartition<Block,Point>::
410 receive_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) const
411 {
412  srp.dequeue(srp.in_link().target(0).gid, histogram);
413 }
414 
415 template<class Block, class Point>
416 void
417 diy::detail::KDTreePartition<Block,Point>::
418 forward_histogram(Block* b, const diy::ReduceProxy& srp, const Histogram& histogram) const
419 {
420  for (int i = 0; i < srp.out_link().size(); ++i)
421  srp.enqueue(srp.out_link().target(i), histogram);
422 }
423 
424 template<class Block, class Point>
425 void
426 diy::detail::KDTreePartition<Block,Point>::
427 enqueue_exchange(Block* b, const diy::ReduceProxy& srp, int dim, const Histogram& histogram) const
428 {
429  auto log = get_logger();
430 
431  int lid = srp.master()->lid(srp.gid());
432  RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
433 
434  int k = srp.out_link().size();
435 
436  if (k == 0) // final round; nothing needs to be sent; this is actually redundant
437  return;
438 
439  // pick split points
440  size_t total = 0;
441  for (size_t i = 0; i < histogram.size(); ++i)
442  total += histogram[i];
443  log->trace("Histogram total: {}", total);
444 
445  size_t cur = 0;
446  float width = (link->core().max[dim] - link->core().min[dim])/bins_;
447  float split = 0;
448  for (size_t i = 0; i < histogram.size(); ++i)
449  {
450  if (cur + histogram[i] > total/2)
451  {
452  split = link->core().min[dim] + width*i;
453  break;
454  }
455  cur += histogram[i];
456  }
457  log->trace("Found split: {} (dim={}) in {} - {}", split, dim, link->core().min[dim], link->core().max[dim]);
458 
459  // subset and enqueue
460  std::vector< std::vector<Point> > out_points(srp.out_link().size());
461  for (size_t i = 0; i < (b->*points_).size(); ++i)
462  {
463  float x = (b->*points_)[i][dim];
464  int loc = x < split ? 0 : 1;
465  out_points[loc].push_back((b->*points_)[i]);
466  }
467  int pos = -1;
468  for (int i = 0; i < k; ++i)
469  {
470  if (srp.out_link().target(i).gid == srp.gid())
471  {
472  (b->*points_).swap(out_points[i]);
473  pos = i;
474  }
475  else
476  srp.enqueue(srp.out_link().target(i), out_points[i]);
477  }
478  if (pos == 0)
479  link->core().max[dim] = split;
480  else
481  link->core().min[dim] = split;
482 }
483 
484 template<class Block, class Point>
485 void
486 diy::detail::KDTreePartition<Block,Point>::
487 dequeue_exchange(Block* b, const diy::ReduceProxy& srp, int dim) const
488 {
489  int lid = srp.master()->lid(srp.gid());
490  RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
491 
492  for (int i = 0; i < srp.in_link().size(); ++i)
493  {
494  int nbr_gid = srp.in_link().target(i).gid;
495  if (nbr_gid == srp.gid())
496  continue;
497 
498  std::vector<Point> in_points;
499  srp.dequeue(nbr_gid, in_points);
500  for (size_t j = 0; j < in_points.size(); ++j)
501  {
502  if (in_points[j][dim] < link->core().min[dim] || in_points[j][dim] > link->core().max[dim])
503  throw std::runtime_error(fmt::format("Dequeued {} outside [{},{}] ({})",
504  in_points[j][dim], link->core().min[dim], link->core().max[dim], dim));
505  (b->*points_).push_back(in_points[j]);
506  }
507  }
508 }
509 
510 template<class Block, class Point>
511 void
512 diy::detail::KDTreePartition<Block,Point>::
513 update_neighbor_bounds(Bounds& bounds, float split, int dim, bool lower) const
514 {
515  if (lower)
516  bounds.max[dim] = split;
517  else
518  bounds.min[dim] = split;
519 }
520 
521 template<class Block, class Point>
522 bool
523 diy::detail::KDTreePartition<Block,Point>::
524 intersects(const Bounds& x, const Bounds& y, int dim, bool wrap, const Bounds& domain) const
525 {
526  if (wrap)
527  {
528  if (x.min[dim] == domain.min[dim] && y.max[dim] == domain.max[dim])
529  return true;
530  if (y.min[dim] == domain.min[dim] && x.max[dim] == domain.max[dim])
531  return true;
532  }
533  return x.min[dim] <= y.max[dim] && y.min[dim] <= x.max[dim];
534 }
535 
536 template<class Block, class Point>
537 float
538 diy::detail::KDTreePartition<Block,Point>::
539 find_split(const Bounds& changed, const Bounds& original) const
540 {
541  for (int i = 0; i < dim_; ++i)
542  {
543  if (changed.min[i] != original.min[i])
544  return changed.min[i];
545  if (changed.max[i] != original.max[i])
546  return changed.max[i];
547  }
548  assert(0);
549  return -1;
550 }
551 
552 template<class Block, class Point>
554 diy::detail::KDTreePartition<Block,Point>::
555 find_wrap(const Bounds& bounds, const Bounds& nbr_bounds, const Bounds& domain) const
556 {
557  diy::Direction wrap;
558  for (int i = 0; i < dim_; ++i)
559  {
560  if (bounds.min[i] == domain.min[i] && nbr_bounds.max[i] == domain.max[i])
561  wrap[i] = -1;
562  if (bounds.max[i] == domain.max[i] && nbr_bounds.min[i] == domain.min[i])
563  wrap[i] = 1;
564  }
565  return wrap;
566 }
567 
568 
569 #endif
const Link & out_link() const
returns outgoing link
Definition: reduce.hpp:70
Enables communication within a group during a reduction. DIY creates the ReduceProxy for you in diy::...
Definition: reduce.hpp:15
void dequeue(int from, T &x, void(*load)(BinaryBuffer &, T &)=&::diy::load< T >) const
Dequeue data whose size can be determined automatically (e.g., STL vector) and that was previously en...
Definition: proxy.hpp:42
unsigned round() const
returns current round number
Definition: reduce.hpp:66
Definition: types.hpp:10
Decomposes a regular (discrete or continuous) domain into even blocks; creates Links with Bounds alon...
Definition: decomposition.hpp:75
virtual int rank(int gid) const =0
returns the process rank of the block with global id gid (need not be local)
Allreduce (reduction with results broadcasted to all blocks) is implemented as two merge reductions...
Definition: all-reduce.hpp:20
const Link & in_link() const
returns incoming link
Definition: reduce.hpp:68
Definition: master.hpp:35
Definition: types.hpp:16
diy::DiscreteBounds interval(int from, int to)
Helper to create a 1-dimensional discrete domain with the specified extents.
Definition: types.hpp:28
Definition: types.hpp:30
const Assigner & assigner() const
returns the assigner
Definition: reduce.hpp:74
Partners for swap-reduce.
Definition: swap.hpp:16
int lid(int gid) const
return the local id of the local block with global id gid, or -1 if not local
Definition: master.hpp:221
void enqueue(const BlockID &to, const T &x, void(*save)(BinaryBuffer &, const T &)=&::diy::save< T >) const
Enqueue data whose size can be determined automatically, e.g., an STL vector.
Definition: proxy.hpp:24