DIY  3.0
data-parallel out-of-core C++ library
 All Classes Namespaces Functions Typedefs Groups Pages
kdtree-sampling.hpp
1 #ifndef DIY_DETAIL_ALGORITHMS_KDTREE_SAMPLING_HPP
2 #define DIY_DETAIL_ALGORITHMS_KDTREE_SAMPLING_HPP
3 
4 #include <vector>
5 #include <cassert>
6 #include "../../partners/all-reduce.hpp"
7 #include "../../log.hpp"
8 
9 // TODO: technically, what's done now is not a perfect subsample:
10 // we take the same number of samples from every block, in reality this number should be selected at random,
11 // so that the total number of samples adds up to samples*nblocks
12 //
13 // NB: random samples are chosen using rand(), which is assumed to be seeded
14 // externally. Once we switch to C++11, we should use its more advanced
15 // random number generators (and take a generator as an external parameter)
16 // (TODO)
17 
18 namespace diy
19 {
20 namespace detail
21 {
22 
23 template<class Block, class Point>
24 struct KDTreeSamplingPartition
25 {
26  typedef diy::RegularContinuousLink RCLink;
27  typedef diy::ContinuousBounds Bounds;
28 
29  typedef std::vector<float> Samples;
30 
31  KDTreeSamplingPartition(int dim,
32  std::vector<Point> Block::* points,
33  size_t samples):
34  dim_(dim), points_(points), samples_(samples) {}
35 
36  void operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners) const;
37 
38  int divide_gid(int gid, bool lower, int round, int rounds) const;
39  void update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const;
40  void split_to_neighbors(Block* b, const diy::ReduceProxy& srp, int dim) const;
42  find_wrap(const Bounds& bounds, const Bounds& nbr_bounds, const Bounds& domain) const;
43 
44  void compute_local_samples(Block* b, const diy::ReduceProxy& srp, int dim) const;
45  void add_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const;
46  void receive_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const;
47  void forward_samples(Block* b, const diy::ReduceProxy& srp, const Samples& samples) const;
48 
49  void enqueue_exchange(Block* b, const diy::ReduceProxy& srp, int dim, const Samples& samples) const;
50  void dequeue_exchange(Block* b, const diy::ReduceProxy& srp, int dim) const;
51 
52  void update_neighbor_bounds(Bounds& bounds, float split, int dim, bool lower) const;
53  bool intersects(const Bounds& x, const Bounds& y, int dim, bool wrap, const Bounds& domain) const;
54  float find_split(const Bounds& changed, const Bounds& original) const;
55 
56  int dim_;
57  std::vector<Point> Block::* points_;
58  size_t samples_;
59 };
60 
61 }
62 }
63 
64 
65 template<class Block, class Point>
66 void
67 diy::detail::KDTreeSamplingPartition<Block,Point>::
68 operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners) const
69 {
70  int dim;
71  if (srp.round() < partners.rounds())
72  dim = partners.dim(srp.round());
73  else
74  dim = partners.dim(srp.round() - 1);
75 
76  if (srp.round() == partners.rounds())
77  update_links(b, srp, dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
78  else if (partners.swap_round(srp.round()) && partners.sub_round(srp.round()) < 0) // link round
79  {
80  dequeue_exchange(b, srp, dim); // from the swap round
81  split_to_neighbors(b, srp, dim);
82  }
83  else if (partners.swap_round(srp.round()))
84  {
85  Samples samples;
86  receive_samples(b, srp, samples);
87  enqueue_exchange(b, srp, dim, samples);
88  } else if (partners.sub_round(srp.round()) == 0)
89  {
90  if (srp.round() > 0)
91  {
92  int prev_dim = dim - 1;
93  if (prev_dim < 0)
94  prev_dim += dim_;
95  update_links(b, srp, prev_dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
96  }
97 
98  compute_local_samples(b, srp, dim);
99  } else if (partners.sub_round(srp.round()) < (int) partners.histogram.rounds()/2) // we are reusing partners class, so really we are talking about the samples rounds here
100  {
101  Samples samples;
102  add_samples(b, srp, samples);
103  srp.enqueue(srp.out_link().target(0), samples);
104  } else
105  {
106  Samples samples;
107  add_samples(b, srp, samples);
108  if (samples.size() != 1)
109  {
110  // pick the median
111  std::nth_element(samples.begin(), samples.begin() + samples.size()/2, samples.end());
112  std::swap(samples[0], samples[samples.size()/2]);
113  //std::sort(samples.begin(), samples.end());
114  //samples[0] = (samples[samples.size()/2] + samples[samples.size()/2 + 1])/2;
115  samples.resize(1);
116  }
117  forward_samples(b, srp, samples);
118  }
119 }
120 
121 template<class Block, class Point>
122 int
123 diy::detail::KDTreeSamplingPartition<Block,Point>::
124 divide_gid(int gid, bool lower, int round, int rounds) const
125 {
126  if (lower)
127  gid &= ~(1 << (rounds - 1 - round));
128  else
129  gid |= (1 << (rounds - 1 - round));
130  return gid;
131 }
132 
133 // round here is the outer iteration of the algorithm
134 template<class Block, class Point>
135 void
136 diy::detail::KDTreeSamplingPartition<Block,Point>::
137 update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const
138 {
139  auto log = get_logger();
140  int gid = srp.gid();
141  int lid = srp.master()->lid(gid);
142  RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
143 
144  // (gid, dir) -> i
145  std::map<std::pair<int,diy::Direction>, int> link_map;
146  for (int i = 0; i < link->size(); ++i)
147  link_map[std::make_pair(link->target(i).gid, link->direction(i))] = i;
148 
149  // NB: srp.enqueue(..., ...) should match the link
150  std::vector<float> splits(link->size());
151  for (int i = 0; i < link->size(); ++i)
152  {
153  float split; diy::Direction dir;
154 
155  int in_gid = link->target(i).gid;
156  while(srp.incoming(in_gid))
157  {
158  srp.dequeue(in_gid, split);
159  srp.dequeue(in_gid, dir);
160 
161  // reverse dir
162  for (int j = 0; j < dim_; ++j)
163  dir[j] = -dir[j];
164 
165  int k = link_map[std::make_pair(in_gid, dir)];
166  log->trace("{} {} {} -> {}", in_gid, dir, split, k);
167  splits[k] = split;
168  }
169  }
170 
171  RCLink new_link(dim_, link->core(), link->core());
172 
173  bool lower = !(gid & (1 << (rounds - 1 - round)));
174 
175  // fill out the new link
176  for (int i = 0; i < link->size(); ++i)
177  {
178  diy::Direction dir = link->direction(i);
179  //diy::Direction wrap_dir = link->wrap(i); // we don't use existing wrap, but restore it from scratch
180  if (dir[dim] != 0)
181  {
182  if ((dir[dim] < 0 && lower) || (dir[dim] > 0 && !lower))
183  {
184  int nbr_gid = divide_gid(link->target(i).gid, !lower, round, rounds);
185  diy::BlockID nbr = { nbr_gid, srp.assigner().rank(nbr_gid) };
186  new_link.add_neighbor(nbr);
187 
188  new_link.add_direction(dir);
189 
190  Bounds bounds = link->bounds(i);
191  update_neighbor_bounds(bounds, splits[i], dim, !lower);
192  new_link.add_bounds(bounds);
193 
194  if (wrap)
195  new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
196  else
197  new_link.add_wrap(diy::Direction());
198  }
199  } else // non-aligned side
200  {
201  for (int j = 0; j < 2; ++j)
202  {
203  int nbr_gid = divide_gid(link->target(i).gid, j == 0, round, rounds);
204 
205  Bounds bounds = link->bounds(i);
206  update_neighbor_bounds(bounds, splits[i], dim, j == 0);
207 
208  if (intersects(bounds, new_link.bounds(), dim, wrap, domain))
209  {
210  diy::BlockID nbr = { nbr_gid, srp.assigner().rank(nbr_gid) };
211  new_link.add_neighbor(nbr);
212  new_link.add_direction(dir);
213  new_link.add_bounds(bounds);
214 
215  if (wrap)
216  new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
217  else
218  new_link.add_wrap(diy::Direction());
219  }
220  }
221  }
222  }
223 
224  // add link to the dual block
225  int dual_gid = divide_gid(gid, !lower, round, rounds);
226  diy::BlockID dual = { dual_gid, srp.assigner().rank(dual_gid) };
227  new_link.add_neighbor(dual);
228 
229  Bounds nbr_bounds = link->bounds(); // old block bounds
230  update_neighbor_bounds(nbr_bounds, find_split(new_link.bounds(), nbr_bounds), dim, !lower);
231  new_link.add_bounds(nbr_bounds);
232 
233  new_link.add_wrap(diy::Direction()); // dual block cannot be wrapped
234 
235  if (lower)
236  {
237  diy::Direction right;
238  right[dim] = 1;
239  new_link.add_direction(right);
240  } else
241  {
242  diy::Direction left;
243  left[dim] = -1;
244  new_link.add_direction(left);
245  }
246 
247  // update the link; notice that this won't conflict with anything since
248  // reduce is using its own notion of the link constructed through the
249  // partners
250  link->swap(new_link);
251 }
252 
253 template<class Block, class Point>
254 void
255 diy::detail::KDTreeSamplingPartition<Block,Point>::
256 split_to_neighbors(Block* b, const diy::ReduceProxy& srp, int dim) const
257 {
258  int lid = srp.master()->lid(srp.gid());
259  RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
260 
261  // determine split
262  float split = find_split(link->core(), link->bounds());
263 
264  for (int i = 0; i < link->size(); ++i)
265  {
266  srp.enqueue(link->target(i), split);
267  srp.enqueue(link->target(i), link->direction(i));
268  }
269 }
270 
271 template<class Block, class Point>
272 void
273 diy::detail::KDTreeSamplingPartition<Block,Point>::
274 compute_local_samples(Block* b, const diy::ReduceProxy& srp, int dim) const
275 {
276  // compute and enqueue local samples
277  Samples samples;
278  size_t points_size = (b->*points_).size();
279  size_t n = std::min(points_size, samples_);
280  samples.reserve(n);
281  for (size_t i = 0; i < n; ++i)
282  {
283  float x = (b->*points_)[rand() % points_size][dim];
284  samples.push_back(x);
285  }
286 
287  srp.enqueue(srp.out_link().target(0), samples);
288 }
289 
290 template<class Block, class Point>
291 void
292 diy::detail::KDTreeSamplingPartition<Block,Point>::
293 add_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const
294 {
295  // dequeue and combine the samples
296  for (int i = 0; i < srp.in_link().size(); ++i)
297  {
298  int nbr_gid = srp.in_link().target(i).gid;
299 
300  Samples smpls;
301  srp.dequeue(nbr_gid, smpls);
302  for (size_t j = 0; j < smpls.size(); ++j)
303  samples.push_back(smpls[j]);
304  }
305 }
306 
307 template<class Block, class Point>
308 void
309 diy::detail::KDTreeSamplingPartition<Block,Point>::
310 receive_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const
311 {
312  srp.dequeue(srp.in_link().target(0).gid, samples);
313 }
314 
315 template<class Block, class Point>
316 void
317 diy::detail::KDTreeSamplingPartition<Block,Point>::
318 forward_samples(Block* b, const diy::ReduceProxy& srp, const Samples& samples) const
319 {
320  for (int i = 0; i < srp.out_link().size(); ++i)
321  srp.enqueue(srp.out_link().target(i), samples);
322 }
323 
324 template<class Block, class Point>
325 void
326 diy::detail::KDTreeSamplingPartition<Block,Point>::
327 enqueue_exchange(Block* b, const diy::ReduceProxy& srp, int dim, const Samples& samples) const
328 {
329  int lid = srp.master()->lid(srp.gid());
330  RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
331 
332  int k = srp.out_link().size();
333 
334  if (k == 0) // final round; nothing needs to be sent; this is actually redundant
335  return;
336 
337  // pick split points
338  float split = samples[0];
339 
340  // subset and enqueue
341  std::vector< std::vector<Point> > out_points(srp.out_link().size());
342  for (size_t i = 0; i < (b->*points_).size(); ++i)
343  {
344  float x = (b->*points_)[i][dim];
345  int loc = x < split ? 0 : 1;
346  out_points[loc].push_back((b->*points_)[i]);
347  }
348  int pos = -1;
349  for (int i = 0; i < k; ++i)
350  {
351  if (srp.out_link().target(i).gid == srp.gid())
352  {
353  (b->*points_).swap(out_points[i]);
354  pos = i;
355  }
356  else
357  srp.enqueue(srp.out_link().target(i), out_points[i]);
358  }
359  if (pos == 0)
360  link->core().max[dim] = split;
361  else
362  link->core().min[dim] = split;
363 }
364 
365 template<class Block, class Point>
366 void
367 diy::detail::KDTreeSamplingPartition<Block,Point>::
368 dequeue_exchange(Block* b, const diy::ReduceProxy& srp, int dim) const
369 {
370  int lid = srp.master()->lid(srp.gid());
371  RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
372 
373  for (int i = 0; i < srp.in_link().size(); ++i)
374  {
375  int nbr_gid = srp.in_link().target(i).gid;
376  if (nbr_gid == srp.gid())
377  continue;
378 
379  std::vector<Point> in_points;
380  srp.dequeue(nbr_gid, in_points);
381  for (size_t j = 0; j < in_points.size(); ++j)
382  {
383  if (in_points[j][dim] < link->core().min[dim] || in_points[j][dim] > link->core().max[dim])
384  throw std::runtime_error(fmt::format("Dequeued {} outside [{},{}] ({})",
385  in_points[j][dim], link->core().min[dim], link->core().max[dim], dim));
386  (b->*points_).push_back(in_points[j]);
387  }
388  }
389 }
390 
391 template<class Block, class Point>
392 void
393 diy::detail::KDTreeSamplingPartition<Block,Point>::
394 update_neighbor_bounds(Bounds& bounds, float split, int dim, bool lower) const
395 {
396  if (lower)
397  bounds.max[dim] = split;
398  else
399  bounds.min[dim] = split;
400 }
401 
402 template<class Block, class Point>
403 bool
404 diy::detail::KDTreeSamplingPartition<Block,Point>::
405 intersects(const Bounds& x, const Bounds& y, int dim, bool wrap, const Bounds& domain) const
406 {
407  if (wrap)
408  {
409  if (x.min[dim] == domain.min[dim] && y.max[dim] == domain.max[dim])
410  return true;
411  if (y.min[dim] == domain.min[dim] && x.max[dim] == domain.max[dim])
412  return true;
413  }
414  return x.min[dim] <= y.max[dim] && y.min[dim] <= x.max[dim];
415 }
416 
417 template<class Block, class Point>
418 float
419 diy::detail::KDTreeSamplingPartition<Block,Point>::
420 find_split(const Bounds& changed, const Bounds& original) const
421 {
422  for (int i = 0; i < dim_; ++i)
423  {
424  if (changed.min[i] != original.min[i])
425  return changed.min[i];
426  if (changed.max[i] != original.max[i])
427  return changed.max[i];
428  }
429  assert(0);
430  return -1;
431 }
432 
433 template<class Block, class Point>
435 diy::detail::KDTreeSamplingPartition<Block,Point>::
436 find_wrap(const Bounds& bounds, const Bounds& nbr_bounds, const Bounds& domain) const
437 {
438  diy::Direction wrap;
439  for (int i = 0; i < dim_; ++i)
440  {
441  if (bounds.min[i] == domain.min[i] && nbr_bounds.max[i] == domain.max[i])
442  wrap[i] = -1;
443  if (bounds.max[i] == domain.max[i] && nbr_bounds.min[i] == domain.min[i])
444  wrap[i] = 1;
445  }
446  return wrap;
447 }
448 
449 
450 #endif
const Link & out_link() const
returns outgoing link
Definition: reduce.hpp:70
Enables communication within a group during a reduction. DIY creates the ReduceProxy for you in diy::...
Definition: reduce.hpp:15
void dequeue(int from, T &x, void(*load)(BinaryBuffer &, T &)=&::diy::load< T >) const
Dequeue data whose size can be determined automatically (e.g., STL vector) and that was previously en...
Definition: proxy.hpp:42
unsigned round() const
returns current round number
Definition: reduce.hpp:66
Definition: types.hpp:10
virtual int rank(int gid) const =0
returns the process rank of the block with global id gid (need not be local)
int lid(int gid__) const
return the local id of the local block with global id gid, or -1 if not local
Definition: master.hpp:221
const Link & in_link() const
returns incoming link
Definition: reduce.hpp:68
Definition: types.hpp:16
Definition: types.hpp:30
const Assigner & assigner() const
returns the assigner
Definition: reduce.hpp:74
void enqueue(const BlockID &to, const T &x, void(*save)(BinaryBuffer &, const T &)=&::diy::save< T >) const
Enqueue data whose size can be determined automatically, e.g., an STL vector.
Definition: proxy.hpp:24