IOSS  2.0
robin_growth_policy.h
Go to the documentation of this file.
1 /**
2  * MIT License
3  *
4  * Copyright (c) 2017 Tessil
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_ROBIN_GROWTH_POLICY_H
25 #define TSL_ROBIN_GROWTH_POLICY_H
26 
27 #include <algorithm>
28 #include <array>
29 #include <climits>
30 #include <cmath>
31 #include <cstddef>
32 #include <iterator>
33 #include <limits>
34 #include <ratio>
35 #include <stdexcept>
36 
/**
 * Internal assertion hook.
 *
 * When TSL_DEBUG is defined, tsl_assert(expr) forwards to the standard assert() macro.
 * Otherwise it expands to a no-op and `expr` is not evaluated at all, so the expression
 * must not carry side effects the program relies on.
 *
 * A definition of tsl_assert provided before this point is left untouched.
 */
#ifndef tsl_assert
#ifdef TSL_DEBUG
#include <cassert> // assert() is only needed by the debug expansion below.
#define tsl_assert(expr) assert(expr)
#else
#define tsl_assert(expr) (static_cast<void>(0))
#endif
#endif
44 
/**
 * If exceptions are enabled, throw the exception passed in parameter, otherwise call
 * std::terminate.
 *
 * `ex` is the exception type and `msg` the message handed to its constructor.
 * Exceptions count as enabled when the compiler advertises them (__cpp_exceptions,
 * __EXCEPTIONS, or _CPPUNWIND under MSVC) and TSL_NO_EXCEPTIONS is not defined.
 *
 * Without exceptions, builds that do not define NDEBUG print `msg` on stderr before
 * terminating. The message is passed as an argument of a fixed "%s" format so that a '%'
 * inside `msg` is never interpreted as a conversion specification.
 */
#ifndef TSL_THROW_OR_TERMINATE
#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
     (defined(_MSC_VER) && defined(_CPPUNWIND))) && \
    !defined(TSL_NO_EXCEPTIONS)
#define TSL_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
#else
#include <exception> // std::terminate
#ifdef NDEBUG
#define TSL_THROW_OR_TERMINATE(ex, msg) std::terminate()
#else
#include <cstdio>
#define TSL_THROW_OR_TERMINATE(ex, msg) \
  do { \
    std::fprintf(stderr, "%s\n", msg); \
    std::terminate(); \
  } while (0)
#endif
#endif
#endif
67 
/**
 * Branch hint: marks `exp` as expected to be true.
 *
 * With GCC or Clang the condition is normalised to 0/1 and passed to __builtin_expect.
 * With any other compiler the macro expands to the bare expression.
 * A definition of TSL_LIKELY provided before this point is left untouched.
 */
#ifndef TSL_LIKELY
#if defined(__GNUC__) || defined(__clang__)
#define TSL_LIKELY(exp) (__builtin_expect(!!(exp), true))
#else
#define TSL_LIKELY(exp) (exp)
#endif
#endif
75 
76 namespace tsl {
77  namespace rh {
78 
79  /**
80  * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two.
81  * It allows the table to use a mask operation instead of a modulo operation to map a hash to a
82  * bucket.
83  *
84  * GrowthFactor must be a power of two >= 2.
85  */
86  template <std::size_t GrowthFactor> class power_of_two_growth_policy
87  {
88  public:
89  /**
90  * Called on the hash table creation and on rehash. The number of buckets for the table is
91  * passed in parameter. This number is a minimum, the policy may update this value with a
92  * higher value if needed (but not lower).
93  *
94  * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and
95  * bucket_for_hash must always return 0 in this case.
96  */
97  explicit power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
98  {
99  if (min_bucket_count_in_out > max_bucket_count()) {
100  TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
101  }
102 
103  if (min_bucket_count_in_out > 0) {
104  min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
105  m_mask = min_bucket_count_in_out - 1;
106  }
107  else {
108  m_mask = 0;
109  }
110  }
111 
112  /**
113  * Return the bucket [0, bucket_count()) to which the hash belongs.
114  * If bucket_count() is 0, it must always return 0.
115  */
116  std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash & m_mask; }
117 
118  /**
119  * Return the number of buckets that should be used on next growth.
120  */
121  std::size_t next_bucket_count() const
122  {
123  if ((m_mask + 1) > max_bucket_count() / GrowthFactor) {
124  TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
125  }
126 
127  return (m_mask + 1) * GrowthFactor;
128  }
129 
130  /**
131  * Return the maximum number of buckets supported by the policy.
132  */
133  std::size_t max_bucket_count() const
134  {
135  // Largest power of two.
136  return (std::numeric_limits<std::size_t>::max() / 2) + 1;
137  }
138 
139  /**
140  * Reset the growth policy as if it was created with a bucket count of 0.
141  * After a clear, the policy must always return 0 when bucket_for_hash is called.
142  */
143  void clear() noexcept { m_mask = 0; }
144 
145  private:
146  static std::size_t round_up_to_power_of_two(std::size_t value)
147  {
148  if (is_power_of_two(value)) {
149  return value;
150  }
151 
152  if (value == 0) {
153  return 1;
154  }
155 
156  --value;
157  for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
158  value |= value >> i;
159  }
160 
161  return value + 1;
162  }
163 
164  static constexpr bool is_power_of_two(std::size_t value)
165  {
166  return value != 0 && (value & (value - 1)) == 0;
167  }
168 
169  protected:
170  static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
171  "GrowthFactor must be a power of two >= 2.");
172 
173  std::size_t m_mask;
174  };
175 
176  /**
177  * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
178  * to a bucket. Slower but it can be useful if you want a slower growth.
179  */
180  template <class GrowthFactor = std::ratio<3, 2>> class mod_growth_policy
181  {
182  public:
183  explicit mod_growth_policy(std::size_t &min_bucket_count_in_out)
184  {
185  if (min_bucket_count_in_out > max_bucket_count()) {
186  TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
187  }
188 
189  if (min_bucket_count_in_out > 0) {
190  m_mod = min_bucket_count_in_out;
191  }
192  else {
193  m_mod = 1;
194  }
195  }
196 
197  std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash % m_mod; }
198 
199  std::size_t next_bucket_count() const
200  {
201  if (m_mod == max_bucket_count()) {
202  TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
203  }
204 
205  const double next_bucket_count_ =
206  std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
207  if (!std::isnormal(next_bucket_count_)) {
208  TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
209  }
210 
211  if (next_bucket_count_ > double(max_bucket_count())) {
212  return max_bucket_count();
213  }
214  else {
215  return std::size_t(next_bucket_count_);
216  }
217  }
218 
219  std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }
220 
221  void clear() noexcept { m_mod = 1; }
222 
223  private:
224  static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
225  1.0 * GrowthFactor::num / GrowthFactor::den;
226  static const std::size_t MAX_BUCKET_COUNT = std::size_t(
227  double(std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR));
228 
229  static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
230 
231  std::size_t m_mod;
232  };
233 
namespace detail {

  /**
   * Bucket counts available to prime_growth_policy, in strictly increasing order (the
   * policy searches this table with std::lower_bound). The first entry is 1 so that a
   * modulo by it always yields bucket 0.
   */
  static constexpr const std::array<std::size_t, 40> PRIMES = {
      {1ul,         5ul,          17ul,         29ul,        37ul,        53ul,
       67ul,        79ul,         97ul,         131ul,       193ul,       257ul,
       389ul,       521ul,        769ul,        1031ul,      1543ul,      2053ul,
       3079ul,      6151ul,       12289ul,      24593ul,     49157ul,     98317ul,
       196613ul,    393241ul,     786433ul,     1572869ul,   3145739ul,   6291469ul,
       12582917ul,  25165843ul,   50331653ul,   100663319ul, 201326611ul, 402653189ul,
       805306457ul, 1610612741ul, 3221225473ul, 4294967291ul}};

  /** Return hash % PRIMES[IPrime], with the divisor fixed at compile time. */
  template <unsigned int IPrime> static constexpr std::size_t mod(std::size_t hash)
  {
    return hash % PRIMES[IPrime];
  }

  // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for faster modulo
  // as the compiler can optimize the modulo code better with a constant known at the
  // compilation.
  static constexpr const std::array<std::size_t (*)(std::size_t), 40> MOD_PRIME = {
      {&mod<0>,  &mod<1>,  &mod<2>,  &mod<3>,  &mod<4>,  &mod<5>,  &mod<6>,  &mod<7>,
       &mod<8>,  &mod<9>,  &mod<10>, &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>,
       &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>, &mod<23>,
       &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, &mod<31>,
       &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>}};

  // Both tables are indexed by the same prime index, so they must stay the same length.
  static_assert(PRIMES.size() == MOD_PRIME.size(),
                "PRIMES and MOD_PRIME must have the same number of entries.");

} // namespace detail
261 
262  /**
263  * Grow the hash table by using prime numbers as bucket count. Slower than
264  * tsl::rh::power_of_two_growth_policy in general but will probably distribute the values around
265  * better in the buckets with a poor hash function.
266  *
267  * To allow the compiler to optimize the modulo operation, a lookup table is used with constant
268  * primes numbers.
269  *
270  * With a switch the code would look like:
271  * \code
272  * switch(iprime) { // iprime is the current prime of the hash table
273  * case 0: hash % 5ul;
274  * break;
275  * case 1: hash % 17ul;
276  * break;
277  * case 2: hash % 29ul;
278  * break;
279  * ...
280  * }
281  * \endcode
282  *
283  * Due to the constant variable in the modulo the compiler is able to optimize the operation
284  * by a series of multiplications, substractions and shifts.
285  *
286  * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64
287  * bits environement.
288  */
290  {
291  public:
292  explicit prime_growth_policy(std::size_t &min_bucket_count_in_out)
293  {
294  auto it_prime =
295  std::lower_bound(detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
296  if (it_prime == detail::PRIMES.end()) {
297  TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
298  }
299 
300  m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
301  if (min_bucket_count_in_out > 0) {
302  min_bucket_count_in_out = *it_prime;
303  }
304  else {
305  min_bucket_count_in_out = 0;
306  }
307  }
308 
309  std::size_t bucket_for_hash(std::size_t hash) const noexcept
310  {
311  return detail::MOD_PRIME[m_iprime](hash);
312  }
313 
314  std::size_t next_bucket_count() const
315  {
316  if (m_iprime + 1 >= detail::PRIMES.size()) {
317  TSL_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
318  }
319 
320  return detail::PRIMES[m_iprime + 1];
321  }
322 
323  std::size_t max_bucket_count() const { return detail::PRIMES.back(); }
324 
325  void clear() noexcept { m_iprime = 0; }
326 
327  private:
328  unsigned int m_iprime;
329 
330  static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
331  "The type of m_iprime is not big enough.");
332  };
333 
334  } // namespace rh
335 } // namespace tsl
336 
337 #endif
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:116
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:199
std::size_t m_mod
Definition: robin_growth_policy.h:229
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:309
static constexpr std::size_t mod(std::size_t hash)
Definition: robin_growth_policy.h:245
void clear() noexcept
Definition: robin_growth_policy.h:221
Definition: hopscotch_growth_policy.h:37
static constexpr const std::array< std::size_t(*)(std::size_t), 40 > MOD_PRIME
Definition: robin_growth_policy.h:253
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:323
#define TSL_THROW_OR_TERMINATE(ex, msg)
Definition: robin_growth_policy.h:59
static const std::size_t MAX_BUCKET_COUNT
Definition: robin_growth_policy.h:226
mod_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:183
void clear() noexcept
Definition: robin_growth_policy.h:143
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:133
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:314
Definition: robin_growth_policy.h:180
static constexpr const std::array< std::size_t, 40 > PRIMES
Definition: robin_growth_policy.h:236
std::size_t m_mask
Definition: robin_growth_policy.h:171
void clear() noexcept
Definition: robin_growth_policy.h:325
prime_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:292
static constexpr bool is_power_of_two(std::size_t value)
Definition: robin_growth_policy.h:164
static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR
Definition: robin_growth_policy.h:224
power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:97
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:219
static std::size_t round_up_to_power_of_two(std::size_t value)
Definition: robin_growth_policy.h:146
Definition: robin_growth_policy.h:289
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:121
unsigned int m_iprime
Definition: robin_growth_policy.h:328
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:197
Definition: robin_growth_policy.h:86