IOSS  2.0
robin_growth_policy.h
Go to the documentation of this file.
1 /**
2  * MIT License
3  *
4  * Copyright (c) 2017 Tessil
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_ROBIN_GROWTH_POLICY_H
25 #define TSL_ROBIN_GROWTH_POLICY_H
26 
27 #include <algorithm>
28 #include <array>
29 #include <climits>
30 #include <cmath>
31 #include <cstddef>
32 #include <iterator>
33 #include <limits>
34 #include <ratio>
35 #include <stdexcept>
36 
/**
 * Internal assertion helper.
 *
 * When TSL_DEBUG is defined the macro forwards to the standard assert() (which is itself
 * disabled by NDEBUG). Otherwise it expands to a no-op and `expr` is not evaluated.
 */
#ifdef TSL_DEBUG
#include <cassert> // declares assert(), needed by the expansion below
#define tsl_rh_assert(expr) assert(expr)
#else
#define tsl_rh_assert(expr) (static_cast<void>(0))
#endif
42 
/**
 * If exceptions are enabled, throw an exception of type `ex` constructed from `msg`; otherwise
 * call std::terminate. In the no-exception case, builds without NDEBUG first print `msg`
 * (followed by a newline) to stderr.
 *
 * Defining TSL_NO_EXCEPTIONS forces the terminate path even when the compiler supports
 * exceptions.
 */
#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
     (defined(_MSC_VER) && defined(_CPPUNWIND))) && \
    !defined(TSL_NO_EXCEPTIONS)
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
#else
#include <exception> // std::terminate
#ifdef NDEBUG
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate()
#else
#include <cstdio>
// The message is printed through "%s" so it is never interpreted as a format string.
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) \
  do { \
    std::fprintf(stderr, "%s\n", msg); \
    std::terminate(); \
  } while (0)
#endif
#endif
63 
/**
 * Branch hint: on GCC and Clang the condition is passed through __builtin_expect with an
 * expected value of true. On every other compiler the macro is just the parenthesized
 * expression.
 */
#if !defined(__GNUC__) && !defined(__clang__)
#define TSL_RH_LIKELY(exp) (exp)
#else
#define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true))
#endif
69 
70 namespace tsl {
71  namespace rh {
72 
73  /**
74  * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two.
75  * It allows the table to use a mask operation instead of a modulo operation to map a hash to a
76  * bucket.
77  *
78  * GrowthFactor must be a power of two >= 2.
79  */
80  template <std::size_t GrowthFactor> class power_of_two_growth_policy
81  {
82  public:
83  /**
84  * Called on the hash table creation and on rehash. The number of buckets for the table is
85  * passed in parameter. This number is a minimum, the policy may update this value with a
86  * higher value if needed (but not lower).
87  *
88  * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and
89  * bucket_for_hash must always return 0 in this case.
90  */
91  explicit power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
92  {
93  if (min_bucket_count_in_out > max_bucket_count()) {
94  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
95  }
96 
97  if (min_bucket_count_in_out > 0) {
98  min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
99  m_mask = min_bucket_count_in_out - 1;
100  }
101  else {
102  m_mask = 0;
103  }
104  }
105 
106  /**
107  * Return the bucket [0, bucket_count()) to which the hash belongs.
108  * If bucket_count() is 0, it must always return 0.
109  */
110  std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash & m_mask; }
111 
112  /**
113  * Return the number of buckets that should be used on next growth.
114  */
115  std::size_t next_bucket_count() const
116  {
117  if ((m_mask + 1) > max_bucket_count() / GrowthFactor) {
118  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
119  }
120 
121  return (m_mask + 1) * GrowthFactor;
122  }
123 
124  /**
125  * Return the maximum number of buckets supported by the policy.
126  */
127  std::size_t max_bucket_count() const
128  {
129  // Largest power of two.
130  return (std::numeric_limits<std::size_t>::max() / 2) + 1;
131  }
132 
133  /**
134  * Reset the growth policy as if it was created with a bucket count of 0.
135  * After a clear, the policy must always return 0 when bucket_for_hash is called.
136  */
137  void clear() noexcept { m_mask = 0; }
138 
139  private:
140  static std::size_t round_up_to_power_of_two(std::size_t value)
141  {
142  if (is_power_of_two(value)) {
143  return value;
144  }
145 
146  if (value == 0) {
147  return 1;
148  }
149 
150  --value;
151  for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
152  value |= value >> i;
153  }
154 
155  return value + 1;
156  }
157 
158  static constexpr bool is_power_of_two(std::size_t value)
159  {
160  return value != 0 && (value & (value - 1)) == 0;
161  }
162 
163  protected:
164  static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
165  "GrowthFactor must be a power of two >= 2.");
166 
167  std::size_t m_mask;
168  };
169 
170  /**
171  * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
172  * to a bucket. Slower but it can be useful if you want a slower growth.
173  */
174  template <class GrowthFactor = std::ratio<3, 2>> class mod_growth_policy
175  {
176  public:
177  explicit mod_growth_policy(std::size_t &min_bucket_count_in_out)
178  {
179  if (min_bucket_count_in_out > max_bucket_count()) {
180  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
181  }
182 
183  if (min_bucket_count_in_out > 0) {
184  m_mod = min_bucket_count_in_out;
185  }
186  else {
187  m_mod = 1;
188  }
189  }
190 
191  std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash % m_mod; }
192 
193  std::size_t next_bucket_count() const
194  {
195  if (m_mod == max_bucket_count()) {
196  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
197  }
198 
199  const double my_next_bucket_count =
200  std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
201  if (!std::isnormal(my_next_bucket_count)) {
202  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
203  }
204 
205  if (my_next_bucket_count > double(max_bucket_count())) {
206  return max_bucket_count();
207  }
208  else {
209  return std::size_t(my_next_bucket_count);
210  }
211  }
212 
213  std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }
214 
215  void clear() noexcept { m_mod = 1; }
216 
217  private:
218  static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
219  1.0 * GrowthFactor::num / GrowthFactor::den;
220  static const std::size_t MAX_BUCKET_COUNT = std::size_t(
221  double(std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR));
222 
223  static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
224 
225  std::size_t m_mod;
226  };
227 
namespace detail {

  /**
   * Table of the 40 candidate bucket counts, stored in ascending order.
   */
  static constexpr const std::array<std::size_t, 40> PRIMES = {{
      1ul,         5ul,         17ul,        29ul,         37ul,
      53ul,        67ul,        79ul,        97ul,         131ul,
      193ul,       257ul,       389ul,       521ul,        769ul,
      1031ul,      1543ul,      2053ul,      3079ul,       6151ul,
      12289ul,     24593ul,     49157ul,     98317ul,      196613ul,
      393241ul,    786433ul,    1572869ul,   3145739ul,    6291469ul,
      12582917ul,  25165843ul,  50331653ul,  100663319ul,  201326611ul,
      402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul}};

  /**
   * Return `value` modulo PRIMES[Index]. The index is a template argument, so each
   * instantiation divides by a compile-time constant.
   */
  template <unsigned int Index> static constexpr std::size_t mod(std::size_t value)
  {
    return value % PRIMES[Index];
  }

  /**
   * Dispatch table: MOD_PRIME[i](hash) returns hash % PRIMES[i]. Going through one function per
   * entry lets the compiler optimize each modulo against a divisor known at compile time.
   */
  static constexpr const std::array<std::size_t (*)(std::size_t), 40> MOD_PRIME = {{
      &mod<0>,  &mod<1>,  &mod<2>,  &mod<3>,  &mod<4>,
      &mod<5>,  &mod<6>,  &mod<7>,  &mod<8>,  &mod<9>,
      &mod<10>, &mod<11>, &mod<12>, &mod<13>, &mod<14>,
      &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>,
      &mod<20>, &mod<21>, &mod<22>, &mod<23>, &mod<24>,
      &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>,
      &mod<30>, &mod<31>, &mod<32>, &mod<33>, &mod<34>,
      &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>}};

} // namespace detail
255 
256  /**
257  * Grow the hash table by using prime numbers as bucket count. Slower than
258  * tsl::rh::power_of_two_growth_policy in general but will probably distribute the values around
259  * better in the buckets with a poor hash function.
260  *
261  * To allow the compiler to optimize the modulo operation, a lookup table is used with constant
262  * primes numbers.
263  *
264  * With a switch the code would look like:
265  * \code
266  * switch(iprime) { // iprime is the current prime of the hash table
267  * case 0: hash % 5ul;
268  * break;
269  * case 1: hash % 17ul;
270  * break;
271  * case 2: hash % 29ul;
272  * break;
273  * ...
274  * }
275  * \endcode
276  *
277  * Due to the constant variable in the modulo the compiler is able to optimize the operation
278  * by a series of multiplications, substractions and shifts.
279  *
280  * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64
281  * bits environement.
282  */
284  {
285  public:
286  explicit prime_growth_policy(std::size_t &min_bucket_count_in_out)
287  {
288  auto it_prime =
289  std::lower_bound(detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
290  if (it_prime == detail::PRIMES.end()) {
291  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
292  }
293 
294  m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
295  if (min_bucket_count_in_out > 0) {
296  min_bucket_count_in_out = *it_prime;
297  }
298  else {
299  min_bucket_count_in_out = 0;
300  }
301  }
302 
303  std::size_t bucket_for_hash(std::size_t hash) const noexcept
304  {
305  return detail::MOD_PRIME[m_iprime](hash);
306  }
307 
308  std::size_t next_bucket_count() const
309  {
310  if (m_iprime + 1 >= detail::PRIMES.size()) {
311  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
312  }
313 
314  return detail::PRIMES[m_iprime + 1];
315  }
316 
317  std::size_t max_bucket_count() const { return detail::PRIMES.back(); }
318 
319  void clear() noexcept { m_iprime = 0; }
320 
321  private:
322  unsigned int m_iprime;
323 
324  static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
325  "The type of m_iprime is not big enough.");
326  };
327 
328  } // namespace rh
329 } // namespace tsl
330 
331 #endif
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:110
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:193
std::size_t m_mod
Definition: robin_growth_policy.h:223
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:303
static constexpr std::size_t mod(std::size_t hash)
Definition: robin_growth_policy.h:239
void clear() noexcept
Definition: robin_growth_policy.h:215
Definition: hopscotch_growth_policy.h:37
static constexpr const std::array< std::size_t(*)(std::size_t), 40 > MOD_PRIME
Definition: robin_growth_policy.h:247
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:317
static const std::size_t MAX_BUCKET_COUNT
Definition: robin_growth_policy.h:220
mod_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:177
void clear() noexcept
Definition: robin_growth_policy.h:137
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:127
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:308
Definition: robin_growth_policy.h:174
static constexpr const std::array< std::size_t, 40 > PRIMES
Definition: robin_growth_policy.h:230
std::size_t m_mask
Definition: robin_growth_policy.h:165
void clear() noexcept
Definition: robin_growth_policy.h:319
prime_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:286
static constexpr bool is_power_of_two(std::size_t value)
Definition: robin_growth_policy.h:158
#define TSL_RH_THROW_OR_TERMINATE(ex, msg)
Definition: robin_growth_policy.h:56
static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR
Definition: robin_growth_policy.h:218
power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:91
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:213
static std::size_t round_up_to_power_of_two(std::size_t value)
Definition: robin_growth_policy.h:140
Definition: robin_growth_policy.h:283
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:115
unsigned int m_iprime
Definition: robin_growth_policy.h:322
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:191
Definition: robin_growth_policy.h:80