IOSS  2.0
robin_growth_policy.h
Go to the documentation of this file.
1 /**
2  * MIT License
3  *
4  * Copyright (c) 2017 Tessil
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_ROBIN_GROWTH_POLICY_H
25 #define TSL_ROBIN_GROWTH_POLICY_H
26 
27 #include <algorithm>
28 #include <array>
29 #include <climits>
30 #include <cmath>
31 #include <cstddef>
32 #include <iterator>
33 #include <limits>
34 #include <ratio>
35 #include <stdexcept>
36 
/**
 * Internal assertion macro.
 *
 * When TSL_DEBUG is defined it forwards to the standard assert(); otherwise it expands to a
 * no-op expression and `expr` is not evaluated.
 */
#ifdef TSL_DEBUG
#include <cassert> // assert() is used by the expansion below.
#define tsl_rh_assert(expr) assert(expr)
#else
#define tsl_rh_assert(expr) (static_cast<void>(0))
#endif
42 
/**
 * TSL_RH_THROW_OR_TERMINATE(ex, msg)
 *
 * - Exceptions enabled (and TSL_NO_EXCEPTIONS not defined): expands to `throw ex(msg)`.
 * - Exceptions disabled: defines TSL_RH_NO_EXCEPTIONS and calls std::terminate(). Unless
 *   NDEBUG is defined, `msg` is written to std::cerr before terminating.
 */
#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
     (defined(_MSC_VER) && defined(_CPPUNWIND))) && \
    !defined(TSL_NO_EXCEPTIONS)
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
#else
#include <exception> // std::terminate
#define TSL_RH_NO_EXCEPTIONS
#ifdef NDEBUG
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate()
#else
#include <iostream>
#define TSL_RH_THROW_OR_TERMINATE(ex, msg) \
  do { \
    std::cerr << msg << std::endl; \
    std::terminate(); \
  } while (0)
#endif
#endif
64 
// Branch hint: on GCC/Clang the condition is passed through __builtin_expect with an expected
// value of true; on any other compiler the macro is a plain parenthesized expression.
#if !defined(__GNUC__) && !defined(__clang__)
#define TSL_RH_LIKELY(exp) (exp)
#else
#define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true))
#endif
70 
71 namespace tsl {
72  namespace rh {
73 
74  /**
75  * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two.
76  * It allows the table to use a mask operation instead of a modulo operation to map a hash to a
77  * bucket.
78  *
79  * GrowthFactor must be a power of two >= 2.
80  */
81  template <std::size_t GrowthFactor> class power_of_two_growth_policy
82  {
83  public:
84  /**
85  * Called on the hash table creation and on rehash. The number of buckets for the table is
86  * passed in parameter. This number is a minimum, the policy may update this value with a
87  * higher value if needed (but not lower).
88  *
89  * If 0 is given, min_bucket_count_in_out must still be 0 after the policy creation and
90  * bucket_for_hash must always return 0 in this case.
91  */
92  explicit power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
93  {
94  if (min_bucket_count_in_out > max_bucket_count()) {
95  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
96  }
97 
98  if (min_bucket_count_in_out > 0) {
99  min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
100  m_mask = min_bucket_count_in_out - 1;
101  }
102  else {
103  m_mask = 0;
104  }
105  }
106 
107  /**
108  * Return the bucket [0, bucket_count()) to which the hash belongs.
109  * If bucket_count() is 0, it must always return 0.
110  */
111  std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash & m_mask; }
112 
113  /**
114  * Return the number of buckets that should be used on next growth.
115  */
116  std::size_t next_bucket_count() const
117  {
118  if ((m_mask + 1) > max_bucket_count() / GrowthFactor) {
119  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
120  }
121 
122  return (m_mask + 1) * GrowthFactor;
123  }
124 
125  /**
126  * Return the maximum number of buckets supported by the policy.
127  */
128  std::size_t max_bucket_count() const
129  {
130  // Largest power of two.
131  return (std::numeric_limits<std::size_t>::max() / 2) + 1;
132  }
133 
134  /**
135  * Reset the growth policy as if it was created with a bucket count of 0.
136  * After a clear, the policy must always return 0 when bucket_for_hash is called.
137  */
138  void clear() noexcept { m_mask = 0; }
139 
140  private:
141  static std::size_t round_up_to_power_of_two(std::size_t value)
142  {
143  if (is_power_of_two(value)) {
144  return value;
145  }
146 
147  if (value == 0) {
148  return 1;
149  }
150 
151  --value;
152  for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
153  value |= value >> i;
154  }
155 
156  return value + 1;
157  }
158 
159  static constexpr bool is_power_of_two(std::size_t value)
160  {
161  return value != 0 && (value & (value - 1)) == 0;
162  }
163 
164  protected:
165  static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
166  "GrowthFactor must be a power of two >= 2.");
167 
168  std::size_t m_mask;
169  };
170 
171  /**
172  * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
173  * to a bucket. Slower but it can be useful if you want a slower growth.
174  */
175  template <class GrowthFactor = std::ratio<3, 2>> class mod_growth_policy
176  {
177  public:
178  explicit mod_growth_policy(std::size_t &min_bucket_count_in_out)
179  {
180  if (min_bucket_count_in_out > max_bucket_count()) {
181  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
182  }
183 
184  if (min_bucket_count_in_out > 0) {
185  m_mod = min_bucket_count_in_out;
186  }
187  else {
188  m_mod = 1;
189  }
190  }
191 
192  std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash % m_mod; }
193 
194  std::size_t next_bucket_count() const
195  {
196  if (m_mod == max_bucket_count()) {
197  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
198  }
199 
200  const double nxt_bucket_count =
201  std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
202  if (!std::isnormal(nxt_bucket_count)) {
203  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
204  }
205 
206  if (nxt_bucket_count > double(max_bucket_count())) {
207  return max_bucket_count();
208  }
209  else {
210  return std::size_t(nxt_bucket_count);
211  }
212  }
213 
214  std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }
215 
216  void clear() noexcept { m_mod = 1; }
217 
218  private:
219  static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
220  1.0 * GrowthFactor::num / GrowthFactor::den;
221  static const std::size_t MAX_BUCKET_COUNT = std::size_t(
222  double(std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR));
223 
224  static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
225 
226  std::size_t m_mod;
227  };
228 
namespace detail {

  // Bucket counts available to the prime-based policy, in ascending order.
  static constexpr std::array<std::size_t, 40> PRIMES = {{
      1UL,         5UL,         17UL,         29UL,         37UL,
      53UL,        67UL,        79UL,         97UL,         131UL,
      193UL,       257UL,       389UL,        521UL,        769UL,
      1031UL,      1543UL,      2053UL,       3079UL,       6151UL,
      12289UL,     24593UL,     49157UL,      98317UL,      196613UL,
      393241UL,    786433UL,    1572869UL,    3145739UL,    6291469UL,
      12582917UL,  25165843UL,  50331653UL,   100663319UL,  201326611UL,
      402653189UL, 805306457UL, 1610612741UL, 3221225473UL, 4294967291UL}};

  // Reduce `hash` modulo the IPrime-th entry of PRIMES. The divisor is selected at compile
  // time, so each instantiation performs a modulo by a known constant.
  template <unsigned int IPrime> static constexpr std::size_t mod(std::size_t hash)
  {
    return hash % std::get<IPrime>(PRIMES);
  }

  // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. Dispatching through this table lets
  // the compiler optimize each modulo, since every divisor is a compile-time constant.
  static constexpr std::array<std::size_t (*)(std::size_t), 40> MOD_PRIME = {{
      &mod<0>,  &mod<1>,  &mod<2>,  &mod<3>,  &mod<4>,
      &mod<5>,  &mod<6>,  &mod<7>,  &mod<8>,  &mod<9>,
      &mod<10>, &mod<11>, &mod<12>, &mod<13>, &mod<14>,
      &mod<15>, &mod<16>, &mod<17>, &mod<18>, &mod<19>,
      &mod<20>, &mod<21>, &mod<22>, &mod<23>, &mod<24>,
      &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>,
      &mod<30>, &mod<31>, &mod<32>, &mod<33>, &mod<34>,
      &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>}};

} // namespace detail
256 
257  /**
258  * Grow the hash table by using prime numbers as bucket count. Slower than
259  * tsl::rh::power_of_two_growth_policy in general but will probably distribute the values around
260  * better in the buckets with a poor hash function.
261  *
262  * To allow the compiler to optimize the modulo operation, a lookup table is used with constant
263  * primes numbers.
264  *
265  * With a switch the code would look like:
266  * \code
267  * switch(iprime) { // iprime is the current prime of the hash table
268  * case 0: hash % 5ul;
269  * break;
270  * case 1: hash % 17ul;
271  * break;
272  * case 2: hash % 29ul;
273  * break;
274  * ...
275  * }
276  * \endcode
277  *
278  * Due to the constant variable in the modulo the compiler is able to optimize the operation
279  * by a series of multiplications, substractions and shifts.
280  *
281  * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34) * 5' in a 64
282  * bits environment.
283  */
285  {
286  public:
287  explicit prime_growth_policy(std::size_t &min_bucket_count_in_out)
288  {
289  auto it_prime =
290  std::lower_bound(detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
291  if (it_prime == detail::PRIMES.end()) {
292  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
293  }
294 
295  m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
296  if (min_bucket_count_in_out > 0) {
297  min_bucket_count_in_out = *it_prime;
298  }
299  else {
300  min_bucket_count_in_out = 0;
301  }
302  }
303 
304  std::size_t bucket_for_hash(std::size_t hash) const noexcept
305  {
306  return detail::MOD_PRIME[m_iprime](hash);
307  }
308 
309  std::size_t next_bucket_count() const
310  {
311  if (m_iprime + 1 >= detail::PRIMES.size()) {
312  TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maxmimum size.");
313  }
314 
315  return detail::PRIMES[m_iprime + 1];
316  }
317 
318  std::size_t max_bucket_count() const { return detail::PRIMES.back(); }
319 
320  void clear() noexcept { m_iprime = 0; }
321 
322  private:
323  unsigned int m_iprime;
324 
325  static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
326  "The type of m_iprime is not big enough.");
327  };
328 
329  } // namespace rh
330 } // namespace tsl
331 
332 #endif
tsl::rh::prime_growth_policy::prime_growth_policy
prime_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:287
tsl::rh::mod_growth_policy::MAX_BUCKET_COUNT
static const std::size_t MAX_BUCKET_COUNT
Definition: robin_growth_policy.h:221
tsl::rh::prime_growth_policy
Definition: robin_growth_policy.h:284
tsl::rh::power_of_two_growth_policy::m_mask
std::size_t m_mask
Definition: robin_growth_policy.h:166
tsl
Definition: bhopscotch_map.h:37
tsl::rh::mod_growth_policy::REHASH_SIZE_MULTIPLICATION_FACTOR
static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR
Definition: robin_growth_policy.h:219
tsl::rh::detail::mod
static constexpr std::size_t mod(std::size_t hash)
Definition: robin_growth_policy.h:240
tsl::rh::detail::MOD_PRIME
static constexpr const std::array< std::size_t(*)(std::size_t), 40 > MOD_PRIME
Definition: robin_growth_policy.h:248
tsl::rh::power_of_two_growth_policy::power_of_two_growth_policy
power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:92
tsl::rh::mod_growth_policy::mod_growth_policy
mod_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: robin_growth_policy.h:178
tsl::rh::power_of_two_growth_policy::next_bucket_count
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:116
tsl::rh::power_of_two_growth_policy::max_bucket_count
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:128
tsl::rh::prime_growth_policy::bucket_for_hash
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:304
tsl::rh::power_of_two_growth_policy::clear
void clear() noexcept
Definition: robin_growth_policy.h:138
tsl::rh::power_of_two_growth_policy::round_up_to_power_of_two
static std::size_t round_up_to_power_of_two(std::size_t value)
Definition: robin_growth_policy.h:141
tsl::rh::detail::PRIMES
static constexpr const std::array< std::size_t, 40 > PRIMES
Definition: robin_growth_policy.h:231
tsl::rh::mod_growth_policy::m_mod
std::size_t m_mod
Definition: robin_growth_policy.h:224
TSL_RH_THROW_OR_TERMINATE
#define TSL_RH_THROW_OR_TERMINATE(ex, msg)
Definition: robin_growth_policy.h:57
tsl::rh::mod_growth_policy::clear
void clear() noexcept
Definition: robin_growth_policy.h:216
tsl::rh::prime_growth_policy::m_iprime
unsigned int m_iprime
Definition: robin_growth_policy.h:323
tsl::rh::power_of_two_growth_policy::is_power_of_two
static constexpr bool is_power_of_two(std::size_t value)
Definition: robin_growth_policy.h:159
tsl::rh::prime_growth_policy::next_bucket_count
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:309
tsl::rh::power_of_two_growth_policy::bucket_for_hash
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:111
tsl::rh::power_of_two_growth_policy
Definition: robin_growth_policy.h:81
tsl::rh::mod_growth_policy::max_bucket_count
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:214
tsl::rh::prime_growth_policy::max_bucket_count
std::size_t max_bucket_count() const
Definition: robin_growth_policy.h:318
tsl::rh::mod_growth_policy::bucket_for_hash
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: robin_growth_policy.h:192
tsl::rh::mod_growth_policy
Definition: robin_growth_policy.h:175
tsl::rh::mod_growth_policy::next_bucket_count
std::size_t next_bucket_count() const
Definition: robin_growth_policy.h:194
tsl::rh::prime_growth_policy::clear
void clear() noexcept
Definition: robin_growth_policy.h:320