IOSS  2.0
hopscotch_growth_policy.h
Go to the documentation of this file.
1 /**
2  * MIT License
3  *
4  * Copyright (c) 2018 Tessil
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H
25 #define TSL_HOPSCOTCH_GROWTH_POLICY_H
26 
27 #include <algorithm>
28 #include <array>
29 #include <climits>
30 #include <cmath>
31 #include <cstddef>
32 #include <iterator>
33 #include <limits>
34 #include <ratio>
35 #include <stdexcept>
36 
37 namespace tsl {
38  namespace hh {
39 
40  /**
41  * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a power of two.
42  * It allows the table to use a mask operation instead of a modulo operation to map a hash to a
43  * bucket.
44  *
45  * GrowthFactor must be a power of two >= 2.
46  */
template <std::size_t GrowthFactor> class power_of_two_growth_policy
{
public:
  /**
   * Called on the hash table creation and on rehash. The requested number of buckets is
   * passed in `min_bucket_count_in_out`; it is a minimum and is rounded up in place to the
   * next power of two (never lowered).
   *
   * If 0 is given, `min_bucket_count_in_out` stays 0 and bucket_for_hash always returns 0.
   *
   * Throws std::length_error if the requested count is greater than max_bucket_count().
   */
  explicit power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
  {
    if (min_bucket_count_in_out > max_bucket_count()) {
      throw std::length_error("The hash table exceeds its maximum size.");
    }

    if (min_bucket_count_in_out > 0) {
      min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
      // For a power-of-two count, (count - 1) has every lower bit set, so `hash & mask`
      // is the same as `hash % count`.
      m_mask = min_bucket_count_in_out - 1;
    }
    else {
      // A zero mask maps every hash to bucket 0.
      m_mask = 0;
    }
  }

  /**
   * Return the bucket [0, bucket_count()) to which the hash belongs.
   * If bucket_count() is 0, it always returns 0 (the mask is 0 in that case).
   */
  std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash & m_mask; }

  /**
   * Return the bucket count to use when the bucket array grows on rehash, i.e. the value
   * (m_mask + 1) multiplied by GrowthFactor.
   *
   * Throws std::length_error if that product would be greater than max_bucket_count().
   */
  std::size_t next_bucket_count() const
  {
    // Compare against max / GrowthFactor so the multiplication below cannot overflow.
    if ((m_mask + 1) > max_bucket_count() / GrowthFactor) {
      throw std::length_error("The hash table exceeds its maximum size.");
    }

    return (m_mask + 1) * GrowthFactor;
  }

  /**
   * Return the maximum number of buckets supported by the policy: the largest power of two
   * representable in a std::size_t.
   */
  std::size_t max_bucket_count() const
  {
    // Largest power of two.
    return (std::numeric_limits<std::size_t>::max() / 2) + 1;
  }

  /**
   * Reset the growth policy as if it was created with a bucket count of 0.
   * After a clear, bucket_for_hash always returns 0.
   */
  void clear() noexcept { m_mask = 0; }

private:
  /**
   * Return `value` unchanged if it is already a power of two, 1 if it is 0, and otherwise
   * the smallest power of two greater than `value`.
   */
  static std::size_t round_up_to_power_of_two(std::size_t value)
  {
    if (is_power_of_two(value)) {
      return value;
    }

    if (value == 0) {
      return 1;
    }

    // Copy the highest set bit of (value - 1) into every lower position; adding 1 then
    // carries into the next power of two.
    --value;
    for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
      value |= value >> i;
    }

    return value + 1;
  }

  /** Return true when exactly one bit of `value` is set. */
  static constexpr bool is_power_of_two(std::size_t value)
  {
    return value != 0 && (value & (value - 1)) == 0;
  }

private:
  static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
                "GrowthFactor must be a power of two >= 2.");

  // Bucket count minus one, or 0 when the policy represents an empty table.
  std::size_t m_mask;
};
136 
137  /**
138  * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo to map a hash
139  * to a bucket. Slower but it can be useful if you want a slower growth.
140  */
template <class GrowthFactor = std::ratio<3, 2>> class mod_growth_policy
{
public:
  /**
   * Build the policy for a table of `min_bucket_count_in_out` buckets. The value is used as
   * the modulus as-is and is never modified by this policy.
   *
   * If 0 is given, the modulus is set to 1 so that bucket_for_hash always returns 0.
   *
   * Throws std::length_error if the requested count is greater than max_bucket_count().
   */
  explicit mod_growth_policy(std::size_t &min_bucket_count_in_out)
  {
    if (min_bucket_count_in_out > max_bucket_count()) {
      throw std::length_error("The hash table exceeds its maximum size.");
    }

    // A modulus of 1 stands in for the empty table: hash % 1 == 0 for every hash.
    m_mod = (min_bucket_count_in_out > 0) ? min_bucket_count_in_out : 1;
  }

  /** Return the bucket to which the hash belongs, computed as `hash % m_mod`. */
  std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash % m_mod; }

  /**
   * Return the bucket count to use when the bucket array grows on rehash: the current
   * modulus multiplied by the growth factor, rounded up, and capped at max_bucket_count().
   *
   * Throws std::length_error if the modulus already equals max_bucket_count(), or if the
   * scaled value is not a normal floating-point number.
   */
  std::size_t next_bucket_count() const
  {
    if (m_mod == max_bucket_count()) {
      throw std::length_error("The hash table exceeds its maximum size.");
    }

    const double scaled = std::ceil(static_cast<double>(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
    if (!std::isnormal(scaled)) {
      throw std::length_error("The hash table exceeds its maximum size.");
    }

    // Clamp before converting back so the double -> size_t conversion stays in range.
    if (scaled > static_cast<double>(max_bucket_count())) {
      return max_bucket_count();
    }
    return static_cast<std::size_t>(scaled);
  }

  /** Return the maximum number of buckets supported by the policy. */
  std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }

  /**
   * Reset the growth policy as if it was created with a bucket count of 0.
   * After a clear, bucket_for_hash always returns 0.
   */
  void clear() noexcept { m_mod = 1; }

private:
  // Growth ratio GrowthFactor::num / GrowthFactor::den as a floating-point multiplier.
  static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
      1.0 * GrowthFactor::num / GrowthFactor::den;

  // Largest std::size_t value divided by the growth multiplier, truncated to an integer.
  static const std::size_t MAX_BUCKET_COUNT = std::size_t(
      double(std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR));

  static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");

  // Current modulus; 1 when the policy represents an empty table.
  std::size_t m_mod;
};
194 
namespace detail {

  // Ascending table of the 40 bucket counts usable by the prime-based growth policy.
  // The leading 1 makes every hash reduce to bucket 0.
  static constexpr std::array<std::size_t, 40> PRIMES = {{
      1ul,         5ul,         17ul,         29ul,         37ul,
      53ul,        67ul,        79ul,         97ul,         131ul,
      193ul,       257ul,       389ul,        521ul,        769ul,
      1031ul,      1543ul,      2053ul,       3079ul,       6151ul,
      12289ul,     24593ul,     49157ul,      98317ul,      196613ul,
      393241ul,    786433ul,    1572869ul,    3145739ul,    6291469ul,
      12582917ul,  25165843ul,  50331653ul,   100663319ul,  201326611ul,
      402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul}};

  // Reduce `hash` modulo the table entry selected at compile time by PrimeIndex.
  template <unsigned int PrimeIndex> static constexpr std::size_t mod(std::size_t hash)
  {
    return hash % PRIMES[PrimeIndex];
  }

  // MOD_PRIME[i](hash) yields hash % PRIMES[i]. Each entry is a separate instantiation of
  // mod<>, so the divisor is a compile-time constant in every function, which lets the
  // compiler optimize the modulo better than it could with a runtime divisor.
  static constexpr std::array<std::size_t (*)(std::size_t), 40> MOD_PRIME = {{
      &mod<0>,  &mod<1>,  &mod<2>,  &mod<3>,  &mod<4>,  &mod<5>,  &mod<6>,  &mod<7>,
      &mod<8>,  &mod<9>,  &mod<10>, &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>,
      &mod<16>, &mod<17>, &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>, &mod<23>,
      &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, &mod<31>,
      &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>}};

} // namespace detail
222 
223  /**
224  * Grow the hash table by using prime numbers as bucket count. Slower than
225  * tsl::hh::power_of_two_growth_policy in general but will probably distribute the values around
226  * better in the buckets with a poor hash function.
227  *
228  * To allow the compiler to optimize the modulo operation, a lookup table is used with constant
229  * prime numbers.
230  *
231  * With a switch the code would look like:
232  * \code
233  * switch(iprime) { // iprime is the index of the current prime of the hash table
234  * case 0: hash % 1ul;
235  * break;
236  * case 1: hash % 5ul;
237  * break;
238  * case 2: hash % 17ul;
239  * break;
240  * ...
241  * }
242  * \endcode
243  *
244  * Due to the constant variable in the modulo the compiler is able to optimize the operation
245  * by a series of multiplications, subtractions and shifts.
246  *
247  * The 'hash % 5' could become something like 'hash - ((hash * 0xCCCCCCCD) >> 34) * 5' in a
248  * 64-bit environment.
249  */
251  {
252  public:
253  explicit prime_growth_policy(std::size_t &min_bucket_count_in_out)
254  {
255  auto it_prime =
256  std::lower_bound(detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
257  if (it_prime == detail::PRIMES.end()) {
258  throw std::length_error("The hash table exceeds its maxmimum size.");
259  }
260 
261  m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
262  if (min_bucket_count_in_out > 0) {
263  min_bucket_count_in_out = *it_prime;
264  }
265  else {
266  min_bucket_count_in_out = 0;
267  }
268  }
269 
270  std::size_t bucket_for_hash(std::size_t hash) const noexcept
271  {
272  return detail::MOD_PRIME[m_iprime](hash);
273  }
274 
275  std::size_t next_bucket_count() const
276  {
277  if (m_iprime + 1 >= detail::PRIMES.size()) {
278  throw std::length_error("The hash table exceeds its maxmimum size.");
279  }
280 
281  return detail::PRIMES[m_iprime + 1];
282  }
283 
284  std::size_t max_bucket_count() const { return detail::PRIMES.back(); }
285 
286  void clear() noexcept { m_iprime = 0; }
287 
288  private:
289  unsigned int m_iprime;
290 
291  static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
292  "The type of m_iprime is not big enough.");
293  };
294 
295  } // namespace hh
296 } // namespace tsl
297 
298 #endif
tsl::hh::prime_growth_policy::prime_growth_policy
prime_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: hopscotch_growth_policy.h:253
tsl
Definition: bhopscotch_map.h:37
tsl::hh::power_of_two_growth_policy::m_mask
std::size_t m_mask
Definition: hopscotch_growth_policy.h:132
tsl::hh::mod_growth_policy::next_bucket_count
std::size_t next_bucket_count() const
Definition: hopscotch_growth_policy.h:160
tsl::hh::prime_growth_policy::bucket_for_hash
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: hopscotch_growth_policy.h:270
tsl::hh::mod_growth_policy::REHASH_SIZE_MULTIPLICATION_FACTOR
static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR
Definition: hopscotch_growth_policy.h:185
tsl::hh::mod_growth_policy::m_mod
std::size_t m_mod
Definition: hopscotch_growth_policy.h:190
tsl::hh::mod_growth_policy::clear
void clear() noexcept
Definition: hopscotch_growth_policy.h:182
tsl::hh::mod_growth_policy
Definition: hopscotch_growth_policy.h:141
tsl::hh::detail::mod
static constexpr std::size_t mod(std::size_t hash)
Definition: hopscotch_growth_policy.h:206
tsl::hh::prime_growth_policy::max_bucket_count
std::size_t max_bucket_count() const
Definition: hopscotch_growth_policy.h:284
tsl::hh::power_of_two_growth_policy::clear
void clear() noexcept
Definition: hopscotch_growth_policy.h:104
tsl::hh::prime_growth_policy::clear
void clear() noexcept
Definition: hopscotch_growth_policy.h:286
tsl::hh::power_of_two_growth_policy::next_bucket_count
std::size_t next_bucket_count() const
Definition: hopscotch_growth_policy.h:82
tsl::hh::mod_growth_policy::max_bucket_count
std::size_t max_bucket_count() const
Definition: hopscotch_growth_policy.h:180
tsl::hh::mod_growth_policy::bucket_for_hash
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: hopscotch_growth_policy.h:158
tsl::hh::power_of_two_growth_policy::is_power_of_two
static constexpr bool is_power_of_two(std::size_t value)
Definition: hopscotch_growth_policy.h:125
tsl::hh::mod_growth_policy::mod_growth_policy
mod_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: hopscotch_growth_policy.h:144
tsl::hh::mod_growth_policy::MAX_BUCKET_COUNT
static const std::size_t MAX_BUCKET_COUNT
Definition: hopscotch_growth_policy.h:187
tsl::hh::power_of_two_growth_policy::round_up_to_power_of_two
static std::size_t round_up_to_power_of_two(std::size_t value)
Definition: hopscotch_growth_policy.h:107
tsl::hh::prime_growth_policy::next_bucket_count
std::size_t next_bucket_count() const
Definition: hopscotch_growth_policy.h:275
tsl::hh::prime_growth_policy::m_iprime
unsigned int m_iprime
Definition: hopscotch_growth_policy.h:289
tsl::hh::power_of_two_growth_policy::bucket_for_hash
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition: hopscotch_growth_policy.h:77
tsl::hh::power_of_two_growth_policy::max_bucket_count
std::size_t max_bucket_count() const
Definition: hopscotch_growth_policy.h:94
tsl::hh::power_of_two_growth_policy
Definition: hopscotch_growth_policy.h:47
tsl::hh::detail::MOD_PRIME
static constexpr const std::array< std::size_t(*)(std::size_t), 40 > MOD_PRIME
Definition: hopscotch_growth_policy.h:214
tsl::hh::detail::PRIMES
static constexpr const std::array< std::size_t, 40 > PRIMES
Definition: hopscotch_growth_policy.h:197
tsl::hh::power_of_two_growth_policy::power_of_two_growth_policy
power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
Definition: hopscotch_growth_policy.h:58
tsl::hh::prime_growth_policy
Definition: hopscotch_growth_policy.h:250