piranha  0.10
hash_set.hpp
1 /* Copyright 2009-2017 Francesco Biscani (bluescarni@gmail.com)
2 
3 This file is part of the Piranha library.
4 
5 The Piranha library is free software; you can redistribute it and/or modify
6 it under the terms of either:
7 
8  * the GNU Lesser General Public License as published by the Free
9  Software Foundation; either version 3 of the License, or (at your
10  option) any later version.
11 
12 or
13 
14  * the GNU General Public License as published by the Free Software
15  Foundation; either version 3 of the License, or (at your option) any
16  later version.
17 
18 or both in parallel, as here.
19 
20 The Piranha library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
23 for more details.
24 
25 You should have received copies of the GNU General Public License and the
26 GNU Lesser General Public License along with the Piranha library. If not,
27 see https://www.gnu.org/licenses/. */
28 
29 #ifndef PIRANHA_HASH_SET_HPP
30 #define PIRANHA_HASH_SET_HPP
31 
32 #include <boost/iterator/iterator_facade.hpp>
33 #include <boost/numeric/conversion/cast.hpp>
34 #include <cmath>
35 #include <cstddef>
36 #include <cstdint>
37 #include <functional>
38 #include <initializer_list>
39 #include <limits>
40 #include <map>
41 #include <memory>
42 #include <new>
43 #include <stdexcept>
44 #include <string>
45 #include <tuple>
46 #include <type_traits>
47 #include <utility>
48 #include <vector>
49 
50 #include <piranha/config.hpp>
51 #include <piranha/debug_access.hpp>
52 #include <piranha/detail/init_data.hpp>
53 #include <piranha/exceptions.hpp>
54 #include <piranha/s11n.hpp>
55 #include <piranha/safe_cast.hpp>
56 #include <piranha/thread_pool.hpp>
57 #include <piranha/type_traits.hpp>
58 
59 namespace piranha
60 {
61 
63 
99 /* Some improvement NOTEs:
100 * - tests for low-level methods
101 * - better increase_size with recycling of dynamically-allocated nodes
102 * - see if we can reduce the number of branches in the find algorithm (e.g., when traversing the list) -> this should be
103 * a general review of the internal linked list
104 * implementation.
105 * - memory handling: the usage of the allocator object should be more standard, i.e., use the pointer and reference
106 * typedefs defined within, replace
107 * positional new with construct even in the list implementation. Then it can be made a template parameter with default =
108 * std::allocator.
109 * - use of new: we should probably replace new with new, in case new is overloaded -> also, check all occurrences of
110 * root new, it is used as well
111 * in static_vector for instance.
112 * - inline the first bucket, with the idea of avoiding memory allocations when the series consist of a single element
113 * (useful for instance
114 * when iterating over the series with the fat iterator).
115 * - optimisation for the begin() iterator,
116 * - check again about the mod implementation,
117 * - in the dtor checks, do we still want the shutdown() logic after we rework symbol?
118 * are we still accessing potentially global variables?
119 * - maybe a bit more enabling for ctor and other template methods, not really essential though.
120 */
121 template <typename T, typename Hash = std::hash<T>, typename Pred = std::equal_to<T>>
122 class hash_set
123 {
124  PIRANHA_TT_CHECK(is_container_element, T);
125  PIRANHA_TT_CHECK(is_hash_function_object, Hash, T);
126  PIRANHA_TT_CHECK(is_equality_function_object, Pred, T);
127  // Make friend with debug access class.
128  template <typename U>
129  friend class debug_access;
130  // Node class for bucket element.
131  struct node {
132  typedef typename std::aligned_storage<sizeof(T), alignof(T)>::type storage_type;
133  node() : m_next(nullptr)
134  {
135  }
136  // Erase all other ctors/assignments, we do not want to
137  // copy around this object as m_storage is not what it is
138  // (and often could be uninitialized, which would lead to UB if used).
139  node(const node &) = delete;
140  node(node &&) = delete;
141  node &operator=(const node &) = delete;
142  node &operator=(node &&) = delete;
143  // NOTE: the idea here is that we use these helpers only *after* the object has been constructed,
144  // for constructing the object we must always access m_storage directly. The chain of two casts
145  // seems to be the only standard-conforming way of getting out a pointer to T.
146  // http://stackoverflow.com/questions/1082378/what-is-the-basic-use-of-aligned-storage
147  // http://stackoverflow.com/questions/13466556/aligned-storage-and-strict-aliasing
148  // http://en.cppreference.com/w/cpp/types/aligned_storage
149  // A couple of further notes:
150  // - pointer casting to/from void * is ok (4.10.2 and 5.2.9.13, via static_cast);
151  // - the placement new operator (which we use to construct the object into the storage)
152  // is guaranteed to construct the object at the beginning of the storage, and taking
153  // a void * out of the storage will return the address of the object stored within;
154  // - the lifetime of the original POD storage_type ends when we reuse its storage via
155  // placement new.
156  // This page contains some more info about storage reuse:
157  // http://en.cppreference.com/w/cpp/language/lifetime#Storage_reuse
158  // See also this link, regarding static_cast vs reinterpret_cast for this very specific
159  // usage:
160  // http://stackoverflow.com/questions/19300142/static-cast-and-reinterpret-cast-for-stdaligned-storage
161  // Apparently, it is equivalent.
162  const T *ptr() const
163  {
164  piranha_assert(m_next);
165  return static_cast<const T *>(static_cast<const void *>(&m_storage));
166  }
167  T *ptr()
168  {
169  piranha_assert(m_next);
170  return static_cast<T *>(static_cast<void *>(&m_storage));
171  }
172  storage_type m_storage;
173  node *m_next;
174  };
175  // List constituting the bucket.
176  // NOTE: in this list implementation the m_next pointer is used as a flag to signal if the current node
177  // stores an item: the pointer is not null if it does contain something. The value of m_next pointer in a node is
178  // set to a constant
179  // &terminator node if it is the last node of the list. I.e., the terminator is end() in all cases
180  // except when the list is empty (in that case the m_node member is end()).
181  struct list {
182  // NOTE: re: std::iterator inheritance. It's netiher encouraged not discouraged according to this:
183  // http://stackoverflow.com/questions/6471019/can-should-i-inherit-from-stl-iterator
184  // I think we can just keep on using boost iterator_facade for the time being.
185  template <typename U>
186  class iterator_impl : public boost::iterator_facade<iterator_impl<U>, U, boost::forward_traversal_tag>
187  {
188  typedef typename std::conditional<std::is_const<U>::value, node const *, node *>::type ptr_type;
189  template <typename V>
190  friend class iterator_impl;
191 
192  public:
193  iterator_impl() : m_ptr(nullptr)
194  {
195  }
196  explicit iterator_impl(ptr_type ptr) : m_ptr(ptr)
197  {
198  }
199  // Constructor from other iterator type.
200  template <typename V,
201  enable_if_t<std::is_convertible<typename iterator_impl<V>::ptr_type, ptr_type>::value, int> = 0>
202  iterator_impl(const iterator_impl<V> &other) : m_ptr(other.m_ptr)
203  {
204  }
205 
206  private:
207  friend class boost::iterator_core_access;
208  void increment()
209  {
210  piranha_assert(m_ptr && m_ptr->m_next);
211  m_ptr = m_ptr->m_next;
212  }
213  template <typename V>
214  bool equal(const iterator_impl<V> &other) const
215  {
216  return m_ptr == other.m_ptr;
217  }
218  U &dereference() const
219  {
220  piranha_assert(m_ptr && m_ptr->m_next);
221  return *m_ptr->ptr();
222  }
223 
224  public:
225  ptr_type m_ptr;
226  };
227  typedef iterator_impl<T> iterator;
228  typedef iterator_impl<T const> const_iterator;
229  // Static checks on the iterator types.
232  list() : m_node()
233  {
234  }
235  list(list &&other) noexcept : m_node()
236  {
237  steal_from_rvalue(std::move(other));
238  }
239  list(const list &other) : m_node()
240  {
241  try {
242  auto cur = &m_node;
243  auto other_cur = &other.m_node;
244  while (other_cur->m_next) {
245  if (cur->m_next) {
246  // This assert means we are operating on the last element
247  // of the list, as we are doing back-insertions.
248  piranha_assert(cur->m_next == &terminator);
249  // Create a new node with content equal to other_cur
250  // and linking forward to the terminator.
251  std::unique_ptr<node> new_node(::new node());
252  ::new (static_cast<void *>(&new_node->m_storage)) T(*other_cur->ptr());
253  new_node->m_next = &terminator;
254  // Link the new node.
255  cur->m_next = new_node.release();
256  cur = cur->m_next;
257  } else {
258  // This means this is the first node.
259  ::new (static_cast<void *>(&cur->m_storage)) T(*other_cur->ptr());
260  cur->m_next = &terminator;
261  }
262  other_cur = other_cur->m_next;
263  }
264  } catch (...) {
265  destroy();
266  throw;
267  }
268  }
269  list &operator=(list &&other)
270  {
271  if (likely(this != &other)) {
272  // Destroy the content of this.
273  destroy();
274  steal_from_rvalue(std::move(other));
275  }
276  return *this;
277  }
278  list &operator=(const list &other)
279  {
280  if (likely(this != &other)) {
281  auto tmp = other;
282  *this = std::move(tmp);
283  }
284  return *this;
285  }
286  ~list()
287  {
288  destroy();
289  }
290  void steal_from_rvalue(list &&other)
291  {
292  piranha_assert(empty());
293  // Do something only if there is content in the other.
294  if (other.m_node.m_next) {
295  // Move construct current first node with first node of other.
296  ::new (static_cast<void *>(&m_node.m_storage)) T(std::move(*other.m_node.ptr()));
297  // Link remaining content of other into this.
298  m_node.m_next = other.m_node.m_next;
299  // Destroy first node of other.
300  other.m_node.ptr()->~T();
301  other.m_node.m_next = nullptr;
302  }
303  piranha_assert(other.empty());
304  }
305  template <typename U, enable_if_t<std::is_same<T, uncvref_t<U>>::value, int> = 0>
306  node *insert(U &&item)
307  {
308  // NOTE: optimize with likely/unlikely?
309  if (m_node.m_next) {
310  // Create the new node and forward-link it to the second node.
311  std::unique_ptr<node> new_node(::new node());
312  ::new (static_cast<void *>(&new_node->m_storage)) T(std::forward<U>(item));
313  new_node->m_next = m_node.m_next;
314  // Link first node to the new node.
315  m_node.m_next = new_node.release();
316  return m_node.m_next;
317  } else {
318  ::new (static_cast<void *>(&m_node.m_storage)) T(std::forward<U>(item));
319  m_node.m_next = &terminator;
320  return &m_node;
321  }
322  }
323  iterator begin()
324  {
325  return iterator(&m_node);
326  }
327  iterator end()
328  {
329  return iterator((m_node.m_next) ? &terminator : &m_node);
330  }
331  const_iterator begin() const
332  {
333  return const_iterator(&m_node);
334  }
335  const_iterator end() const
336  {
337  return const_iterator((m_node.m_next) ? &terminator : &m_node);
338  }
339  bool empty() const
340  {
341  return !m_node.m_next;
342  }
343  void destroy()
344  {
345  node *cur = &m_node;
346  while (cur->m_next) {
347  // Store the current value temporarily.
348  auto old = cur;
349  // Assign the next.
350  cur = cur->m_next;
351  // Destroy the old payload and erase connections.
352  old->ptr()->~T();
353  old->m_next = nullptr;
354  // If the old node was not the initial one, delete it.
355  if (old != &m_node) {
356  ::delete old;
357  }
358  }
359  // After destruction, the list should be equivalent to a default-constructed one.
360  piranha_assert(empty());
361  }
362  static node terminator;
363  node m_node;
364  };
365  // Allocator type.
366  // NOTE: if we move allocator choice in public interface we need to document the exception behaviour of the
367  // allocator. Also, check the validity
368  // of the assumptions on the type returned by allocate(): must it be a pointer or just convertible to pointer?
369  // NOTE: for std::allocator, pointer is guaranteed to be "T *":
370  // http://en.cppreference.com/w/cpp/memory/allocator
371  typedef std::allocator<list> allocator_type;
372 
373 public:
375  using hasher = Hash;
377  using key_equal = Pred;
379  using key_type = T;
381 
384  using size_type = std::size_t;
385 
386 private:
387  // The container is a pointer to an array of lists.
388  using ptr_type = list *;
389  // Internal pack type, containing the pointer to the objects, the hash/equal functor
390  // and the allocator. In many cases these are stateless so we can exploit EBCO if implemented
391  // in the tuple type (likely).
392  using pack_type = std::tuple<ptr_type, hasher, key_equal, allocator_type>;
393  // A few handy accessors.
394  ptr_type &ptr()
395  {
396  return std::get<0u>(m_pack);
397  }
398  const ptr_type &ptr() const
399  {
400  return std::get<0u>(m_pack);
401  }
402  const hasher &hash() const
403  {
404  return std::get<1u>(m_pack);
405  }
406  const key_equal &k_equal() const
407  {
408  return std::get<2u>(m_pack);
409  }
410  allocator_type &allocator()
411  {
412  return std::get<3u>(m_pack);
413  }
414  const allocator_type &allocator() const
415  {
416  return std::get<3u>(m_pack);
417  }
418  // Definition of the iterator type for the set.
419  template <typename Key>
420  class iterator_impl : public boost::iterator_facade<iterator_impl<Key>, Key, boost::forward_traversal_tag>
421  {
422  friend class hash_set;
423  typedef typename std::conditional<std::is_const<Key>::value, hash_set const, hash_set>::type set_type;
424  typedef typename std::conditional<std::is_const<Key>::value, typename list::const_iterator,
425  typename list::iterator>::type it_type;
426 
427  public:
428  iterator_impl() : m_set(nullptr), m_idx(0u), m_it()
429  {
430  }
431  explicit iterator_impl(set_type *set, const size_type &idx, it_type it) : m_set(set), m_idx(idx), m_it(it)
432  {
433  }
434 
435  private:
436  friend class boost::iterator_core_access;
437  void increment()
438  {
439  piranha_assert(m_set);
440  auto &container = m_set->ptr();
441  // Assert that the current iterator is valid.
442  piranha_assert(m_idx < m_set->bucket_count());
443  piranha_assert(!container[m_idx].empty());
444  piranha_assert(m_it != container[m_idx].end());
445  ++m_it;
446  if (m_it == container[m_idx].end()) {
447  const size_type container_size = m_set->bucket_count();
448  while (true) {
449  ++m_idx;
450  if (m_idx == container_size) {
451  m_it = it_type{};
452  return;
453  } else if (!container[m_idx].empty()) {
454  m_it = container[m_idx].begin();
455  return;
456  }
457  }
458  }
459  }
460  bool equal(const iterator_impl &other) const
461  {
462  // NOTE: comparing iterators from different containers is UB
463  // in the standard.
464  piranha_assert(m_set && other.m_set);
465  return (m_idx == other.m_idx && m_it == other.m_it);
466  }
467  Key &dereference() const
468  {
469  piranha_assert(m_set && m_idx < m_set->bucket_count() && m_it != m_set->ptr()[m_idx].end());
470  return *m_it;
471  }
472 
473  private:
474  set_type *m_set;
475  size_type m_idx;
476  it_type m_it;
477  };
478  void init_from_n_buckets(const size_type &n_buckets, unsigned n_threads)
479  {
480  piranha_assert(!ptr() && !m_log2_size && !m_n_elements);
481  if (unlikely(!n_threads)) {
482  piranha_throw(std::invalid_argument, "the number of threads must be strictly positive");
483  }
484  // Proceed to actual construction only if the requested number of buckets is nonzero.
485  if (!n_buckets) {
486  return;
487  }
488  const size_type log2_size = get_log2_from_hint(n_buckets);
489  const size_type size = size_type(1u) << log2_size;
490  auto new_ptr = allocator().allocate(size);
491  if (unlikely(!new_ptr)) {
492  piranha_throw(std::bad_alloc, );
493  }
494  if (n_threads == 1u) {
495  // Default-construct the elements of the array.
496  // NOTE: this is a noexcept operation, no need to account for rolling back.
497  for (size_type i = 0u; i < size; ++i) {
498  allocator().construct(&new_ptr[i]);
499  }
500  } else {
501  // Sync variables.
502  using crs_type = std::vector<std::pair<size_type, size_type>>;
503  crs_type constructed_ranges(static_cast<typename crs_type::size_type>(n_threads),
504  std::make_pair(size_type(0u), size_type(0u)));
505  if (unlikely(constructed_ranges.size() != n_threads)) {
506  piranha_throw(std::bad_alloc, );
507  }
508  // Thread function.
509  auto thread_function = [this, new_ptr, &constructed_ranges](const size_type &start, const size_type &end,
510  const unsigned &thread_idx) {
511  for (size_type i = start; i != end; ++i) {
512  this->allocator().construct(&new_ptr[i]);
513  }
514  constructed_ranges[thread_idx] = std::make_pair(start, end);
515  };
516  // Work per thread.
517  const auto wpt = size / n_threads;
518  future_list<decltype(thread_function(0u, 0u, 0u))> f_list;
519  try {
520  for (unsigned i = 0u; i < n_threads; ++i) {
521  const auto start = static_cast<size_type>(wpt * i),
522  end = static_cast<size_type>((i == n_threads - 1u) ? size : wpt * (i + 1u));
523  f_list.push_back(thread_pool::enqueue(i, thread_function, start, end, i));
524  }
525  f_list.wait_all();
526  // NOTE: no need to get_all() here, as we know no exceptions will be generated inside thread_func.
527  } catch (...) {
528  // NOTE: everything in thread_func is noexcept, if we are here the exception was thrown by enqueue or
529  // push_back.
530  // Wait for everything to wind down.
531  f_list.wait_all();
532  // Destroy what was constructed.
533  for (const auto &r : constructed_ranges) {
534  for (size_type i = r.first; i != r.second; ++i) {
535  allocator().destroy(&new_ptr[i]);
536  }
537  }
538  // Deallocate before re-throwing.
539  allocator().deallocate(new_ptr, size);
540  throw;
541  }
542  }
543  // Assign the members.
544  ptr() = new_ptr;
545  m_log2_size = log2_size;
546  }
547  // Destroy all elements and deallocate ptr().
548  void destroy_and_deallocate()
549  {
550  // Proceed to destroy all elements and deallocate only if the set is actually storing something.
551  if (ptr()) {
552  const size_type size = size_type(1u) << m_log2_size;
553  for (size_type i = 0u; i < size; ++i) {
554  allocator().destroy(&ptr()[i]);
555  }
556  allocator().deallocate(ptr(), size);
557  } else {
558  piranha_assert(!m_log2_size && !m_n_elements);
559  }
560  }
561  // Serialization support.
562  friend class boost::serialization::access;
563  template <class Archive>
564  void save(Archive &ar, unsigned) const
565  {
566  // Size.
567  boost_save(ar, size());
568  // Serialize the items.
569  boost_save_range(ar, begin(), end());
570  }
571  template <class Archive>
572  void load(Archive &ar, unsigned)
573  {
574  // Reset this.
575  *this = hash_set{};
576  // Recover the size.
577  size_type size;
578  boost_load(ar, size);
579  // Prepare an adequate number of buckets.
580  rehash(boost::numeric_cast<size_type>(std::ceil(static_cast<double>(size) / max_load_factor())));
581  for (size_type i = 0; i < size; ++i) {
582  T tmp;
583  boost_load(ar, tmp);
584  const auto p = insert(std::move(tmp));
585  if (unlikely(!p.second)) {
586  piranha_throw(std::invalid_argument, "while deserializing a hash_set from a Boost archive "
587  "a duplicate value was encountered");
588  }
589  }
590  }
591  BOOST_SERIALIZATION_SPLIT_MEMBER()
592  // Enabler for insert().
593  template <typename U>
594  using insert_enabler = enable_if_t<std::is_same<key_type, uncvref_t<U>>::value, int>;
595  // Run a consistency check on the set, will return false if something is wrong.
596  bool sanity_check() const
597  {
598  // Ignore sanity checks on shutdown.
599  if (shutdown()) {
600  return true;
601  }
602  size_type count = 0u;
603  for (size_type i = 0u; i < bucket_count(); ++i) {
604  for (auto it = ptr()[i].begin(); it != ptr()[i].end(); ++it) {
605  if (_bucket(*it) != i) {
606  return false;
607  }
608  ++count;
609  }
610  }
611  if (count != m_n_elements) {
612  return false;
613  }
614  // m_log2_size must not be equal to or greater than the number of bits of size_type.
615  if (m_log2_size >= unsigned(std::numeric_limits<size_type>::digits)) {
616  return false;
617  }
618  // The container pointer must be consistent with the other members.
619  if (!ptr() && (m_log2_size || m_n_elements)) {
620  return false;
621  }
622  // Check size is consistent with number of iterator traversals.
623  count = 0u;
624  for (auto it = begin(); it != end(); ++it, ++count) {
625  }
626  if (count != m_n_elements) {
627  return false;
628  }
629  return true;
630  }
631  // The number of available nonzero sizes will be the number of bits in the size type. Possible nonzero sizes will be
632  // in
633  // the [2 ** 0, 2 ** (n-1)] range.
634  static const size_type m_n_nonzero_sizes = static_cast<size_type>(std::numeric_limits<size_type>::digits);
635  // Get log2 of set size at least equal to hint. To be used only when hint is not zero.
636  static size_type get_log2_from_hint(const size_type &hint)
637  {
638  piranha_assert(hint);
639  for (size_type i = 0u; i < m_n_nonzero_sizes; ++i) {
640  if ((size_type(1u) << i) >= hint) {
641  return i;
642  }
643  }
644  piranha_throw(std::bad_alloc, );
645  }
646 
647 public:
649 
652  using iterator = iterator_impl<key_type const>;
653 
654 private:
655  // Static checks on the iterator type.
657 
658 public:
660 
665 
668  using local_iterator = typename list::const_iterator;
670 
679  hash_set(const hasher &h = hasher{}, const key_equal &k = key_equal{})
680  : m_pack(nullptr, h, k, allocator_type{}), m_log2_size(0u), m_n_elements(0u)
681  {
682  }
684 
702  explicit hash_set(const size_type &n_buckets, const hasher &h = hasher{}, const key_equal &k = key_equal{},
703  unsigned n_threads = 1u)
704  : m_pack(nullptr, h, k, allocator_type{}), m_log2_size(0u), m_n_elements(0u)
705  {
706  init_from_n_buckets(n_buckets, n_threads);
707  }
709 
717  hash_set(const hash_set &other)
718  : m_pack(nullptr, other.hash(), other.k_equal(), other.allocator()), m_log2_size(0u), m_n_elements(0u)
719  {
720  // Proceed to actual copy only if other has some content.
721  if (other.ptr()) {
722  const size_type size = size_type(1u) << other.m_log2_size;
723  auto new_ptr = allocator().allocate(size);
724  if (unlikely(!new_ptr)) {
725  piranha_throw(std::bad_alloc, );
726  }
727  size_type i = 0u;
728  try {
729  // Copy-construct the elements of the array.
730  for (; i < size; ++i) {
731  allocator().construct(&new_ptr[i], other.ptr()[i]);
732  }
733  } catch (...) {
734  // Unwind the construction and deallocate, before re-throwing.
735  for (size_type j = 0u; j < i; ++j) {
736  allocator().destroy(&new_ptr[j]);
737  }
738  allocator().deallocate(new_ptr, size);
739  throw;
740  }
741  // Assign the members.
742  ptr() = new_ptr;
743  m_log2_size = other.m_log2_size;
744  m_n_elements = other.m_n_elements;
745  } else {
746  piranha_assert(!other.m_log2_size && !other.m_n_elements);
747  }
748  }
750 
756  hash_set(hash_set &&other) noexcept
757  : m_pack(std::move(other.m_pack)), m_log2_size(other.m_log2_size), m_n_elements(other.m_n_elements)
758  {
759  // Clear out the other one.
760  other.ptr() = nullptr;
761  other.m_log2_size = 0u;
762  other.m_n_elements = 0u;
763  }
765 
779  template <typename InputIterator>
780  explicit hash_set(const InputIterator &begin, const InputIterator &end, const size_type &n_buckets = 0u,
781  const hasher &h = hasher{}, const key_equal &k = key_equal{})
782  : m_pack(nullptr, h, k, allocator_type{}), m_log2_size(0u), m_n_elements(0u)
783  {
784  init_from_n_buckets(n_buckets, 1u);
785  for (auto it = begin; it != end; ++it) {
786  insert(*it);
787  }
788  }
790 
800  template <typename U>
801  explicit hash_set(std::initializer_list<U> list)
802  : m_pack(nullptr, hasher{}, key_equal{}, allocator_type{}), m_log2_size(0u), m_n_elements(0u)
803  {
804  // We do not care here for possible truncation of list.size(), as this is only an optimization.
805  init_from_n_buckets(static_cast<size_type>(list.size()), 1u);
806  for (const auto &x : list) {
807  insert(x);
808  }
809  }
811 
815  {
816  piranha_assert(sanity_check());
817  destroy_and_deallocate();
818  }
820 
827  hash_set &operator=(const hash_set &other)
828  {
829  if (likely(this != &other)) {
830  hash_set tmp(other);
831  *this = std::move(tmp);
832  }
833  return *this;
834  }
836 
841  hash_set &operator=(hash_set &&other) noexcept
842  {
843  if (likely(this != &other)) {
844  destroy_and_deallocate();
845  m_pack = std::move(other.m_pack);
846  m_log2_size = other.m_log2_size;
847  m_n_elements = other.m_n_elements;
848  // Zero out other.
849  other.ptr() = nullptr;
850  other.m_log2_size = 0u;
851  other.m_n_elements = 0u;
852  }
853  return *this;
854  }
856 
860  {
861  // NOTE: this could take a while in case of an empty set with lots of buckets. Take a shortcut
862  // taking into account the number of elements in the set - if zero, go directly to end()?
863  const_iterator retval;
864  retval.m_set = this;
865  size_type idx = 0u;
866  const auto b_count = bucket_count();
867  for (; idx < b_count; ++idx) {
868  if (!ptr()[idx].empty()) {
869  break;
870  }
871  }
872  retval.m_idx = idx;
873  // If we are not at the end, assign proper iterator.
874  if (idx != b_count) {
875  retval.m_it = ptr()[idx].begin();
876  }
877  return retval;
878  }
880 
884  {
885  return const_iterator(this, bucket_count(), local_iterator{});
886  }
888 
892  {
893  return static_cast<hash_set const *>(this)->begin();
894  }
896 
900  {
901  return static_cast<hash_set const *>(this)->end();
902  }
904 
907  size_type size() const
908  {
909  return m_n_elements;
910  }
912 
915  bool empty() const
916  {
917  return !size();
918  }
920 
924  {
925  return (ptr()) ? (size_type(1u) << m_log2_size) : size_type(0u);
926  }
928 
931  double load_factor() const
932  {
933  const auto b_count = bucket_count();
934  return (b_count) ? static_cast<double>(size()) / static_cast<double>(b_count) : 0.;
935  }
937 
948  size_type bucket(const key_type &k) const
949  {
950  if (unlikely(!bucket_count())) {
951  piranha_throw(zero_division_error, "cannot calculate bucket index in an empty set");
952  }
953  return _bucket(k);
954  }
956 
963  const_iterator find(const key_type &k) const
964  {
965  if (unlikely(!bucket_count())) {
966  return end();
967  }
968  return _find(k, _bucket(k));
969  }
971 
979  {
980  return static_cast<const hash_set *>(this)->find(k);
981  }
983 
986  double max_load_factor() const
987  {
988  // Maximum load factor hard-coded to 1.
989  // NOTE: if this is ever made configurable, it should never be allowed to go to zero.
990  return 1.;
991  }
993 
1019  template <typename U, insert_enabler<U> = 0>
1020  std::pair<iterator, bool> insert(U &&k)
1021  {
1022  auto b_count = bucket_count();
1023  // Handle the case of a set with no buckets.
1024  if (unlikely(!b_count)) {
1025  _increase_size();
1026  // Update the bucket count.
1027  b_count = 1u;
1028  }
1029  // Try to locate the element.
1030  auto bucket_idx = _bucket(k);
1031  const auto it = _find(k, bucket_idx);
1032  if (it != end()) {
1033  // Item already present, exit.
1034  return std::make_pair(it, false);
1035  }
1036  if (unlikely(m_n_elements == std::numeric_limits<size_type>::max())) {
1037  piranha_throw(std::overflow_error, "maximum number of elements reached");
1038  }
1039  // Item is new. Handle the case in which we need to rehash because of load factor.
1040  if (unlikely(static_cast<double>(m_n_elements + size_type(1u)) / static_cast<double>(b_count)
1041  > max_load_factor())) {
1042  _increase_size();
1043  // We need a new bucket index in case of a rehash.
1044  bucket_idx = _bucket(k);
1045  }
1046  const auto it_retval = _unique_insert(std::forward<U>(k), bucket_idx);
1047  ++m_n_elements;
1048  return std::make_pair(it_retval, true);
1049  }
1051 
1066  {
1067  piranha_assert(!empty());
1068  const auto b_it = _erase(it);
1069  iterator retval;
1070  retval.m_set = this;
1071  const auto b_count = bucket_count();
1072  if (b_it == ptr()[it.m_idx].end()) {
1073  // Travel to the next iterator if the deleted element was
1074  // the last one in the bucket.
1075  auto idx = static_cast<size_type>(it.m_idx + 1u);
1076  // Advance to the first non-empty bucket if necessary,
1077  // without going past the end of the set.
1078  for (; idx < b_count; ++idx) {
1079  if (!ptr()[idx].empty()) {
1080  break;
1081  }
1082  }
1083  retval.m_idx = idx;
1084  // If we are not at the end, assign proper iterator.
1085  if (idx != b_count) {
1086  retval.m_it = ptr()[idx].begin();
1087  }
1088  // NOTE: in case we reached the end of the container, the end() iterator should be:
1089  // {this,bucket_count,local_iterator{}}
1090  // this has been set above already, bucket_count is set by retval.m_idx = idx
1091  // and the default local_iterator ctor is called by the def ctor of iterator.
1092  } else {
1093  // Otherwise, just copy over the iterator returned by _erase().
1094  retval.m_idx = it.m_idx;
1095  retval.m_it = b_it;
1096  }
1097  piranha_assert(m_n_elements);
1098  // Update the number of elements.
1099  m_n_elements = static_cast<size_type>(m_n_elements - 1u);
1100  return retval;
1101  }
1103 
1106  void clear()
1107  {
1108  destroy_and_deallocate();
1109  // Reset the members.
1110  ptr() = nullptr;
1111  m_log2_size = 0u;
1112  m_n_elements = 0u;
1113  }
1115 
1122  void swap(hash_set &other)
1123  {
1124  std::swap(m_pack, other.m_pack);
1125  std::swap(m_log2_size, other.m_log2_size);
1126  std::swap(m_n_elements, other.m_n_elements);
1127  }
1129 
1142  void rehash(const size_type &new_size, unsigned n_threads = 1u)
1143  {
1144  if (unlikely(!n_threads)) {
1145  piranha_throw(std::invalid_argument, "the number of threads must be strictly positive");
1146  }
1147  // If rehash is requested to zero, do something only if there are no items stored in the set.
1148  if (!new_size) {
1149  if (!size()) {
1150  clear();
1151  }
1152  return;
1153  }
1154  // Do nothing if rehashing to the new size would lead to exceeding the max load factor.
1155  if (static_cast<double>(size()) / static_cast<double>(new_size) > max_load_factor()) {
1156  return;
1157  }
1158  // Create a new set with needed amount of buckets.
1159  hash_set new_set(new_size, hash(), k_equal(), n_threads);
1160  try {
1161  const auto it_f = _m_end();
1162  for (auto it = _m_begin(); it != it_f; ++it) {
1163  const auto new_idx = new_set._bucket(*it);
1164  new_set._unique_insert(std::move(*it), new_idx);
1165  }
1166  } catch (...) {
1167  // Clear up both this and the new set upon any kind of error.
1168  clear();
1169  new_set.clear();
1170  throw;
1171  }
1172  // Retain the number of elements.
1173  new_set.m_n_elements = m_n_elements;
1174  // Clear the old set.
1175  clear();
1176  // Assign the new set.
1177  *this = std::move(new_set);
1178  }
1180 
1186  std::map<size_type, size_type> evaluate_sparsity() const
1187  {
1188  const auto it_f = ptr() + bucket_count();
1189  std::map<size_type, size_type> retval;
1190  size_type counter;
1191  for (auto it = ptr(); it != it_f; ++it) {
1192  counter = 0u;
1193  for (auto l_it = it->begin(); l_it != it->end(); ++l_it) {
1194  ++counter;
1195  }
1196  ++retval[counter];
1197  }
1198  return retval;
1199  }
1204 
1212  using _m_iterator = iterator_impl<key_type>;
1214 
1218  {
1219  // NOTE: this could take a while in case of an empty set with lots of buckets. Take a shortcut
1220  // taking into account the number of elements in the set - if zero, go directly to end()?
1221  const auto b_count = bucket_count();
1222  _m_iterator retval;
1223  retval.m_set = this;
1224  size_type idx = 0u;
1225  for (; idx < b_count; ++idx) {
1226  if (!ptr()[idx].empty()) {
1227  break;
1228  }
1229  }
1230  retval.m_idx = idx;
1231  // If we are not at the end, assign proper iterator.
1232  if (idx != b_count) {
1233  retval.m_it = ptr()[idx].begin();
1234  }
1235  return retval;
1236  }
1238 
1242  {
1243  return _m_iterator(this, bucket_count(), typename list::iterator{});
1244  }
1246 
1268  template <typename U, insert_enabler<U> = 0>
1269  iterator _unique_insert(U &&k, const size_type &bucket_idx)
1270  {
1271  // Assert that key is not present already in the set.
1272  piranha_assert(find(std::forward<U>(k)) == end());
1273  // Assert bucket index is correct.
1274  piranha_assert(bucket_idx == _bucket(k));
1275  auto p = ptr()[bucket_idx].insert(std::forward<U>(k));
1276  return iterator(this, bucket_idx, local_iterator(p));
1277  }
1279 
1291  const_iterator _find(const key_type &k, const size_type &bucket_idx) const
1292  {
1293  // Assert bucket index is correct.
1294  piranha_assert(bucket_idx == _bucket(k) && bucket_idx < bucket_count());
1295  const auto &b = ptr()[bucket_idx];
1296  const auto it_f = b.end();
1297  const_iterator retval(end());
1298  for (auto it = b.begin(); it != it_f; ++it) {
1299  if (k_equal()(*it, k)) {
1300  retval.m_idx = bucket_idx;
1301  retval.m_it = it;
1302  break;
1303  }
1304  }
1305  return retval;
1306  }
1308 
1315  size_type _bucket_from_hash(const std::size_t &hash) const
1316  {
1317  piranha_assert(bucket_count());
1318  return hash % (size_type(1u) << m_log2_size);
1319  }
1321 
1331  size_type _bucket(const key_type &k) const
1332  {
1333  return _bucket_from_hash(hash()(k));
1334  }
1336 
1341  void _update_size(const size_type &new_size)
1342  {
1343  m_n_elements = new_size;
1344  }
1346 
1354  {
1355  if (unlikely(m_log2_size >= m_n_nonzero_sizes - 1u)) {
1356  piranha_throw(std::bad_alloc, );
1357  }
1358  // We must take care here: if the set has zero buckets,
1359  // the next log2_size is 0u. Otherwise increase current log2_size.
1360  piranha_assert(ptr() || (!ptr() && !m_log2_size));
1361  const auto new_log2_size = (ptr()) ? (m_log2_size + 1u) : 0u;
1362  // Rehash to the new size.
1363  rehash(size_type(1u) << new_log2_size);
1364  }
1366 
1372  const list &_get_bucket_list(const size_type &idx) const
1373  {
1374  piranha_assert(idx < bucket_count());
1375  return ptr()[idx];
1376  }
1378 
1395  {
1396  // Verify the iterator is valid.
1397  piranha_assert(it.m_set == this);
1398  piranha_assert(it.m_idx < bucket_count());
1399  piranha_assert(!ptr()[it.m_idx].empty());
1400  piranha_assert(it.m_it != ptr()[it.m_idx].end());
1401  auto &bucket = ptr()[it.m_idx];
1402  // If the pointed-to element is the first one in the bucket, we need special care.
1403  if (&*it == &*bucket.m_node.ptr()) {
1404  // Destroy the payload.
1405  bucket.m_node.ptr()->~T();
1406  if (bucket.m_node.m_next == &bucket.terminator) {
1407  // Special handling if this was the only element.
1408  bucket.m_node.m_next = nullptr;
1409  return bucket.end();
1410  } else {
1411  // Store the link in the second element (this could be the terminator).
1412  auto tmp = bucket.m_node.m_next->m_next;
1413  // Move-construct from the second element, and then destroy it.
1414  ::new (static_cast<void *>(&bucket.m_node.m_storage)) T(std::move(*bucket.m_node.m_next->ptr()));
1415  bucket.m_node.m_next->ptr()->~T();
1416  ::delete bucket.m_node.m_next;
1417  // Establish the new link.
1418  bucket.m_node.m_next = tmp;
1419  return bucket.begin();
1420  }
1421  } else {
1422  auto b_it = bucket.begin();
1423  auto prev_b_it = b_it;
1424  const auto b_it_f = bucket.end();
1425  ++b_it;
1426  for (; b_it != b_it_f; ++b_it, ++prev_b_it) {
1427  if (&*b_it == &*it) {
1428  // Assign to the previous element the next link of the current one.
1429  prev_b_it.m_ptr->m_next = b_it.m_ptr->m_next;
1430  // Delete the current one.
1431  b_it.m_ptr->ptr()->~T();
1432  ::delete b_it.m_ptr;
1433  break;
1434  };
1435  }
1436  // We never want to go through the whole list, it means the element
1437  // to which 'it' refers is not here: assert that the iterator we just
1438  // erased was not end() - i.e., it was pointing to something.
1439  piranha_assert(b_it.m_ptr);
1440  // Move forward the iterator that originally preceded the erased item.
1441  // It will now point to the item past the erased one or to the local end().
1442  return ++prev_b_it;
1443  }
1444  }
1446 private:
1447  pack_type m_pack;
1448  size_type m_log2_size;
1449  size_type m_n_elements;
1450 };
1451 
1452 template <typename T, typename Hash, typename Pred>
1453 typename hash_set<T, Hash, Pred>::node hash_set<T, Hash, Pred>::list::terminator;
1454 
1455 template <typename T, typename Hash, typename Pred>
1456 const typename hash_set<T, Hash, Pred>::size_type hash_set<T, Hash, Pred>::m_n_nonzero_sizes;
1457 
1458 inline namespace impl
1459 {
1460 
1461 // Enablers for boost s11n.
1462 template <typename Archive, typename T, typename Hash, typename Pred>
1463 using hash_set_boost_save_enabler
1464  = enable_if_t<conjunction<has_boost_save<Archive, T>,
1465  has_boost_save<Archive, typename hash_set<T, Hash, Pred>::size_type>>::value>;
1466 
1467 template <typename Archive, typename T, typename Hash, typename Pred>
1468 using hash_set_boost_load_enabler
1469  = enable_if_t<conjunction<has_boost_load<Archive, T>,
1470  has_boost_load<Archive, typename hash_set<T, Hash, Pred>::size_type>>::value>;
1471 }
1472 
1474 
1483 template <typename Archive, typename T, typename Hash, typename Pred>
1484 struct boost_save_impl<Archive, hash_set<T, Hash, Pred>, hash_set_boost_save_enabler<Archive, T, Hash, Pred>>
1485  : boost_save_via_boost_api<Archive, hash_set<T, Hash, Pred>> {
1486 };
1487 
1489 
1505 template <typename Archive, typename T, typename Hash, typename Pred>
1506 struct boost_load_impl<Archive, hash_set<T, Hash, Pred>, hash_set_boost_load_enabler<Archive, T, Hash, Pred>>
1507  : boost_load_via_boost_api<Archive, hash_set<T, Hash, Pred>> {
1508 };
1509 
1510 #if defined(PIRANHA_WITH_MSGPACK)
1511 
1512 inline namespace impl
1513 {
1514 
1515 // Enablers for msgpack s11n.
1516 template <typename Stream, typename T, typename Hash, typename Pred>
1517 using hash_set_msgpack_pack_enabler
1518  = enable_if_t<conjunction<is_msgpack_stream<Stream>,
1519  has_safe_cast<std::uint32_t, typename hash_set<T, Hash, Pred>::size_type>,
1520  has_msgpack_pack<Stream, T>>::value>;
1521 
1522 template <typename T>
1523 using hash_set_msgpack_convert_enabler = enable_if_t<has_msgpack_convert<T>::value>;
1524 }
1525 
1527 
1534 template <typename Stream, typename T, typename Hash, typename Pred>
1535 struct msgpack_pack_impl<Stream, hash_set<T, Hash, Pred>, hash_set_msgpack_pack_enabler<Stream, T, Hash, Pred>> {
1537 
1551  void operator()(msgpack::packer<Stream> &p, const hash_set<T, Hash, Pred> &h, msgpack_format f) const
1552  {
1553  msgpack_pack_range(p, h.begin(), h.end(), h.size(), f);
1554  }
1555 };
1556 
1558 
1562 template <typename T, typename Hash, typename Pred>
1563 struct msgpack_convert_impl<hash_set<T, Hash, Pred>, hash_set_msgpack_convert_enabler<T>> {
1565 
1581  void operator()(hash_set<T, Hash, Pred> &h, const msgpack::object &o, msgpack_format f) const
1582  {
1583  // Clear out the retval.
1585  // Extract the array of items as a vector of objects.
1586  std::vector<msgpack::object> items;
1587  o.convert(items);
1588  // Prepare the number of buckets.
1589  h.rehash(boost::numeric_cast<typename hash_set<T, Hash, Pred>::size_type>(
1590  std::ceil(static_cast<double>(items.size()) / h.max_load_factor())));
1591  // Deserialize the items.
1592  for (const auto &obj : items) {
1593  T tmp;
1594  msgpack_convert(tmp, obj, f);
1595  const auto p = h.insert(std::move(tmp));
1596  if (unlikely(!p.second)) {
1597  piranha_throw(std::invalid_argument, "while deserializing a hash_set from a msgpack object "
1598  "a duplicate value was encountered");
1599  }
1600  }
1601  }
1602 };
1603 
1604 #endif
1605 }
1606 
1607 #endif
hash_set & operator=(const hash_set &other)
Copy assignment operator.
Definition: hash_set.hpp:827
void boost_load(Archive &ar, T &x)
Load from Boost archive.
Definition: s11n.hpp:469
local_iterator _erase(const_iterator it)
Erase element.
Definition: hash_set.hpp:1394
Default implementation of piranha::boost_load().
Definition: s11n.hpp:391
iterator end()
End iterator.
Definition: hash_set.hpp:899
void msgpack_convert(T &x, const msgpack::object &o, msgpack_format f)
Convert msgpack object.
Definition: s11n.hpp:957
void _increase_size()
Increase bucket count.
Definition: hash_set.hpp:1353
const_iterator find(const key_type &k) const
Find element.
Definition: hash_set.hpp:963
std::map< size_type, size_type > evaluate_sparsity() const
Get information on the sparsity of the set.
Definition: hash_set.hpp:1186
Implementation of piranha::boost_load() via the Boost API.
Definition: s11n.hpp:363
void rehash(const size_type &new_size, unsigned n_threads=1u)
Rehash set.
Definition: hash_set.hpp:1142
Exceptions.
const_iterator end() const
Const end iterator.
Definition: hash_set.hpp:883
Default implementation of piranha::boost_save().
Definition: s11n.hpp:245
STL namespace.
iterator_impl< key_type > _m_iterator
Mutable iterator.
Definition: hash_set.hpp:1212
iterator begin()
Begin iterator.
Definition: hash_set.hpp:891
void _update_size(const size_type &new_size)
Force update of the number of elements.
Definition: hash_set.hpp:1341
static enqueue_t< F &&, Args &&... > enqueue(unsigned n, F &&f, Args &&... args)
Enqueue task.
iterator find(const key_type &k)
Find element.
Definition: hash_set.hpp:978
std::equal_to< term_type > key_equal
Functor type for comparing the items in the set.
Definition: hash_set.hpp:377
iterator erase(const_iterator it)
Erase element.
Definition: hash_set.hpp:1065
Forward iterator type trait.
size_type bucket_count() const
Number of buckets.
Definition: hash_set.hpp:923
size_type size() const
Number of elements contained in the set.
Definition: hash_set.hpp:907
Default functor for the implementation of piranha::msgpack_convert().
Definition: s11n.hpp:867
#define piranha_throw(exception_type,...)
Exception-throwing macro.
Definition: exceptions.hpp:118
void operator()(hash_set< T, Hash, Pred > &h, const msgpack::object &o, msgpack_format f) const
Call operator.
Definition: hash_set.hpp:1581
void clear()
Remove all elements.
Definition: hash_set.hpp:1106
Default functor for the implementation of piranha::msgpack_pack().
Definition: s11n.hpp:686
Exception for signalling division by zero.
Definition: exceptions.hpp:136
msgpack_format
Serialization format for msgpack.
Definition: s11n.hpp:673
size_type _bucket(const key_type &k) const
Index of destination bucket (low-level).
Definition: hash_set.hpp:1331
size_type _bucket_from_hash(const std::size_t &hash) const
Index of destination bucket from hash value.
Definition: hash_set.hpp:1315
const_iterator begin() const
Const begin iterator.
Definition: hash_set.hpp:859
hash_set(const InputIterator &begin, const InputIterator &end, const size_type &n_buckets=0u, const hasher &h=hasher{}, const key_equal &k=key_equal{})
Constructor from range.
Definition: hash_set.hpp:780
std::pair< iterator, bool > insert(U &&k)
Insert element.
Definition: hash_set.hpp:1020
size_type bucket(const key_type &k) const
Index of destination bucket.
Definition: hash_set.hpp:948
_m_iterator _m_begin()
Mutable begin iterator.
Definition: hash_set.hpp:1217
_m_iterator _m_end()
Mutable end iterator.
Definition: hash_set.hpp:1241
const_iterator _find(const key_type &k, const size_type &bucket_idx) const
Find element (low-level).
Definition: hash_set.hpp:1291
Implementation of piranha::boost_save() via the Boost API.
Definition: s11n.hpp:217
Hash set.
Definition: hash_set.hpp:122
const list & _get_bucket_list(const size_type &idx) const
Const reference to list in bucket.
Definition: hash_set.hpp:1372
double load_factor() const
Load factor.
Definition: hash_set.hpp:931
Root piranha namespace.
Definition: array_key.hpp:52
std::hash< term_type > hasher
Functor type for the calculation of hash values.
Definition: hash_set.hpp:375
Detect the presence of piranha::msgpack_pack().
Definition: s11n.hpp:607
Type traits.
void operator()(msgpack::packer< Stream > &p, const hash_set< T, Hash, Pred > &h, msgpack_format f) const
Call operator.
Definition: hash_set.hpp:1551
iterator _unique_insert(U &&k, const size_type &bucket_idx)
Insert unique element (low-level).
Definition: hash_set.hpp:1269
std::size_t size_type
Size type.
Definition: hash_set.hpp:384
bool empty() const
Test for empty set.
Definition: hash_set.hpp:915
Type trait to detect piranha::safe_cast().
Definition: safe_cast.hpp:237
void boost_save(Archive &ar, const T &x)
Save to Boost archive.
Definition: s11n.hpp:328
term_type key_type
Key type.
Definition: hash_set.hpp:379
Type trait to detect equality function objects.
iterator_impl< key_type const > iterator
Iterator type.
Definition: hash_set.hpp:652
Type trait to detect hash function objects.
Type trait for well-behaved container elements.
hash_set(const hasher &h=hasher{}, const key_equal &k=key_equal{})
Default constructor.
Definition: hash_set.hpp:679
hash_set(const size_type &n_buckets, const hasher &h=hasher{}, const key_equal &k=key_equal{}, unsigned n_threads=1u)
Constructor from number of buckets.
Definition: hash_set.hpp:702
typename list::const_iterator local_iterator
Local iterator.
Definition: hash_set.hpp:668
#define PIRANHA_TT_CHECK(tt,...)
Macro for static type trait checks.
hash_set & operator=(hash_set &&other) noexcept
Move assignment operator.
Definition: hash_set.hpp:841
hash_set(hash_set &&other) noexcept
Move constructor.
Definition: hash_set.hpp:756
~hash_set()
Destructor.
Definition: hash_set.hpp:814
hash_set(const hash_set &other)
Copy constructor.
Definition: hash_set.hpp:717
double max_load_factor() const
Maximum load factor.
Definition: hash_set.hpp:986
hash_set(std::initializer_list< U > list)
Constructor from initializer list.
Definition: hash_set.hpp:801
void swap(hash_set &other)
Swap content.
Definition: hash_set.hpp:1122
iterator const_iterator
Const iterator type.
Definition: hash_set.hpp:663