piranha  0.10
memory.hpp
Go to the documentation of this file.
1 /* Copyright 2009-2017 Francesco Biscani (bluescarni@gmail.com)
2 
3 This file is part of the Piranha library.
4 
5 The Piranha library is free software; you can redistribute it and/or modify
6 it under the terms of either:
7 
8  * the GNU Lesser General Public License as published by the Free
9  Software Foundation; either version 3 of the License, or (at your
10  option) any later version.
11 
12 or
13 
14  * the GNU General Public License as published by the Free Software
15  Foundation; either version 3 of the License, or (at your option) any
16  later version.
17 
18 or both in parallel, as here.
19 
20 The Piranha library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
23 for more details.
24 
25 You should have received copies of the GNU General Public License and the
26 GNU Lesser General Public License along with the Piranha library. If not,
27 see https://www.gnu.org/licenses/. */
28 
29 #ifndef PIRANHA_MEMORY_HPP
30 #define PIRANHA_MEMORY_HPP
31 
36 #include <cstddef>
37 #include <limits>
38 #include <memory>
39 #include <new>
40 #include <stdexcept>
41 #include <type_traits>
42 #include <utility>
43 #include <vector>
44 
45 #include <piranha/config.hpp>
46 #include <piranha/exceptions.hpp>
47 #include <piranha/thread_pool.hpp>
48 #include <piranha/type_traits.hpp>
49 
50 #if defined(PIRANHA_HAVE_POSIX_MEMALIGN) // POSIX memalign.
51 // NOTE: this can go away once we have support for std::align.
52 #define PIRANHA_HAVE_MEMORY_ALIGNMENT_PRIMITIVES
53 #include <cstdlib>
54 #elif defined(_WIN32) // Windows _aligned_malloc.
55 #define PIRANHA_HAVE_MEMORY_ALIGNMENT_PRIMITIVES
56 #include <malloc.h>
57 #else // malloc + std::align.
58 #include <cstdlib>
59 #include <limits>
60 #include <memory>
61 #endif
62 
63 namespace piranha
64 {
65 
66 namespace detail
67 {
68 
69 // Possible implementation of aligned malloc using std::align (not yet available in GCC).
70 // NOTE: this needs to be tested and debugged.
71 #if 0
72 inline void *cpp_aligned_alloc(const std::size_t &alignment, const std::size_t &size)
73 {
74  if (unlikely(alignment == 0u || size > std::numeric_limits<std::size_t>::max() - alignment)) {
75  piranha_throw(std::bad_alloc,);
76  }
77  // This is the actual allocated size.
78  std::size_t a_size = size + alignment;
79  const std::size_t orig_a_size = a_size;
80  // Allocate enough space for payload and alignment.
81  void *u_ptr = std::malloc(a_size), *orig_u_ptr = u_ptr;
82  if (unlikely(u_ptr == nullptr)) {
83  piranha_throw(std::bad_alloc,);
84  }
85  // Try to align.
86  void *ptr = std::align(alignment,size,u_ptr,a_size);
87  if (unlikely(ptr == nullptr)) {
88  std::free(orig_u_ptr);
89  piranha_throw(std::bad_alloc,);
90  }
91  // Some sanity checks.
92  piranha_assert(orig_a_size >= a_size);
93  // If the size has changed, the pointer must have too.
94  piranha_assert(orig_a_size == a_size || orig_u_ptr != ptr);
95  // Number of bytes taken by the alignment.
96  const std::size_t delta = (orig_a_size == a_size) ? alignment : orig_a_size - a_size;
97  piranha_assert(delta <= alignment);
98  piranha_assert(delta > 0u);
99  // The delta needs to be representable by unsigned char for storage.
100  if (unlikely(delta > std::numeric_limits<unsigned char>::max())) {
101  // NOTE: need to use original pointer, as u_ptr might have been changed by std::align.
102  std::free(orig_u_ptr);
103  piranha_throw(std::bad_alloc,);
104  }
105  // If the original pointer was already aligned, we have to move it forward.
106  if (orig_u_ptr == ptr) {
107  ptr = static_cast<void *>(static_cast<unsigned char *>(ptr) + alignment);
108  }
109  // Record the delta.
110  *(static_cast<unsigned char *>(ptr) - 1) = static_cast<unsigned char>(delta);
111  return ptr;
112 }
113 #endif
114 }
115 
117 
134 inline void *aligned_palloc(const std::size_t &alignment, const std::size_t &size)
135 {
136  // Platform-independent part: special values for alignment and size.
137  if (unlikely(size == 0u)) {
138  return nullptr;
139  }
140  if (alignment == 0u) {
141  void *ptr = std::malloc(size);
142  if (unlikely(ptr == nullptr)) {
143  piranha_throw(std::bad_alloc, );
144  }
145  return ptr;
146  }
147 #if defined(PIRANHA_HAVE_POSIX_MEMALIGN)
148  void *ptr;
149  const int retval = ::posix_memalign(&ptr, alignment, size);
150  if (unlikely(retval != 0)) {
151  piranha_throw(std::bad_alloc, );
152  }
153  return ptr;
154 #elif defined(_WIN32)
155  void *ptr = ::_aligned_malloc(size, alignment);
156  if (unlikely(ptr == nullptr)) {
157  piranha_throw(std::bad_alloc, );
158  }
159  return ptr;
160 #else
161  // return detail::cpp_aligned_alloc(alignment,size);
162  piranha_throw(not_implemented_error, "memory alignment primitives are not available");
163 #endif
164 }
165 
167 
182 inline void aligned_pfree(const std::size_t &alignment, void *ptr)
183 {
184  if (unlikely(ptr == nullptr)) {
185  return;
186  }
187  if (alignment == 0u) {
188  std::free(ptr);
189  return;
190  }
191 #if defined(PIRANHA_HAVE_POSIX_MEMALIGN)
192  std::free(ptr);
193 #elif defined(_WIN32)
194  ::_aligned_free(ptr);
195 #else
196  piranha_throw(not_implemented_error, "memory alignment primitives are not available");
197 #endif
198 }
199 
201 
223 template <typename T>
224 inline bool alignment_check(const std::size_t &alignment)
225 {
226  // Platform-independent checks.
227  if (alignment == 0u) {
228  return true;
229  }
230  // Alignment must be power of 2.
231  if (unlikely(static_cast<bool>(alignment & (alignment - 1u)))) {
232  return false;
233  }
234  // Alignment must not be less than the natural alignment of T. We just need the '<' check
235  // as we already know that alignment is either a multiple of alignof(T) or a divisor.
236  if (unlikely(alignment < alignof(typename std::decay<T>::type))) {
237  return false;
238  }
239 #if defined(PIRANHA_HAVE_POSIX_MEMALIGN)
240  // Extra check for posix_memalign requirements.
241  if (unlikely(static_cast<bool>(alignment % sizeof(void *)))) {
242  return false;
243  }
244 #endif
245  return true;
246 }
247 
249 
270 template <typename T, typename = typename std::enable_if<is_container_element<T>::value>::type>
271 inline void parallel_value_init(T *ptr, const std::size_t &size, const unsigned &n_threads)
272 {
273  using ranges_vector = std::vector<std::pair<T *, T *>>;
274  using rv_size_type = typename ranges_vector::size_type;
275  if (unlikely(ptr == nullptr)) {
276  piranha_assert(!size);
277  return;
278  }
279  // Initing functor.
280  auto init_function = [](T *start, T *end, const unsigned &thread_idx, ranges_vector *rv) {
281  auto orig_start = start;
282  try {
283  for (; start != end; ++start) {
284  ::new (static_cast<void *>(start)) T();
285  }
286  } catch (...) {
287  // Cleanup.
288  for (; orig_start != start; ++orig_start) {
289  orig_start->~T();
290  }
291  // Re-throw.
292  throw;
293  }
294  // If the init was successful and we are in multithreaded mode, record
295  // the range that was inited.
296  if (rv != nullptr) {
297  (*rv)[static_cast<rv_size_type>(thread_idx)].first = orig_start;
298  (*rv)[static_cast<rv_size_type>(thread_idx)].second = end;
299  }
300  };
301  if (n_threads <= 1) {
302  init_function(ptr, ptr + size, 0u, nullptr);
303  } else {
304  // Init the ranges vector with (ptr,ptr) pairs, so they are empty ranges.
305  ranges_vector inited_ranges(static_cast<rv_size_type>(n_threads), std::make_pair(ptr, ptr));
306  if (unlikely(inited_ranges.size() != n_threads)) {
307  piranha_throw(std::bad_alloc, );
308  }
309  // Work per thread.
310  const auto wpt = static_cast<std::size_t>(size / n_threads);
312  try {
313  for (auto i = 0u; i < n_threads; ++i) {
314  auto start = ptr + i * wpt, end = (i == n_threads - 1u) ? ptr + size : ptr + (i + 1u) * wpt;
315  f_list.push_back(thread_pool::enqueue(i, init_function, start, end, i, &inited_ranges));
316  }
317  f_list.wait_all();
318  f_list.get_all();
319  } catch (...) {
320  f_list.wait_all();
321  // Rollback the ranges that were inited.
322  for (const auto &p : inited_ranges) {
323  for (auto start = p.first; start != p.second; ++start) {
324  start->~T();
325  }
326  }
327  throw;
328  }
329  }
330 }
331 
333 
348 template <typename T, typename = typename std::enable_if<is_container_element<T>::value>::type>
349 inline void parallel_destroy(T *ptr, const std::size_t &size, const unsigned &n_threads)
350 {
351  using ranges_vector = std::vector<std::pair<T *, T *>>;
352  using rv_size_type = typename ranges_vector::size_type;
353  // Nothing to be done for null pointers.
354  if (unlikely(ptr == nullptr)) {
355  piranha_assert(!size);
356  return;
357  }
358  // Nothing needs to be done for trivially destructible types:
359  // http://en.cppreference.com/w/cpp/language/lifetime
360  if (std::is_trivially_destructible<T>::value) {
361  return;
362  }
363  // Destroy functor.
364  // NOTE: GCC suggests adding noexcept for some reason, which is fair as we are
365  // forcing T to have a non-throwing dtor. Let's double check with a static
366  // assert in any case.
367  auto destroy_function = [](T * start, T * end) noexcept
368  {
369  for (; start != end; ++start) {
370  static_assert(noexcept(start->~T()), "This destructor cannot throw.");
371  start->~T();
372  }
373  };
374  if (n_threads <= 1u) {
375  destroy_function(ptr, ptr + size);
376  } else {
377  // A vector of ranges representing elements yet to be destroyed in case something goes wrong
378  // in the multithreaded part.
379  ranges_vector d_ranges;
381  try {
382  d_ranges.resize(static_cast<rv_size_type>(n_threads), std::make_pair(ptr, ptr));
383  if (unlikely(d_ranges.size() != n_threads)) {
384  piranha_throw(std::bad_alloc, );
385  }
386  } catch (...) {
387  // Just perform the single-threaded version, and GTFO.
388  destroy_function(ptr, ptr + size);
389  return;
390  }
391  try {
392  // Work per thread.
393  const auto wpt = static_cast<std::size_t>(size / n_threads);
394  // The ranges to destroy in the cleanup phase are, initially, all the ranges.
395  for (auto i = 0u; i < n_threads; ++i) {
396  auto start = ptr + i * wpt, end = (i == n_threads - 1u) ? ptr + size : ptr + (i + 1u) * wpt;
397  d_ranges[static_cast<rv_size_type>(i)] = std::make_pair(start, end);
398  }
399  // Perform the actual destruction and update the d_ranges vector.
400  for (auto i = 0u; i < n_threads; ++i) {
401  auto start = d_ranges[static_cast<rv_size_type>(i)].first,
402  end = d_ranges[static_cast<rv_size_type>(i)].second;
403  f_list.push_back(thread_pool::enqueue(i, destroy_function, start, end));
404  // The range needs not to be destroyed anymore, as the enqueue/push_back was successful. Replace
405  // with an empty range.
406  d_ranges[static_cast<rv_size_type>(i)].first = ptr;
407  d_ranges[static_cast<rv_size_type>(i)].second = ptr;
408  }
409  f_list.wait_all();
410  // NOTE: T is a container_element, no need to get exceptions here.
411  } catch (...) {
412  f_list.wait_all();
413  // If anything failed in the multithreaded part, just destroy in single-thread the ranges
414  // that were not destroyed.
415  for (const auto &p : d_ranges) {
416  destroy_function(p.first, p.second);
417  }
418  }
419  }
420 }
421 
422 namespace detail
423 {
424 
425 // Deleter functor to be used in std::unique_ptr.
426 template <typename T>
427 class parallel_deleter
428 {
429 public:
430  explicit parallel_deleter(const std::size_t &size, const unsigned &n_threads) : m_size(size), m_n_threads(n_threads)
431  {
432  }
433  void operator()(T *ptr) const
434  {
435  // Parallel destroy and pfree are no-ops with nullptr. All of this
436  // is noexcept.
437  parallel_destroy(ptr, m_size, m_n_threads);
438  aligned_pfree(0u, static_cast<void *>(ptr));
439  }
440 
441 private:
442  std::size_t m_size;
443  unsigned m_n_threads;
444 };
445 }
446 
448 
474 template <typename T, typename = typename std::enable_if<is_container_element<T>::value>::type>
475 inline std::unique_ptr<T[], detail::parallel_deleter<T>> make_parallel_array(const std::size_t &size,
476  const unsigned &n_threads)
477 {
478  if (unlikely(size > std::numeric_limits<std::size_t>::max() / sizeof(T))) {
479  piranha_throw(std::bad_alloc, );
480  }
481  // Allocate space. This could be nullptr if size is zero.
482  auto ptr = static_cast<T *>(aligned_palloc(0u, static_cast<std::size_t>(size * sizeof(T))));
483  try {
484  // No problems here with nullptr, will be a no-op.
485  parallel_value_init(ptr, size, n_threads);
486  } catch (...) {
487  piranha_assert(ptr != nullptr);
488  // Free the allocated memory. This is noexcept.
489  aligned_pfree(0u, static_cast<void *>(ptr));
490  throw;
491  }
492  return std::unique_ptr<T[], detail::parallel_deleter<T>>(ptr, detail::parallel_deleter<T>{size, n_threads});
493 }
494 }
495 
496 #endif
void wait_all()
Wait on all the futures.
void * aligned_palloc(const std::size_t &alignment, const std::size_t &size)
Allocate memory aligned to a specific value.
Definition: memory.hpp:134
void parallel_value_init(T *ptr, const std::size_t &size, const unsigned &n_threads)
Parallel value initialisation.
Definition: memory.hpp:271
std::unique_ptr< T[], detail::parallel_deleter< T > > make_parallel_array(const std::size_t &size, const unsigned &n_threads)
Create an array in parallel.
Definition: memory.hpp:475
Exceptions.
void get_all()
Get all the futures.
static enqueue_t< F &&, Args &&... > enqueue(unsigned n, F &&f, Args &&... args)
Enqueue task.
void parallel_destroy(T *ptr, const std::size_t &size, const unsigned &n_threads)
Parallel destruction.
Definition: memory.hpp:349
#define piranha_throw(exception_type,...)
Exception-throwing macro.
Definition: exceptions.hpp:118
Class to store a list of futures.
Root piranha namespace.
Definition: array_key.hpp:52
Exception for functionality not implemented or not available on the current platform.
Definition: exceptions.hpp:128
void aligned_pfree(const std::size_t &alignment, void *ptr)
Free memory allocated via piranha::aligned_alloc.
Definition: memory.hpp:182
Type traits.
bool alignment_check(const std::size_t &alignment)
Alignment checks.
Definition: memory.hpp:224
void push_back(std::future< T > &&f)
Move-insert a future.