piranha 0.10
thread_pool.hpp
1 /* Copyright 2009-2017 Francesco Biscani (bluescarni@gmail.com)
2 
3 This file is part of the Piranha library.
4 
5 The Piranha library is free software; you can redistribute it and/or modify
6 it under the terms of either:
7 
8  * the GNU Lesser General Public License as published by the Free
9  Software Foundation; either version 3 of the License, or (at your
10  option) any later version.
11 
12 or
13 
14  * the GNU General Public License as published by the Free Software
15  Foundation; either version 3 of the License, or (at your option) any
16  later version.
17 
18 or both in parallel, as here.
19 
20 The Piranha library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
23 for more details.
24 
25 You should have received copies of the GNU General Public License and the
26 GNU Lesser General Public License along with the Piranha library. If not,
27 see https://www.gnu.org/licenses/. */
28 
29 #ifndef PIRANHA_THREAD_POOL_HPP
30 #define PIRANHA_THREAD_POOL_HPP
31 
32 #include <algorithm>
33 #include <atomic>
34 #include <boost/lexical_cast.hpp>
35 #include <condition_variable>
36 #include <cstdlib>
37 #include <functional>
38 #include <future>
39 // See old usage of cout below.
40 // #include <iostream>
41 #include <list>
42 #include <memory>
43 #include <mutex>
44 #include <queue>
45 #include <stdexcept>
46 #include <string>
47 #include <thread>
48 #include <type_traits>
49 #include <unordered_set>
50 #include <utility>
51 #include <vector>
52 
53 #include <piranha/config.hpp>
54 #include <piranha/detail/atomic_lock_guard.hpp>
55 #include <piranha/detail/mpfr.hpp>
56 #include <piranha/exceptions.hpp>
57 #include <piranha/mp_integer.hpp>
58 #include <piranha/runtime_info.hpp>
59 #include <piranha/thread_management.hpp>
60 #include <piranha/type_traits.hpp>
61 
62 namespace piranha
63 {
64 
65 inline namespace impl
66 {
67 
68 // Task queue class. Inspired by:
69 // https://github.com/progschj/ThreadPool
70 struct task_queue {
71  task_queue(unsigned n, bool bind) : m_stop(false)
72  {
73  auto runner = [this, n, bind]() {
74  if (bind) {
75  try {
76  bind_to_proc(n);
77  } catch (...) {
78  // Don't stop if we cannot bind.
79  // NOTE: logging candidate.
80  }
81  }
82  try {
83  while (true) {
84  std::unique_lock<std::mutex> lock(this->m_mutex);
85  while (!this->m_stop && this->m_tasks.empty()) {
86  // Need to wait for something to happen only if the task
87  // list is empty and we are not stopping.
88  // NOTE: wait will be noexcept in C++14.
89  this->m_cond.wait(lock);
90  }
91  if (this->m_stop && this->m_tasks.empty()) {
92  // If the stop flag was set, and we do not have more tasks,
93  // just exit.
94  break;
95  }
96  // NOTE: move constructor of std::function could throw, unfortunately.
97  std::function<void()> task(std::move(this->m_tasks.front()));
98  this->m_tasks.pop();
99  lock.unlock();
100  task();
101  }
102  } catch (...) {
103  // The errors we could get here are:
104  // - threading primitives,
105  // - move-construction of std::function,
106  // - queue popping (I guess unlikely, as the destructor of std::function
107  // is noexcept).
108  // In any case, not much that can be done to recover from this, better to abort.
109  // NOTE: logging candidate.
110  std::abort();
111  }
112  // Free the MPFR caches.
113  ::mpfr_free_cache();
114  };
115  m_thread = std::thread(std::move(runner));
116  }
117  ~task_queue()
118  {
119  // NOTE: logging candidate (catch any exception,
120  // log it and abort as there is not much we can do).
121  try {
122  stop();
123  } catch (...) {
124  std::abort();
125  }
126  }
127  // Small utility to remove reference_wrapper.
128  template <typename T>
129  struct unwrap_ref {
130  using type = T;
131  };
132  template <typename T>
133  struct unwrap_ref<std::reference_wrapper<T>> {
134  using type = T;
135  };
136  template <typename T>
137  using unwrap_ref_t = typename unwrap_ref<T>::type;
138  // NOTE: the functor F will be forwarded to std::bind in order to create a nullary wrapper. The nullary wrapper
139  // will create copies of the input arguments, and it will then pass these copies as lvalue refs to a copy of the
140  // original functor when the call operator is invoked (with special handling of reference wrappers). Thus, the
141  // real invocation of F is not simply F(args), but this more complicated type below.
142  // NOTE: this is one place where it seems we really want decay instead of uncvref, as decay is applied
143  // also by std::bind() to F.
144  template <typename F, typename... Args>
145  using f_ret_type = decltype(std::declval<decay_t<F> &>()(std::declval<unwrap_ref_t<uncvref_t<Args>> &>()...));
146  // enqueue() will be enabled if:
147  // - f_ret_type is a valid type (checked in the return type),
148  // - we can construct the nullary wrapper via std::bind() (this requires F and Args to be ctible from the input
149  // arguments),
150  // - we can build a packaged_task from the nullary wrapper (requires F and Args to be move/copy ctible),
151  // - the return type of F is returnable.
152  template <typename F, typename... Args>
153  using enabler
154  = enable_if_t<conjunction<std::is_constructible<decay_t<F>, F>, std::is_constructible<uncvref_t<Args>, Args>...,
155  disjunction<std::is_copy_constructible<decay_t<F>>,
156  std::is_move_constructible<decay_t<F>>>,
157  conjunction<disjunction<std::is_copy_constructible<uncvref_t<Args>>,
158  std::is_move_constructible<uncvref_t<Args>>>>...,
159  is_returnable<f_ret_type<F, Args...>>>::value,
160  int>;
161  // Main enqueue function.
162  template <typename F, typename... Args, enabler<F &&, Args &&...> = 0>
163  std::future<f_ret_type<F &&, Args &&...>> enqueue(F &&f, Args &&... args)
164  {
165  using ret_type = f_ret_type<F &&, Args &&...>;
166  using p_task_type = std::packaged_task<ret_type()>;
167  // NOTE: here we have a multi-stage construction of the task:
168  // - std::bind() turns F into a nullary functor,
169  // - std::packaged_task gives us the std::future machinery,
170  // - std::function (in m_tasks) gives the uniform type interface via type erasure.
171  auto task = std::make_shared<p_task_type>(std::bind(std::forward<F>(f), std::forward<Args>(args)...));
172  std::future<ret_type> res = task->get_future();
173  {
174  std::unique_lock<std::mutex> lock(m_mutex);
175  if (unlikely(m_stop)) {
176  // Enqueueing is not allowed if the queue is stopped.
177  piranha_throw(std::runtime_error, "cannot enqueue task while the task queue is stopping");
178  }
179  m_tasks.push([task]() { (*task)(); });
180  }
181  // NOTE: notify_one is noexcept.
182  m_cond.notify_one();
183  return res;
184  }
185  // NOTE: we call this only from dtor, it is here in order to be able to test it.
186  // So the exception handling in dtor will suffice, keep it in mind if things change.
187  void stop()
188  {
189  {
190  std::unique_lock<std::mutex> lock(m_mutex);
191  if (m_stop) {
192  // Already stopped.
193  return;
194  }
195  m_stop = true;
196  }
197  // Notify the thread that the queue has been stopped, and wait for it
198  // to consume the remaining tasks and exit.
199  m_cond.notify_one();
200  m_thread.join();
201  }
202  // Data members.
203  bool m_stop;
204  std::condition_variable m_cond;
205  std::mutex m_mutex;
206  std::queue<std::function<void()>> m_tasks;
207  std::thread m_thread;
208 };
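As a quick illustration of the three-stage task construction described in the enqueue() comments (std::bind for the nullary wrapper, std::packaged_task for the future, std::function for type erasure), here is a minimal sketch of driving a task_queue directly. task_queue lives in the impl inline namespace and is an internal type, so the snippet below is purely illustrative and not part of the documented API:

    #include <cassert>
    #include <piranha/thread_pool.hpp>

    int main()
    {
        // Worker for queue index 0, with thread binding disabled (as in the initial pool setup).
        piranha::task_queue tq(0, false);
        // enqueue() builds the bind wrapper + packaged_task and returns the associated future.
        auto fut = tq.enqueue([](int a, int b) { return a + b; }, 1, 2);
        // get() blocks until the worker thread has run the task.
        assert(fut.get() == 3);
        // On destruction, stop() drains any remaining tasks and joins the worker thread.
    }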
209 
210 // Type to represent thread queues: a vector of task queues paired with a set of thread ids.
211 using thread_queues_t = std::pair<std::vector<std::unique_ptr<task_queue>>, std::unordered_set<std::thread::id>>;
212 
213 inline thread_queues_t get_initial_thread_queues()
214 {
215  // NOTE: we used to have this print statement here, but it turns out that
216  // in certain setups the cout object is not yet constructed at this point,
217  // and a segfault is generated. I *think* it is possible to enforce the creation
218  // of cout via construction of an init object:
219  // http://en.cppreference.com/w/cpp/io/ios_base/Init
220  // However, this is hardly essential. Let's leave this disabled for the moment.
221  // std::cout << "Initializing the thread pool.\n";
222  thread_queues_t retval;
223  // Create the vector of queues.
224  const unsigned candidate = runtime_info::get_hardware_concurrency(), hc = (candidate > 0u) ? candidate : 1u;
225  retval.first.reserve(static_cast<decltype(retval.first.size())>(hc));
226  for (unsigned i = 0u; i < hc; ++i) {
227  // NOTE: thread binding is disabled on startup.
228  retval.first.emplace_back(::new task_queue(i, false));
229  }
230  // Generate the set of thread IDs.
231  for (const auto &ptr : retval.first) {
232  auto p = retval.second.insert(ptr->m_thread.get_id());
233  (void)p;
234  piranha_assert(p.second);
235  }
236  return retval;
237 }
238 
239 template <typename = void>
240 struct thread_pool_base {
241  static thread_queues_t s_queues;
242  static bool s_bind;
243  static std::atomic_flag s_atf;
244 };
245 
246 template <typename T>
247 thread_queues_t thread_pool_base<T>::s_queues = get_initial_thread_queues();
248 
249 template <typename T>
250 std::atomic_flag thread_pool_base<T>::s_atf = ATOMIC_FLAG_INIT;
251 
252 template <typename T>
253 bool thread_pool_base<T>::s_bind = false;
254 
255 template <typename>
256 void thread_pool_shutdown();
257 }
258 
260 
274 // \todo work around MSVC bug in destruction of statically allocated threads (if needed once we support MSVC), as per:
275 // http://stackoverflow.com/questions/10915233/stdthreadjoin-hangs-if-called-after-main-exits-when-using-vs2012-rc
276 // detach() and wait as a workaround?
277 // \todo try to understand if we can suppress the future list class below in favour of STL-like algorithms.
278 template <typename T = void>
279 class thread_pool_ : private thread_pool_base<>
280 {
281  friend void piranha::impl::thread_pool_shutdown<T>();
282  using base = thread_pool_base<>;
283  // Enabler for use_threads.
284  template <typename Int>
285  using use_threads_enabler
286  = enable_if_t<disjunction<std::is_same<Int, integer>,
287  conjunction<std::is_integral<Int>, std::is_unsigned<Int>>>::value,
288  int>;
289  // The return type for enqueue().
290  template <typename F, typename... Args>
291  using enqueue_t = decltype(std::declval<task_queue &>().enqueue(std::declval<F>(), std::declval<Args>()...));
292 
293 public:
295 
321  template <typename F, typename... Args>
322  static enqueue_t<F &&, Args &&...> enqueue(unsigned n, F &&f, Args &&... args)
323  {
324  detail::atomic_lock_guard lock(s_atf);
325  if (unlikely(n >= s_queues.first.size())) {
326  piranha_throw(std::invalid_argument, "the thread index " + std::to_string(n)
327  + " is out of range, the thread pool contains only "
328  + std::to_string(s_queues.first.size()) + " threads");
329  }
330  return base::s_queues.first[static_cast<decltype(base::s_queues.first.size())>(n)]->enqueue(
331  std::forward<F>(f), std::forward<Args>(args)...);
332  }
334 
337  static unsigned size()
338  {
339  detail::atomic_lock_guard lock(s_atf);
340  return static_cast<unsigned>(base::s_queues.first.size());
341  }
342 
343 private:
344  // Helper function to create 'new_size' new queues with thread binding set to 'bind'.
345  static thread_queues_t create_new_queues(unsigned new_size, bool bind)
346  {
347  thread_queues_t new_queues;
348  // Create the task queues.
349  new_queues.first.reserve(static_cast<decltype(new_queues.first.size())>(new_size));
350  for (auto i = 0u; i < new_size; ++i) {
351  new_queues.first.emplace_back(::new task_queue(i, bind));
352  }
353  // Fill in the thread ids set.
354  for (const auto &ptr : new_queues.first) {
355  auto p = new_queues.second.insert(ptr->m_thread.get_id());
356  (void)p;
357  piranha_assert(p.second);
358  }
359  return new_queues;
360  }
361  // Shutdown. This can be used to stop the threads at program shutdown.
362  static void shutdown()
363  {
364  thread_queues_t new_queues;
365  detail::atomic_lock_guard lock(s_atf);
366  new_queues.swap(base::s_queues);
367  }
368 
369 public:
371 
383  static void resize(unsigned new_size)
384  {
385  if (unlikely(new_size == 0u)) {
386  piranha_throw(std::invalid_argument, "cannot resize the thread pool to zero");
387  }
388  // NOTE: need to lock here as we are reading the s_bind member.
389  detail::atomic_lock_guard lock(s_atf);
390  auto new_queues = create_new_queues(new_size, base::s_bind);
391  // NOTE: here the allocator is not swapped, as std::allocator won't propagate on swap.
392  // Besides, all instances of std::allocator are equal, so the operation is well-defined.
393  // http://en.cppreference.com/w/cpp/container/vector/swap
394  // This holds for both std::vector and std::unordered_set.
395  // If an exception actually gets thrown, no big deal.
396  // NOTE: the dtor of the queues is effectively noexcept, as the program will just abort in case of errors
397  // in the dtor.
398  new_queues.swap(base::s_queues);
399  }
401 
416  static void set_binding(bool flag)
417  {
418  detail::atomic_lock_guard lock(s_atf);
419  if (flag == base::s_bind) {
420  // Don't do anything if we are not changing the binding policy.
421  return;
422  }
423  auto new_queues = create_new_queues(static_cast<unsigned>(base::s_queues.first.size()), flag);
424  new_queues.swap(base::s_queues);
425  base::s_bind = flag;
426  }
428 
434  static bool get_binding()
435  {
436  detail::atomic_lock_guard lock(s_atf);
437  return base::s_bind;
438  }
440 
459  template <typename Int, use_threads_enabler<Int> = 0>
460  static unsigned use_threads(const Int &work_size, const Int &min_work_per_thread)
461  {
462  // Check input params.
463  if (unlikely(work_size <= Int(0))) {
464  piranha_throw(std::invalid_argument, "invalid value of " + boost::lexical_cast<std::string>(work_size)
465  + " for work size (it must be strictly positive)");
466  }
467  if (unlikely(min_work_per_thread <= Int(0))) {
468  piranha_throw(std::invalid_argument, "invalid value of "
469  + boost::lexical_cast<std::string>(min_work_per_thread)
470  + " for minimum work per thread (it must be strictly positive)");
471  }
472  detail::atomic_lock_guard lock(s_atf);
473  // Don't use threads if the calling thread belongs to the pool.
474  if (base::s_queues.second.find(std::this_thread::get_id()) != base::s_queues.second.end()) {
475  return 1u;
476  }
477  const auto n_threads = static_cast<unsigned>(base::s_queues.first.size());
478  piranha_assert(n_threads);
479  if (work_size / n_threads >= min_work_per_thread) {
480  // Enough work per thread, use them all.
481  return n_threads;
482  }
483  // Return a number of threads such that each thread consumes at least min_work_per_thread.
484  // Never return 0.
485  return static_cast<unsigned>(std::max(Int(1), static_cast<Int>(work_size / min_work_per_thread)));
486  }
487 };
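As a usage sketch (assuming the thread_pool alias to thread_pool_<> defined just below): enqueue() dispatches a callable to the queue with the given index, size() and resize() query and change the number of queues, and use_threads() applies the work-splitting rule above. For example, with a pool of 8 threads, work_size = 100 and min_work_per_thread = 30, 100 / 8 = 12 is below the threshold, so use_threads() returns max(1, 100 / 30) = 3.

    #include <iostream>
    #include <piranha/thread_pool.hpp>

    int main()
    {
        using piranha::thread_pool;
        std::cout << "pool size: " << thread_pool::size() << '\n';
        // Dispatch a task to queue 0 and wait for its result.
        auto fut = thread_pool::enqueue(0, [](unsigned x) { return 2u * x; }, 21u);
        std::cout << "result: " << fut.get() << '\n';
        // Work-splitting heuristic: 100 units of work, at least 30 units per thread.
        std::cout << "threads to use: " << thread_pool::use_threads(100u, 30u) << '\n';
    }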
488 
490 
493 using thread_pool = thread_pool_<>;
494 
495 inline namespace impl
496 {
497 
498 template <typename>
499 inline void thread_pool_shutdown()
500 {
501  thread_pool::shutdown();
502 }
503 }
504 
506 
510 // NOTE: we could provide a method to retrieve the future values from get_all() using a vector (when the future type
511 // is not void or a reference; if it is, the get_all() method stays as it is).
512 template <typename T>
513 class future_list
514 {
515  // Wait on a valid future, or abort.
516  static void wait_or_abort(const std::future<T> &fut)
517  {
518  piranha_assert(fut.valid());
519  try {
520  fut.wait();
521  } catch (...) {
522  // NOTE: logging candidate, with info from exception.
523  std::abort();
524  }
525  }
526 
527 public:
529 
532  future_list() = default;
534  future_list(const future_list &) = delete;
536  future_list(future_list &&) = delete;
537 
538 private:
539  future_list &operator=(const future_list &) = delete;
540  future_list &operator=(future_list &&) = delete;
541 
542 public:
544 
547  ~future_list()
548  {
549  wait_all();
550  }
552 
561  void push_back(std::future<T> &&f)
562  {
563  // Push back empty future.
564  try {
565  m_list.emplace_back();
566  } catch (...) {
567  // If we get some error here, we want to make sure we wait on the future
568  // before escaping out.
569  // NOTE: calling wait() on an invalid future is UB.
570  if (f.valid()) {
571  wait_or_abort(f);
572  }
573  throw;
574  }
575  // This cannot throw.
576  m_list.back() = std::move(f);
577  }
579 
582  void wait_all()
583  {
584  for (auto &f : m_list) {
585  if (f.valid()) {
586  wait_or_abort(f);
587  }
588  }
589  }
591 
597  void get_all()
598  {
599  for (auto &f : m_list) {
600  // NOTE: std::future's valid() method is noexcept.
601  if (f.valid()) {
602  (void)f.get();
603  }
604  }
605  }
606 
607 private:
608  std::list<std::future<T>> m_list;
609 };
610 }
611 
612 #endif
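Finally, a sketch of how future_list is typically paired with thread_pool::enqueue(); the lambda body is a placeholder:

    #include <piranha/thread_pool.hpp>

    int main()
    {
        using piranha::future_list;
        using piranha::thread_pool;
        future_list<void> flist;
        // Spread one task per pool thread; push_back() waits on the future if the insertion throws.
        for (unsigned i = 0u; i < thread_pool::size(); ++i) {
            flist.push_back(thread_pool::enqueue(i, []() { /* do some work */ }));
        }
        // Wait on all the stored futures; the destructor would call wait_all() anyway.
        flist.wait_all();
    }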