piranha  0.10
symbol_utils.hpp
1 /* Copyright 2009-2017 Francesco Biscani (bluescarni@gmail.com)
2 
3 This file is part of the Piranha library.
4 
5 The Piranha library is free software; you can redistribute it and/or modify
6 it under the terms of either:
7 
8  * the GNU Lesser General Public License as published by the Free
9  Software Foundation; either version 3 of the License, or (at your
10  option) any later version.
11 
12 or
13 
14  * the GNU General Public License as published by the Free Software
15  Foundation; either version 3 of the License, or (at your option) any
16  later version.
17 
18 or both in parallel, as here.
19 
20 The Piranha library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
23 for more details.
24 
25 You should have received copies of the GNU General Public License and the
26 GNU Lesser General Public License along with the Piranha library. If not,
27 see https://www.gnu.org/licenses/. */
28 
29 #ifndef PIRANHA_SYMBOL_UTILS_HPP
30 #define PIRANHA_SYMBOL_UTILS_HPP
31 
32 #include <algorithm>
33 #include <iterator>
34 #include <limits>
35 #include <stdexcept>
36 #include <string>
37 #include <tuple>
38 #include <type_traits>
39 #include <utility>
40 #include <vector>
41 
42 #include <boost/container/container_fwd.hpp>
43 #include <boost/container/flat_map.hpp>
44 #include <boost/container/flat_set.hpp>
45 #include <boost/iterator/filter_iterator.hpp>
46 #include <boost/iterator/transform_iterator.hpp>
47 #include <boost/iterator/zip_iterator.hpp>
48 #include <boost/tuple/tuple.hpp>
49 
50 #include <piranha/config.hpp>
51 #include <piranha/exceptions.hpp>
52 #include <piranha/safe_cast.hpp>
53 
54 namespace piranha
55 {
56 
58 
61 using symbol_fset = boost::container::flat_set<std::string>;
62 
64 
67 template <typename T>
68 using symbol_fmap = boost::container::flat_map<std::string, T>;
69 
71 
74 using symbol_idx = symbol_fset::size_type;
75 
77 
80 using symbol_idx_fset = boost::container::flat_set<symbol_idx>;
81 
83 
86 template <typename T>
87 using symbol_idx_fmap = boost::container::flat_map<symbol_idx, T>;
88 
89 inline namespace impl
90 {
91 
92 // This function will do symbol-merging for a vector-like key. Vector must support a minimal std::vector interface,
93 // and the value type must be constructible from zero and copy ctible.
94 template <typename Vector>
95 inline void vector_key_merge_symbols(Vector &retval, const Vector &v, const symbol_idx_fmap<symbol_fset> &ins_map,
96  const symbol_fset &orig_args)
97 {
98  using value_type = typename Vector::value_type;
99  if (unlikely(v.size() != orig_args.size())) {
100  // The usual sanity check.
101  piranha_throw(std::invalid_argument, "invalid argument(s) for symbol set merging: the size of the original "
102  "symbol set ("
103  + std::to_string(orig_args.size())
104  + ") must be equal to the key's size (" + std::to_string(v.size())
105  + ")");
106  }
107  if (unlikely(!ins_map.size())) {
108  // If we have nothing to insert, it means something is wrong: we should never invoke
109  // symbol merging if there's nothing to merge.
110  piranha_throw(std::invalid_argument,
111  "invalid argument(s) for symbol set merging: the insertion map cannot be empty");
112  }
113  if (unlikely(ins_map.rbegin()->first > v.size())) {
114  // The last element of the insertion map must be at most v.size(), which means that there
115  // are symbols to be appended at the end.
116  piranha_throw(std::invalid_argument,
117  "invalid argument(s) for symbol set merging: the last index of the insertion map ("
118  + std::to_string(ins_map.rbegin()->first) + ") must not be greater than the key's size ("
119  + std::to_string(v.size()) + ")");
120  }
121  // NOTE: possibility of reserve(), if we ever implement it in static/small vector.
122  piranha_assert(retval.size() == 0u);
123  auto map_it = ins_map.begin();
124  const auto map_end = ins_map.end();
125  for (decltype(v.size()) i = 0; i < v.size(); ++i) {
126  if (map_it != map_end && map_it->first == i) {
127  // NOTE: here and below we should really have a better way to insert
128  // a block of zeroes rather than using a back inserter, especially
129  // for our custom vector classes. Revisit this eventually.
130  std::fill_n(std::back_inserter(retval), map_it->second.size(), value_type(0));
131  ++map_it;
132  }
133  retval.push_back(v[i]);
134  }
135  // We could still have symbols which need to be appended at the end.
136  if (map_it != map_end) {
137  std::fill_n(std::back_inserter(retval), map_it->second.size(), value_type(0));
138  piranha_assert(map_it + 1 == map_end);
139  }
140 }
141 }
142 
144 
169 inline std::tuple<symbol_fset, symbol_idx_fmap<symbol_fset>, symbol_idx_fmap<symbol_fset>>
170 ss_merge(const symbol_fset &s1, const symbol_fset &s2)
171 {
172  // Use a local cache for the accumulation of the union.
173  PIRANHA_MAYBE_TLS std::vector<std::string> v_cache;
174  using v_cache_size_t = decltype(v_cache.size());
175  // NOTE: the max size of the union is the sum of the two sizes, make sure
176  // we can compute that safely.
177  if (unlikely(s2.size() > std::numeric_limits<v_cache_size_t>::max()
178  || s1.size() > std::numeric_limits<v_cache_size_t>::max() - s2.size())) {
179  piranha_throw(std::overflow_error,
180  "overflow in the computation of the size of the union of two symbol sets of sizes "
181  + std::to_string(s1.size()) + " and " + std::to_string(s2.size()));
182  }
183  const auto max_size = static_cast<v_cache_size_t>(static_cast<v_cache_size_t>(s1.size()) + s2.size());
184  if (v_cache.size() < max_size) {
185  // Enlarge if needed.
186  v_cache.resize(max_size);
187  }
188  // Do the union.
189  const auto u_end = std::set_union(s1.begin(), s1.end(), s2.begin(), s2.end(), v_cache.begin());
190  // Create the output set out of the union, knowing that it is ordered by construction.
191  symbol_fset u_set{boost::container::ordered_unique_range_t{}, v_cache.begin(), u_end};
192  auto compute_map = [&u_set](const symbol_fset &s) {
194  // NOTE: max size of retval is the original size + 1
195  // (there could be an insertion before each and every element of s, plus an
196  // insertion at the end).
197  // NOTE: here the static_cast is fine, even if the size type were
198  // a short unsigned integral. The conversion rules ensure that the addition
199  // is always done with unsigned arithmetic, thanks to the presence of 1u
200  // as operand.
201  retval.reserve(static_cast<decltype(retval.size())>(s.size() + 1u));
202  auto u_it = u_set.cbegin();
203  for (decltype(s.size()) i = 0; i < s.size(); ++i, ++u_it) {
204  piranha_assert(u_it != u_set.end());
205  const auto &cur_sym = *s.nth(i);
206  if (*u_it < cur_sym) {
207  const auto new_it = retval.emplace_hint(retval.end(), i, symbol_fset{*u_it});
208  for (++u_it; *u_it < cur_sym; ++u_it) {
209  piranha_assert(u_it != u_set.end());
210  new_it->second.insert(new_it->second.end(), *u_it);
211  }
212  piranha_assert(*u_it == cur_sym);
213  }
214  }
215  // Insertions at the end.
216  if (u_it != u_set.cend()) {
217  const auto new_it = retval.emplace_hint(retval.end(), s.size(), symbol_fset{*u_it});
218  for (++u_it; u_it != u_set.cend(); ++u_it) {
219  new_it->second.insert(new_it->second.end(), *u_it);
220  }
221  }
222  return retval;
223  };
224  return std::make_tuple(std::move(u_set), compute_map(s1), compute_map(s2));
225 }
226 
228 
237 inline symbol_idx ss_index_of(const symbol_fset &ref, const std::string &name)
238 {
239  return ref.index_of(ref.find(name));
240 }
241 
242 inline namespace impl
243 {
244 
245 // Functor for use in the filter iterator below.
246 struct mask_ss_filter {
247  template <typename Tuple>
248  bool operator()(const Tuple &t) const
249  {
250  // NOTE: the filter iterator skips when this returns false.
251  // Since the mask is 1 for elements to be skipped, we need to return
252  // true if the mask is zero.
253  return t.template get<0>() == char(0);
254  }
255 };
256 
257 // Functor for use in the transform iterator below.
258 struct mask_ss_transform {
259  template <typename Tuple>
260  const std::string &operator()(const Tuple &t) const
261  {
262  // NOTE: we need to extract the symbol name, which is
263  // the second element of the tuple.
264  return t.template get<1>();
265  }
266 };
267 }
268 
270 
287 inline symbol_fset ss_trim(const symbol_fset &s, const std::vector<char> &mask)
288 {
289  if (unlikely(s.size() != mask.size())) {
290  // The usual sanity check.
291  piranha_throw(std::invalid_argument,
292  "invalid argument(s) for symbol set trimming: the size of the original symbol set ("
293  + std::to_string(s.size()) + ") differs from the size of trimming mask ("
294  + std::to_string(mask.size()) + ")");
295  }
296  // First we zip together s and mask.
297  auto zip_begin = boost::make_zip_iterator(boost::make_tuple(mask.begin(), s.begin()));
298  auto zip_end = boost::make_zip_iterator(boost::make_tuple(mask.end(), s.end()));
299  // Then we create a filter on the zip: iterate only over those zipped values in which
300  // the first element (i.e., the mask) is 0.
301  auto filter_begin = boost::make_filter_iterator(mask_ss_filter{}, zip_begin, zip_end);
302  auto filter_end = boost::make_filter_iterator(mask_ss_filter{}, zip_end, zip_end);
303  // Finally, we need to take the skipping iterator and extract only the second element
304  // of each value (the actual symbol name).
305  auto trans_begin = boost::make_transform_iterator(filter_begin, mask_ss_transform{});
306  auto trans_end = boost::make_transform_iterator(filter_end, mask_ss_transform{});
307  // Now we can construct the return value. We know it is ordered because the original
308  // symbol set is ordered, and we just skip over some elements.
309  return symbol_fset{boost::container::ordered_unique_range_t{}, trans_begin, trans_end};
310 }
311 
313 
330 {
331  using it_diff_t = decltype(s1.end() - s1.begin());
332  using it_udiff_t = std::make_unsigned<it_diff_t>::type;
333  // NOTE: let's make sure all the indices in s1 can be represented by the iterator diff type.
334  // This makes the computation of s1_it - s1_it_b later safe.
335  if (unlikely(s1.size() > static_cast<it_udiff_t>(std::numeric_limits<it_diff_t>::max()))) {
337  std::overflow_error,
338  "overflow in the determination of the indices of the intersection of two symbol_fset: the size of one of "
339  "the sets ("
340  + std::to_string(s1.size())
341  + ") is larger than the maximum value representable by the difference type of symbol_fset's iterators ("
342  + std::to_string(std::numeric_limits<it_diff_t>::max()) + ")");
343  }
344  // Use a local vector cache to build the result.
345  PIRANHA_MAYBE_TLS std::vector<symbol_idx> vidx;
346  // The max possible size of the intersection is the minimum size of the
347  // two input sets.
348  const auto max_size = std::min(s1.size(), s2.size());
349  // Enlarge vidx if needed.
350  if (vidx.size() < max_size) {
351  vidx.resize(safe_cast<decltype(vidx.size())>(max_size));
352  }
353  auto vidx_it = vidx.begin();
354  const auto s1_it_b = s1.begin(), s1_it_f = s1.end();
355  auto s1_it = s1_it_b;
356  for (const auto &sym : s2) {
357  // Try to locate sym into s1, using s1_it to store the result
358  // of the search.
359  s1_it = std::lower_bound(s1_it, s1_it_f, sym);
360  if (s1_it == s1_it_f) {
361  // No symbol >= sym was found in s1, we
362  // can break out as no more symbols from s2 can
363  // be found in s1.
364  break;
365  }
366  // Now s1_it points to a symbol which is >= sym.
367  if (*s1_it == sym) {
368  // We found sym in s1, record its index.
369  // NOTE: we know by construction that s1_it - s1_it_b is nonnegative, hence we can
370  // cast it safely to the unsigned counterpart. We also know we can compute it safely
371  // because we checked earlier. Finally, we need a safe cast in principle as symbol_idx
372  // and the unsigned counterpart of it_diff_t might be different (in reality, safe_cast
373  // will probably be optimised out).
374  piranha_assert(vidx_it != vidx.end());
375  *(vidx_it++) = safe_cast<symbol_idx>(static_cast<it_udiff_t>(s1_it - s1_it_b));
376  // Bump up s1_it: we want to start searching from the next
377  // element in the next loop iteration.
378  ++s1_it;
379  }
380  }
381  // Build the return value. We know that, by construction, vidx has been built
382  // as a sorted vector.
383  assert(std::is_sorted(vidx.begin(), vidx_it));
384  return symbol_idx_fset{boost::container::ordered_unique_range_t{}, vidx.begin(), vidx_it};
385 }
386 
387 inline namespace impl
388 {
389 
390 // Enabler for sm_intersect_idx.
391 template <typename T>
392 using has_sm_intersect_idx
393  = conjunction<std::is_default_constructible<T>, std::is_copy_assignable<T>, std::is_copy_constructible<T>>;
394 
395 template <typename T>
396 using sm_intersect_idx_enabler = enable_if_t<has_sm_intersect_idx<T>::value, int>;
397 }
398 
424 template <typename T, sm_intersect_idx_enabler<T> = 0>
426 {
427  // NOTE: the code here is quite similar to the other similar function above,
428  // just a few small differences. Perhaps we can refactor out some common
429  // code eventually.
430  using it_diff_t = decltype(s.end() - s.begin());
431  using it_udiff_t = std::make_unsigned<it_diff_t>::type;
432  // NOTE: let's make sure all the indices in s can be represented by the iterator diff type.
433  // This makes the computation of s_it - s_it_b later safe.
434  if (unlikely(s.size() > static_cast<it_udiff_t>(std::numeric_limits<it_diff_t>::max()))) {
436  std::overflow_error,
437  "overflow in the determination of the indices of the intersection of a symbol_fset and a symbol_fmap: the "
438  "size of the set ("
439  + std::to_string(s.size())
440  + ") is larger than the maximum value representable by the difference type of symbol_fset's iterators ("
441  + std::to_string(std::numeric_limits<it_diff_t>::max()) + ")");
442  }
443  // Use a local vector cache to build the result.
444  PIRANHA_MAYBE_TLS std::vector<std::pair<symbol_idx, T>> vidx;
445  // The max possible size of the intersection is the minimum size of the
446  // two input objects.
447  const auto max_size
448  = std::min<typename std::common_type<decltype(s.size()), decltype(m.size())>::type>(s.size(), m.size());
449  // Enlarge vidx if needed.
450  if (vidx.size() < max_size) {
451  vidx.resize(safe_cast<decltype(vidx.size())>(max_size));
452  }
453  auto vidx_it = vidx.begin();
454  const auto s_it_b = s.begin(), s_it_f = s.end();
455  auto s_it = s_it_b;
456  for (const auto &p : m) {
457  const auto &sym = p.first;
458  // Try to locate the current symbol in s, using s_it to store the result
459  // of the search.
460  s_it = std::lower_bound(s_it, s_it_f, sym);
461  if (s_it == s_it_f) {
462  // No symbol >= sym was found in s, we
463  // can break out as no more symbols from m can
464  // be found in s.
465  break;
466  }
467  // Now s_it points to a symbol which is >= sym.
468  if (*s_it == sym) {
469  // We found sym in s, record its index.
470  // NOTE: we know by construction that s_it - s_it_b is nonnegative, hence we can
471  // cast it safely to the unsigned counterpart. We also know we can compute it safely
472  // because we checked earlier. Finally, we need a safe cast in principle as symbol_idx
473  // and the unsigned counterpart of it_diff_t might be different (in reality, safe_cast
474  // will probably be optimised out).
475  piranha_assert(vidx_it != vidx.end());
476  // Store the index and the mapped value.
477  vidx_it->first = safe_cast<symbol_idx>(static_cast<it_udiff_t>(s_it - s_it_b));
478  vidx_it->second = p.second;
479  ++vidx_it;
480  // Bump up s_it: we want to start searching from the next
481  // element in the next loop iteration.
482  ++s_it;
483  }
484  }
485  // Build the return value. We know that, by construction, vidx has been built
486  // as a sorted vector.
487  return symbol_idx_fmap<T>{boost::container::ordered_unique_range_t{}, vidx.begin(), vidx_it};
488 }
489 }
490 
491 #endif
symbol_idx_fset ss_intersect_idx(const symbol_fset &s1, const symbol_fset &s2)
Find the indices of the intersection of two symbol_fset.
boost::container::flat_map< std::string, T > symbol_fmap
Flat map of symbols.
Exceptions.
std::tuple< symbol_fset, symbol_idx_fmap< symbol_fset >, symbol_idx_fmap< symbol_fset > > ss_merge(const symbol_fset &s1, const symbol_fset &s2)
Merge two symbol_fset.
#define piranha_throw(exception_type,...)
Exception-throwing macro.
Definition: exceptions.hpp:118
boost::container::flat_set< std::string > symbol_fset
Flat set of symbols.
symbol_idx_fmap< T > sm_intersect_idx(const symbol_fset &s, const symbol_fmap< T > &m)
Find the indices of the intersection of a symbol_fset and a symbol_fmap.
symbol_fset::size_type symbol_idx
Symbol index.
symbol_idx ss_index_of(const symbol_fset &ref, const std::string &name)
Identify the index of a symbol in a symbol_fset.
symbol_fset ss_trim(const symbol_fset &s, const std::vector< char > &mask)
Trim a symbol_fset.
boost::container::flat_map< symbol_idx, T > symbol_idx_fmap
Flat map of symbol indices.
Root piranha namespace.
Definition: array_key.hpp:52
boost::container::flat_set< symbol_idx > symbol_idx_fset
Flat set of symbol indices.
safe_cast_type< To, From > safe_cast(const From &x)
Safe cast.
Definition: safe_cast.hpp:219