mdds
trie_map.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
3  *
4  * Copyright (c) 2015-2020 Kohei Yoshida
5  *
6  * Permission is hereby granted, free of charge, to any person
7  * obtaining a copy of this software and associated documentation
8  * files (the "Software"), to deal in the Software without
9  * restriction, including without limitation the rights to use,
10  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following
13  * conditions:
14  *
15  * The above copyright notice and this permission notice shall be
16  * included in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25  * OTHER DEALINGS IN THE SOFTWARE.
26  *
27  ************************************************************************/
28 
29 #ifndef INCLUDED_MDDS_TRIE_MAP_HPP
30 #define INCLUDED_MDDS_TRIE_MAP_HPP
31 
32 #include "trie_map_itr.hpp"
33 
34 #include <vector>
35 #include <string>
36 #include <deque>
37 #include <map>
38 #include <memory>
39 
40 namespace mdds {
41 
42 namespace trie {
43 
47 template<typename ContainerT>
49 {
51  using key_type = ContainerT;
52 
60 
66  using key_unit_type = typename key_type::value_type;
67 
77  static key_buffer_type to_key_buffer(const key_unit_type* str, size_t length)
78  {
79  return key_buffer_type(str, length);
80  }
81 
91  {
92  return key_buffer_type(key);
93  }
94 
95  static const key_unit_type* buffer_data(const key_buffer_type& buf)
96  {
97  return buf.data();
98  }
99 
100  static size_t buffer_size(const key_buffer_type& buf)
101  {
102  return buf.size();
103  }
104 
112  static void push_back(key_buffer_type& buffer, key_unit_type c)
113  {
114  buffer.push_back(c);
115  }
116 
123  static void pop_back(key_buffer_type& buffer)
124  {
125  buffer.pop_back();
126  }
127 
136  static key_type to_key(const key_buffer_type& buf)
137  {
138  return buf;
139  }
140 };
141 
142 using std_string_trait = std_container_trait<std::string>;
143 
145 template<typename T>
147 {
148  static constexpr bool variable_size = false;
149 
150  static constexpr size_t value_size = sizeof(T);
151 
152  static void write(std::ostream& os, const T& v);
153 
154  static void read(std::istream& is, size_t n, T& v);
155 };
156 
158 template<typename T>
160 {
161  static constexpr bool variable_size = true;
162 
163  static void write(std::ostream& os, const T& v);
164 
165  static void read(std::istream& is, size_t n, T& v);
166 };
167 
172 template<typename T>
174 {
176 
177  static constexpr bool variable_size = true;
178 
179  static void write(std::ostream& os, const T& v);
180 
181  static void read(std::istream& is, size_t n, T& v);
182 };
183 
191 template<typename T, typename U = void>
193 {
194 };
195 
196 template<typename T>
197 struct value_serializer<T, typename std::enable_if<has_value_type<T>::value>::type>
199 {
200 };
201 
202 template<>
203 struct value_serializer<std::string> : variable_value_serializer<std::string>
204 {
205 };
206 
207 } // namespace trie
208 
209 template<typename _KeyTrait, typename _ValueT>
210 class packed_trie_map;
211 
218 template<typename _KeyTrait, typename _ValueT>
219 class trie_map
220 {
221  friend class packed_trie_map<_KeyTrait, _ValueT>;
222  friend class trie::detail::iterator_base<trie_map, true>;
223  friend class trie::detail::iterator_base<trie_map, false>;
225  friend class trie::detail::iterator<trie_map>;
229 
230 public:
232  typedef _KeyTrait key_trait_type;
233  typedef typename key_trait_type::key_type key_type;
234  typedef typename key_trait_type::key_buffer_type key_buffer_type;
235  typedef typename key_trait_type::key_unit_type key_unit_type;
236  typedef _ValueT value_type;
237  typedef size_t size_type;
238  typedef std::pair<key_type, value_type> key_value_type;
239 
243 
244 private:
245  struct trie_node
246  {
247  typedef std::map<key_unit_type, trie_node> children_type;
248 
249  children_type children;
250  value_type value;
251  bool has_value;
252 
253  trie_node();
254  trie_node(const trie_node& other);
255  trie_node(trie_node&& other);
256 
257  void swap(trie_node& other);
258  };
259 
260  template<bool _IsConst>
261  struct stack_item
262  {
263  using _is_const = bool_constant<_IsConst>;
264 
266 
267  using trie_node_type = typename const_or_not<trie_node, _is_const>::type;
268 
269  trie_node_type* node;
270  child_pos_type child_pos;
271 
272  stack_item(trie_node_type* _node, const child_pos_type& _child_pos) : node(_node), child_pos(_child_pos)
273  {}
274 
275  bool operator==(const stack_item& r) const
276  {
277  return node == r.node && child_pos == r.child_pos;
278  }
279 
280  bool operator!=(const stack_item& r) const
281  {
282  return !operator==(r);
283  }
284  };
285 
286  using const_node_stack_type = std::vector<stack_item<true>>;
287  using node_stack_type = std::vector<stack_item<false>>;
288 
289 public:
294 
295  trie_map(const trie_map& other);
296 
297  trie_map(trie_map&& other);
298 
299  const_iterator begin() const;
300 
301  iterator begin();
302 
303  const_iterator end() const;
304 
305  iterator end();
306 
307  trie_map& operator=(trie_map other);
308 
309  void swap(trie_map& other);
310 
317  void insert(const key_type& key, const value_type& value);
318 
327  void insert(const key_unit_type* key, size_type len, const value_type& value);
328 
338  bool erase(const key_unit_type* key, size_type len);
339 
348  const_iterator find(const key_type& key) const;
349 
360  const_iterator find(const key_unit_type* input, size_type len) const;
361 
370  iterator find(const key_type& key);
371 
382  iterator find(const key_unit_type* input, size_type len);
383 
394  search_results prefix_search(const key_type& prefix) const;
395 
408  search_results prefix_search(const key_unit_type* prefix, size_type len) const;
409 
415  size_type size() const;
416 
417  bool empty() const noexcept;
418 
422  void clear();
423 
431  packed_type pack() const;
432 
433 private:
434  void insert_into_tree(
435  trie_node& node, const key_unit_type* key, const key_unit_type* key_end, const value_type& value);
436 
437  const trie_node* find_prefix_node(
438  const trie_node& node, const key_unit_type* prefix, const key_unit_type* prefix_end) const;
439 
440  template<bool _IsConst>
441  void find_prefix_node_with_stack(
442  std::vector<stack_item<_IsConst>>& node_stack, const_t<trie_node, _IsConst>& node, const key_unit_type* prefix,
443  const key_unit_type* prefix_end) const;
444 
445  template<bool _IsConst>
446  key_buffer_type build_key_buffer_from_node_stack(const std::vector<stack_item<_IsConst>>& node_stack) const;
447 
448  void count_values(size_type& n, const trie_node& node) const;
449 
450 private:
451  trie_node m_root;
452 };
453 
464 template<typename _KeyTrait, typename _ValueT>
466 {
469 
470 public:
471  typedef _KeyTrait key_trait_type;
472  typedef typename key_trait_type::key_type key_type;
473  typedef typename key_trait_type::key_buffer_type key_buffer_type;
474  typedef typename key_trait_type::key_unit_type key_unit_type;
475  typedef _ValueT value_type;
476  typedef size_t size_type;
477  typedef std::pair<key_type, value_type> key_value_type;
480 
485  struct entry
486  {
487  const key_unit_type* key;
488  size_type keylen;
489  value_type value;
490 
491  entry(const key_unit_type* _key, size_type _keylen, value_type _value)
492  : key(_key), keylen(_keylen), value(_value)
493  {}
494  };
495 
496 private:
497  struct trie_node
498  {
499  key_unit_type key;
500  const value_type* value;
501 
502  std::deque<trie_node*> children;
503 
504  trie_node(key_unit_type _key) : key(_key), value(nullptr)
505  {}
506  };
507 
508  struct stack_item
509  {
510  const uintptr_t* node_pos;
511  const uintptr_t* child_pos;
512  const uintptr_t* child_end;
513 
514  stack_item(const uintptr_t* _node_pos, const uintptr_t* _child_pos, const uintptr_t* _child_end)
515  : node_pos(_node_pos), child_pos(_child_pos), child_end(_child_end)
516  {}
517 
518  bool operator==(const stack_item& other) const
519  {
520  return node_pos == other.node_pos && child_pos == other.child_pos;
521  }
522 
523  bool operator!=(const stack_item& other) const
524  {
525  return !operator==(other);
526  }
527 
528  bool has_value() const
529  {
530  const value_type* pv = reinterpret_cast<const value_type*>(*node_pos);
531  return pv;
532  }
533 
534  const value_type* get_value() const
535  {
536  return reinterpret_cast<const value_type*>(*node_pos);
537  }
538  };
539 
540  typedef std::vector<stack_item> node_stack_type;
541 
542  typedef std::deque<trie_node> node_pool_type;
543  typedef std::vector<uintptr_t> packed_type;
544  typedef std::deque<value_type> value_store_type;
545  typedef std::vector<std::tuple<size_t, key_unit_type>> child_offsets_type;
546 
547 public:
548  packed_trie_map();
549 
558  packed_trie_map(const entry* entries, size_type entry_size);
559 
567 
568  packed_trie_map(const packed_trie_map& other);
569 
571 
572  packed_trie_map& operator=(packed_trie_map other);
573 
574  bool operator==(const packed_trie_map& other) const;
575 
576  bool operator!=(const packed_trie_map& other) const;
577 
578  const_iterator begin() const;
579 
580  const_iterator end() const;
581 
582  const_iterator cbegin() const;
583 
584  const_iterator cend() const;
585 
594  const_iterator find(const key_type& key) const;
595 
606  const_iterator find(const key_unit_type* input, size_type len) const;
607 
617  search_results prefix_search(const key_type& prefix) const;
618 
631  search_results prefix_search(const key_unit_type* prefix, size_type len) const;
632 
638  size_type size() const noexcept;
639 
640  bool empty() const noexcept;
641 
642  void swap(packed_trie_map& other);
643 
649  template<typename _Func = trie::value_serializer<value_type>>
650  void save_state(std::ostream& os) const;
651 
658  template<typename _Func = trie::value_serializer<value_type>>
659  void load_state(std::istream& is);
660 
666  void dump_structure() const;
667 
668 private:
669  node_stack_type get_root_stack() const;
670 
671  void traverse_range(
672  trie_node& root, node_pool_type& node_pool, const entry* start, const entry* end, size_type pos);
673 
674  size_type compact_node(const trie_node& node);
675  size_type compact_node(const typename trie_map<_KeyTrait, _ValueT>::trie_node& node);
676 
677  void push_child_offsets(size_type offset, const child_offsets_type& child_offsets);
678 
679  void compact(const trie_node& root);
680  void compact(const typename trie_map<_KeyTrait, _ValueT>::trie_node& root);
681 
682  const uintptr_t* find_prefix_node(
683  const uintptr_t* p, const key_unit_type* prefix, const key_unit_type* prefix_end) const;
684 
685  void find_prefix_node_with_stack(
686  node_stack_type& node_stack, const uintptr_t* p, const key_unit_type* prefix,
687  const key_unit_type* prefix_end) const;
688 
689  template<typename _Handler>
690  void traverse_tree(_Handler hdl) const;
691 
692  template<typename _Handler>
693  void traverse_buffer(_Handler hdl) const;
694 
695 #ifdef MDDS_TRIE_MAP_DEBUG
696  void dump_node(key_buffer_type& buffer, const trie_node& node) const;
697  void dump_trie(const trie_node& root) const;
698  void dump_packed_trie() const;
699 #endif
700 
701 private:
702  value_store_type m_value_store;
703  packed_type m_packed;
704 };
705 
706 } // namespace mdds
707 
708 #include "trie_map_def.inl"
709 
710 #endif
711 
712 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: trie_map.hpp:466
const_iterator find(const key_unit_type *input, size_type len) const
search_results prefix_search(const key_unit_type *prefix, size_type len) const
size_type size() const noexcept
const_iterator find(const key_type &key) const
packed_trie_map(const trie_map< key_trait_type, value_type > &other)
packed_trie_map(const entry *entries, size_type entry_size)
search_results prefix_search(const key_type &prefix) const
Definition: trie_map_itr.hpp:375
Definition: trie_map_itr.hpp:89
Definition: trie_map_itr.hpp:350
Definition: trie_map_itr.hpp:501
Definition: trie_map_itr.hpp:795
Definition: trie_map_itr.hpp:440
Definition: trie_map.hpp:220
void insert(const key_type &key, const value_type &value)
search_results prefix_search(const key_unit_type *prefix, size_type len) const
search_results prefix_search(const key_type &prefix) const
size_type size() const
packed_type pack() const
iterator find(const key_unit_type *input, size_type len)
const_iterator find(const key_unit_type *input, size_type len) const
bool erase(const key_unit_type *key, size_type len)
void insert(const key_unit_type *key, size_type len, const value_type &value)
iterator find(const key_type &key)
const_iterator find(const key_type &key) const
Definition: global.hpp:147
Definition: global.hpp:165
Definition: trie_map.hpp:486
Definition: trie_map_itr.hpp:70
Definition: trie_map.hpp:147
Definition: trie_map.hpp:49
key_type key_buffer_type
Definition: trie_map.hpp:59
static void push_back(key_buffer_type &buffer, key_unit_type c)
Definition: trie_map.hpp:112
static key_buffer_type to_key_buffer(const key_unit_type *str, size_t length)
Definition: trie_map.hpp:77
static key_type to_key(const key_buffer_type &buf)
Definition: trie_map.hpp:136
static key_buffer_type to_key_buffer(const key_type &key)
Definition: trie_map.hpp:90
ContainerT key_type
Definition: trie_map.hpp:51
typename key_type::value_type key_unit_type
Definition: trie_map.hpp:66
static void pop_back(key_buffer_type &buffer)
Definition: trie_map.hpp:123
Definition: trie_map.hpp:193
Definition: trie_map.hpp:160