mdds
segment_tree.hpp
1 /*************************************************************************
2  *
3  * Copyright (c) 2010-2015 Kohei Yoshida
4  *
5  * Permission is hereby granted, free of charge, to any person
6  * obtaining a copy of this software and associated documentation
7  * files (the "Software"), to deal in the Software without
8  * restriction, including without limitation the rights to use,
9  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following
12  * conditions:
13  *
14  * The above copyright notice and this permission notice shall be
15  * included in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24  * OTHER DEALINGS IN THE SOFTWARE.
25  *
26  ************************************************************************/
27 
28 #ifndef INCLUDED_MDDS_SEGMENTTREE_HPP
29 #define INCLUDED_MDDS_SEGMENTTREE_HPP
30 
31 #include "mdds/node.hpp"
32 #include "mdds/global.hpp"
33 
34 #include <vector>
35 #include <iostream>
36 #include <map>
37 #include <unordered_map>
38 #include <memory>
39 
40 #ifdef MDDS_UNIT_TEST
41 #include <sstream>
42 #endif
43 
44 namespace mdds {
45 
46 template<typename _Key, typename _Value>
48 {
49 public:
50  typedef _Key key_type;
51  typedef _Value value_type;
52  typedef size_t size_type;
53  typedef std::vector<value_type> search_results_type;
54 
55 #ifdef MDDS_UNIT_TEST
56  struct segment_data
57  {
58  key_type begin_key;
59  key_type end_key;
60  value_type pdata;
61 
62  segment_data(key_type _beg, key_type _end, value_type p) : begin_key(_beg), end_key(_end), pdata(p)
63  {}
64 
65  bool operator==(const segment_data& r) const
66  {
67  return begin_key == r.begin_key && end_key == r.end_key && pdata == r.pdata;
68  }
69 
70  bool operator!=(const segment_data& r) const
71  {
72  return !operator==(r);
73  }
74  };
75 
76  struct segment_map_printer
77  {
78  void operator()(const ::std::pair<value_type, ::std::pair<key_type, key_type>>& r) const
79  {
80  using namespace std;
81  cout << r.second.first << "-" << r.second.second << ": " << r.first->name << endl;
82  }
83  };
84 #endif
85 
86 public:
87  typedef ::std::vector<value_type> data_chain_type;
88  typedef std::unordered_map<value_type, ::std::pair<key_type, key_type>> segment_map_type;
89  typedef ::std::map<value_type, ::std::pair<key_type, key_type>> sorted_segment_map_type;
90 
92  {
93  key_type low;
94  key_type high;
95  data_chain_type* data_chain;
96 
97  bool operator==(const nonleaf_value_type& r) const
98  {
99  return low == r.low && high == r.high && data_chain == r.data_chain;
100  }
101  };
102 
104  {
105  key_type key;
106  data_chain_type* data_chain;
107 
108  bool operator==(const leaf_value_type& r) const
109  {
110  return key == r.key && data_chain == r.data_chain;
111  }
112  };
113 
115  struct init_handler;
116  struct dispose_handler;
117 #ifdef MDDS_UNIT_TEST
118  struct to_string_handler;
119 #endif
120 
122  typedef typename node::node_ptr node_ptr;
123 
125 
127  {
128  void operator()(
129  __st::nonleaf_node<segment_tree>& _self, const __st::node_base* left_node,
130  const __st::node_base* right_node)
131  {
132  // Parent node should carry the range of all of its child nodes.
133  if (left_node)
134  {
135  _self.value_nonleaf.low = left_node->is_leaf
136  ? static_cast<const node*>(left_node)->value_leaf.key
137  : static_cast<const nonleaf_node*>(left_node)->value_nonleaf.low;
138  }
139  else
140  {
141  // Having a left node is prerequisite.
142  throw general_error("segment_tree::fill_nonleaf_value_handler: Having a left node is prerequisite.");
143  }
144 
145  if (right_node)
146  {
147  if (right_node->is_leaf)
148  {
149  // When the child nodes are leaf nodes, the upper bound
150  // must be the value of the node that comes after the
151  // right leaf node (if such node exists).
152 
153  const node* p = static_cast<const node*>(right_node);
154  if (p->next)
155  _self.value_nonleaf.high = p->next->value_leaf.key;
156  else
157  _self.value_nonleaf.high = p->value_leaf.key;
158  }
159  else
160  {
161  _self.value_nonleaf.high = static_cast<const nonleaf_node*>(right_node)->value_nonleaf.high;
162  }
163  }
164  else
165  {
166  _self.value_nonleaf.high = left_node->is_leaf
167  ? static_cast<const node*>(left_node)->value_leaf.key
168  : static_cast<const nonleaf_node*>(left_node)->value_nonleaf.high;
169  }
170  }
171  };
172 
173 #ifdef MDDS_UNIT_TEST
174  struct to_string_handler
175  {
176  std::string operator()(const node& _self) const
177  {
178  std::ostringstream os;
179  os << "[" << _self.value_leaf.key << "] ";
180  return os.str();
181  }
182 
183  std::string operator()(const __st::nonleaf_node<segment_tree>& _self) const
184  {
185  std::ostringstream os;
186  os << "[" << _self.value_nonleaf.low << "-" << _self.value_nonleaf.high << ")";
187  if (_self.value_nonleaf.data_chain)
188  {
189  os << " { ";
190  typename data_chain_type::const_iterator itr, itr_beg = _self.value_nonleaf.data_chain->begin(),
191  itr_end = _self.value_nonleaf.data_chain->end();
192  for (itr = itr_beg; itr != itr_end; ++itr)
193  {
194  if (itr != itr_beg)
195  os << ", ";
196  os << (*itr)->name;
197  }
198  os << " }";
199  }
200  os << " ";
201  return os.str();
202  }
203  };
204 #endif
205 
207  {
208  void operator()(node& _self)
209  {
210  _self.value_leaf.data_chain = nullptr;
211  }
212 
213  void operator()(__st::nonleaf_node<segment_tree>& _self)
214  {
215  _self.value_nonleaf.data_chain = nullptr;
216  }
217  };
218 
220  {
221  void operator()(node& _self)
222  {
223  delete _self.value_leaf.data_chain;
224  }
225 
226  void operator()(__st::nonleaf_node<segment_tree>& _self)
227  {
228  delete _self.value_nonleaf.data_chain;
229  }
230  };
231 
232 #ifdef MDDS_UNIT_TEST
233  struct node_printer
234  {
235  void operator()(const __st::node_base* p) const
236  {
237  if (p->is_leaf)
238  std::cout << static_cast<const node*>(p)->to_string() << " ";
239  else
240  std::cout << static_cast<const nonleaf_node*>(p)->to_string() << " ";
241  }
242  };
243 #endif
244 
245 private:
250  class search_results_base
251  {
252  public:
253  typedef std::vector<data_chain_type*> res_chains_type;
254  typedef std::shared_ptr<res_chains_type> res_chains_ptr;
255 
256  public:
257  search_results_base() : mp_res_chains(static_cast<res_chains_type*>(nullptr))
258  {}
259 
260  search_results_base(const search_results_base& r) : mp_res_chains(r.mp_res_chains)
261  {}
262 
263  size_t size() const
264  {
265  size_t combined = 0;
266  if (!mp_res_chains)
267  return combined;
268 
269  typename res_chains_type::const_iterator itr = mp_res_chains->begin(), itr_end = mp_res_chains->end();
270  for (; itr != itr_end; ++itr)
271  combined += (*itr)->size();
272  return combined;
273  }
274 
275  void push_back_chain(data_chain_type* chain)
276  {
277  if (!chain || chain->empty())
278  return;
279 
280  if (!mp_res_chains)
281  mp_res_chains.reset(new res_chains_type);
282  mp_res_chains->push_back(chain);
283  }
284 
285  res_chains_ptr& get_res_chains()
286  {
287  return mp_res_chains;
288  }
289 
290  private:
291  res_chains_ptr mp_res_chains;
292  };
293 
294  class iterator_base
295  {
296  protected:
297  typedef typename search_results_base::res_chains_type res_chains_type;
298  typedef typename search_results_base::res_chains_ptr res_chains_ptr;
299 
300  iterator_base(const res_chains_ptr& p) : mp_res_chains(p), m_end_pos(true)
301  {}
302 
303  public:
304  typedef ::std::bidirectional_iterator_tag iterator_category;
305  typedef typename data_chain_type::value_type value_type;
306  typedef typename data_chain_type::pointer pointer;
307  typedef typename data_chain_type::reference reference;
308  typedef typename data_chain_type::difference_type difference_type;
309 
310  iterator_base() : mp_res_chains(static_cast<res_chains_type*>(nullptr)), m_end_pos(true)
311  {}
312 
313  iterator_base(const iterator_base& r)
314  : mp_res_chains(r.mp_res_chains), m_cur_chain(r.m_cur_chain), m_cur_pos_in_chain(r.m_cur_pos_in_chain),
315  m_end_pos(r.m_end_pos)
316  {}
317 
318  iterator_base& operator=(const iterator_base& r)
319  {
320  mp_res_chains = r.mp_res_chains;
321  m_cur_chain = r.m_cur_chain;
322  m_cur_pos_in_chain = r.m_cur_pos_in_chain;
323  m_end_pos = r.m_end_pos;
324  return *this;
325  }
326 
327  typename data_chain_type::value_type* operator++()
328  {
329  // We don't check for end position flag for performance reasons.
330  // The caller is responsible for making sure not to increment past
331  // end position.
332 
333  // When reaching the end position, the internal iterators still
334  // need to be pointing at the last item before the end position.
335  // This is why we need to make copies of the iterators, and copy
336  // them back once done.
337 
338  typename data_chain_type::iterator cur_pos_in_chain = m_cur_pos_in_chain;
339 
340  if (++cur_pos_in_chain == (*m_cur_chain)->end())
341  {
342  // End of current chain. Inspect the next chain if exists.
343  typename res_chains_type::iterator cur_chain = m_cur_chain;
344  ++cur_chain;
345  if (cur_chain == mp_res_chains->end())
346  {
347  m_end_pos = true;
348  return nullptr;
349  }
350  m_cur_chain = cur_chain;
351  m_cur_pos_in_chain = (*m_cur_chain)->begin();
352  }
353  else
354  ++m_cur_pos_in_chain;
355 
356  return operator->();
357  }
358 
359  typename data_chain_type::value_type* operator--()
360  {
361  if (!mp_res_chains)
362  return nullptr;
363 
364  if (m_end_pos)
365  {
366  m_end_pos = false;
367  return &(*m_cur_pos_in_chain);
368  }
369 
370  if (m_cur_pos_in_chain == (*m_cur_chain)->begin())
371  {
372  if (m_cur_chain == mp_res_chains->begin())
373  {
374  // Already at the first data chain. Don't move the iterator position.
375  return nullptr;
376  }
377  --m_cur_chain;
378  m_cur_pos_in_chain = (*m_cur_chain)->end();
379  }
380  --m_cur_pos_in_chain;
381  return operator->();
382  }
383 
384  bool operator==(const iterator_base& r) const
385  {
386  if (mp_res_chains.get())
387  {
388  // non-empty result set.
389  return mp_res_chains.get() == r.mp_res_chains.get() && m_cur_chain == r.m_cur_chain &&
390  m_cur_pos_in_chain == r.m_cur_pos_in_chain && m_end_pos == r.m_end_pos;
391  }
392 
393  // empty result set.
394  if (r.mp_res_chains.get())
395  return false;
396  return m_end_pos == r.m_end_pos;
397  }
398 
399  bool operator!=(const iterator_base& r) const
400  {
401  return !operator==(r);
402  }
403 
404  typename data_chain_type::value_type& operator*()
405  {
406  return *m_cur_pos_in_chain;
407  }
408 
409  typename data_chain_type::value_type* operator->()
410  {
411  return &(*m_cur_pos_in_chain);
412  }
413 
414  protected:
415  void move_to_front()
416  {
417  if (!mp_res_chains)
418  {
419  // Empty data set.
420  m_end_pos = true;
421  return;
422  }
423 
424  // We assume that there is at least one chain list, and no
425  // empty chain list exists. So, skip the check.
426  m_cur_chain = mp_res_chains->begin();
427  m_cur_pos_in_chain = (*m_cur_chain)->begin();
428  m_end_pos = false;
429  }
430 
431  void move_to_end()
432  {
433  m_end_pos = true;
434  if (!mp_res_chains)
435  // Empty data set.
436  return;
437 
438  m_cur_chain = mp_res_chains->end();
439  --m_cur_chain;
440  m_cur_pos_in_chain = (*m_cur_chain)->end();
441  --m_cur_pos_in_chain;
442  }
443 
444  private:
445  res_chains_ptr mp_res_chains;
446  typename res_chains_type::iterator m_cur_chain;
447  typename data_chain_type::iterator m_cur_pos_in_chain;
448  bool m_end_pos : 1;
449  };
450 
451 public:
452  class search_results : public search_results_base
453  {
454  typedef typename search_results_base::res_chains_type res_chains_type;
455  typedef typename search_results_base::res_chains_ptr res_chains_ptr;
456 
457  public:
458  class iterator : public iterator_base
459  {
460  friend class segment_tree<_Key, _Value>::search_results;
461 
462  private:
463  iterator(const res_chains_ptr& p) : iterator_base(p)
464  {}
465 
466  public:
467  iterator() : iterator_base()
468  {}
469  };
470 
471  typename search_results::iterator begin()
472  {
473  typename search_results::iterator itr(search_results_base::get_res_chains());
474  itr.move_to_front();
475  return itr;
476  }
477 
478  typename search_results::iterator end()
479  {
480  typename search_results::iterator itr(search_results_base::get_res_chains());
481  itr.move_to_end();
482  return itr;
483  }
484  };
485 
487  {
488  public:
489  search_result_vector_inserter(search_results_type& result) : m_result(result)
490  {}
491  void operator()(data_chain_type* node_data)
492  {
493  if (!node_data)
494  return;
495 
496  typename data_chain_type::const_iterator itr = node_data->begin(), itr_end = node_data->end();
497  for (; itr != itr_end; ++itr)
498  m_result.push_back(*itr);
499  }
500 
501  private:
502  search_results_type& m_result;
503  };
504 
506  {
507  public:
508  search_result_inserter(search_results_base& result) : m_result(result)
509  {}
510  void operator()(data_chain_type* node_data)
511  {
512  if (!node_data)
513  return;
514 
515  m_result.push_back_chain(node_data);
516  }
517 
518  private:
519  search_results_base& m_result;
520  };
521 
522  segment_tree();
523  segment_tree(const segment_tree& r);
524  ~segment_tree();
525 
530  bool operator==(const segment_tree& r) const;
531 
532  bool operator!=(const segment_tree& r) const
533  {
534  return !operator==(r);
535  }
536 
543  bool is_tree_valid() const
544  {
545  return m_valid_tree;
546  }
547 
551  void build_tree();
552 
562  bool insert(key_type begin_key, key_type end_key, value_type pdata);
563 
580  bool search(key_type point, search_results_type& result) const;
581 
591  search_results search(key_type point) const;
592 
600  void remove(value_type value);
601 
605  void clear();
606 
610  size_t size() const;
611 
615  bool empty() const;
616 
622  size_t leaf_size() const;
623 
624 #ifdef MDDS_UNIT_TEST
625  void dump_tree() const;
626  void dump_leaf_nodes() const;
627  void dump_segment_data() const;
628  bool verify_node_lists() const;
629 
630  struct leaf_node_check
631  {
632  key_type key;
633  data_chain_type data_chain;
634  };
635 
636  bool verify_leaf_nodes(const ::std::vector<leaf_node_check>& checks) const;
637 
644  bool verify_segment_data(const segment_map_type& checks) const;
645 #endif
646 
647 private:
651  void search(key_type point, search_results_base& result) const;
652 
653  typedef std::vector<__st::node_base*> node_list_type;
654  typedef std::map<value_type, std::unique_ptr<node_list_type>> data_node_map_type;
655 
656  static void create_leaf_node_instances(const ::std::vector<key_type>& keys, node_ptr& left, node_ptr& right);
657 
663  void descend_tree_and_mark(
664  __st::node_base* pnode, value_type pdata, key_type begin_key, key_type end_key, node_list_type* plist);
665 
666  void build_leaf_nodes();
667 
672  void remove_data_from_nodes(node_list_type* plist, const value_type pdata);
673  void remove_data_from_chain(data_chain_type& chain, const value_type pdata);
674 
675  void clear_all_nodes();
676 
677 #ifdef MDDS_UNIT_TEST
678  static bool has_data_pointer(const node_list_type& node_list, const value_type pdata);
679  static void print_leaf_value(const leaf_value_type& v);
680 #endif
681 
682 private:
683  std::vector<nonleaf_node> m_nonleaf_node_pool;
684 
685  segment_map_type m_segment_data;
686 
692  data_node_map_type m_tagged_node_map;
693 
694  nonleaf_node* m_root_node;
695  node_ptr m_left_leaf;
696  node_ptr m_right_leaf;
697  bool m_valid_tree : 1;
698 };
699 
700 } // namespace mdds
701 
702 #include "segment_tree_def.inl"
703 
704 #endif
Definition: global.hpp:84
Definition: segment_tree.hpp:506
Definition: segment_tree.hpp:459
Definition: segment_tree.hpp:453
Definition: segment_tree.hpp:48
bool insert(key_type begin_key, key_type end_key, value_type pdata)
bool operator==(const segment_tree &r) const
bool empty() const
bool search(key_type point, search_results_type &result) const
void remove(value_type value)
size_t leaf_size() const
bool is_tree_valid() const
Definition: segment_tree.hpp:543
size_t size() const
search_results search(key_type point) const
Definition: node.hpp:44
bool is_leaf
parent nonleaf_node
Definition: node.hpp:46
Definition: node.hpp:140
node_ptr next
previous sibling leaf node.
Definition: node.hpp:162
Definition: node.hpp:56
Definition: segment_tree.hpp:220
Definition: segment_tree.hpp:127
Definition: segment_tree.hpp:207
Definition: segment_tree.hpp:104
Definition: segment_tree.hpp:92
key_type high
low range value (inclusive)
Definition: segment_tree.hpp:94
data_chain_type * data_chain
high range value (non-inclusive)
Definition: segment_tree.hpp:95