BitMagic-C++
strsvsample05.cpp
Go to the documentation of this file.
1 /*
2 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 For more information please visit: http://bitmagic.io
17 */
18 
19 /** \example strsvsample05.cpp
20 
21  Example of how to use bm::str_sparse_vector<> - succinct container for
22  bit-transposed string collections - to deserialize only selected elements
23  from the serialized BLOB
24 
25  \sa bm::str_sparse_vector
26  \sa bm::sparse_vector_deserializer
27  \sa bm::sparse_vector_serializer
28 
29 */
30 
31 /*! \file strsvsample05.cpp
32  \brief Example: str_sparse_vector<> gather deserialization example
33 
34  This example loads a range of a sparse vector from a serialized BLOB to save
35  memory and improve deserialization performance
36 */
37 
38 #include <iostream>
39 #include <string>
40 #include <vector>
41 #include <assert.h>
42 
43 #include "bm.h"
44 #include "bmstrsparsevec.h"
45 #include "bmsparsevec_serial.h"
46 
47 
48 using namespace std;
49 
52 
53 
54 
55 
56 int main(void)
57 {
58  try
59  {
60  str_sv_type str_sv1;
61  str_sv_type str_sv2;
62  str_sv_type str_sv3;
63 
64  {
65  str_sv_type str_sv0;
66  // generate a collection of k-mer (4-mer) strings
67  // imitating a DNA sequence
68  {
69  auto bi = str_sv0.get_back_inserter();
70  for (unsigned i = 0; i < 100000; ++i)
71  {
72  bi = "ATGC";
73  bi = "GCTA";
74  bi = "GCAA";
75  bi = "TATA";
76  } // for
77  }
78  str_sv1.remap_from(str_sv0); // str_sv1 now contains a remapped (smaller) copy of str_sv0
79  }
81  str_sv1.optimize(tb);
82 
83 
84  // calculate memory footprint
85  //
87  str_sv1.calc_stat(&st);
88 
89  cout << "Used memory: " << st.memory_used << std::endl;
90 
92 
93  // construct a serializer utility class, set up serialization parameters
94  //
95  // please note the use of "set_bookmarks()" to enable fast range
96  // deserialization. Bookmarks somewhat increase the BLOB size but allow
97  // the deserializer to more efficiently skip parts we do not need (paging)
98  // and avoid decompression of blocks we would never need
99  //
100  // This example sets "128" as a bookmarks parameter, but you have to
101  // experiment with what works for you, between 4 and 512
102  //
103  // Each block corresponds to 64K vector elements;
104  // making bookmarks after each block does not make much sense
105  // because decode is reasonably fast and some residual throw-away
106  // is usually ok.
107  //
109  sv_serializer.set_bookmarks(true, 128);
110 
111 
112  // run str-vector serialization with compression
113  //
114  sv_serializer.serialize(str_sv1, sv_lay);
115 
116  const unsigned char* buf = sv_lay.buf();
117  cout << "Serialized size = " << sv_lay.size() << endl;
118 
119  // instantiate deserializer utility class
120  //
122 
123 
124  bvector_type::size_type from = 100000;
125  bvector_type::size_type to = from + 65536;
126  {
127  // 1.
128  // one way to deserialize is to provide a mask vector
129  // specifying which sparse vector elements need to be
130  // decompressed from the BLOB;
131  // the mask vector does not necessarily have to be just one range
132  //
133  bvector_type bv_mask;
134  bv_mask.set_range(from, to);
135  sv_deserial.deserialize(str_sv2, buf, bv_mask);
136 
137 
138  // 2.
139  // If it is just one range (common use case for paging)
140  // it is faster and cleaner to use deserialize_range().
141  // It will produce the same result as (1), just faster.
142  //
143  sv_deserial.deserialize_range(str_sv3, buf, from, to);
144 
145  // run a quick comparison that the selected range matches values in
146  // the containers str_sv2, str_sv3
147  // NOTE(review): set_range() takes a closed interval, so index 'to' is in the mask but is not compared by this loop (j < to) - confirm intent
148  char s1[16]; char s2[16]; char s3[16];
149  for (bvector_type::size_type j = from; j < to; ++j)
150  {
151  str_sv1.get(j, s1, sizeof(s1));
152  str_sv2.get(j, s2, sizeof(s2));
153  str_sv3.get(j, s3, sizeof(s3));
154 
155  int cmp;
156  cmp = ::strcmp(s1, s2);
157  assert(cmp==0);
158  cmp = ::strcmp(s1, s3);
159  assert(cmp==0);
160 
161  } // for j
162  cout << "Gather deserialization check OK" << endl;
163  }
164 
165  }
166  catch(std::exception& ex)
167  {
168  std::cerr << ex.what() << std::endl;
169  return 1;
170  }
171 
172 
173  return 0;
174 }
175 
bm::bvector::set_range
bvector< Alloc > & set_range(size_type left, size_type right, bool value=true)
Sets all bits in the specified closed interval [left,right] Interval must be inside the bvector's siz...
Definition: bm.h:2158
bm::sparse_vector_deserializer
sparse vector de-serializer
Definition: bmsparsevec_serial.h:223
main
int main(void)
Definition: strsvsample05.cpp:56
bm::str_sparse_vector::get
size_type get(size_type idx, value_type *str, size_type buf_size) const BMNOEXCEPT
get specified element
Definition: bmstrsparsevec.h:1295
BM_DECLARE_TEMP_BLOCK
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
bm::sparse_vector_serial_layout::buf
const unsigned char * buf() const
Return serialization buffer pointer.
Definition: bmsparsevec_serial.h:106
bm::str_sparse_vector::calc_stat
void calc_stat(struct str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
Definition: bmstrsparsevec.h:1338
bmsparsevec_serial.h
Serialization for sparse_vector<>
bm::sparse_vector_serial_layout
layout class for serialization buffer structure
Definition: bmsparsevec_serial.h:57
bm::bvector<>
bm::str_sparse_vector
sparse vector for strings with compression using bit transposition method
Definition: bmstrsparsevec.h:56
bmstrsparsevec.h
string sparse vector based on bit-transposed matrix
bm::str_sparse_vector::get_back_inserter
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
Definition: bmstrsparsevec.h:721
bm::sparse_vector_serializer::serialize
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
Definition: bmsparsevec_serial.h:628
str_sv_type
bm::str_sparse_vector< char, bvector_type, 5 > str_sv_type
Definition: strsvsample05.cpp:51
bm::sparse_vector_deserializer::deserialize
void deserialize(SV &sv, const unsigned char *buf)
Definition: bmsparsevec_serial.h:243
bm::sparse_vector_serializer::set_bookmarks
void set_bookmarks(bool enable, unsigned bm_interval=256)
Add skip-markers for faster range deserialization.
Definition: bmsparsevec_serial.h:183
bm::sparse_vector_serial_layout::size
size_t size() const
return current serialized size
Definition: bmsparsevec_serial.h:84
bm::bv_statistics::memory_used
size_t memory_used
memory usage for all blocks and service tables
Definition: bmfunc.h:61
bm::sparse_vector_deserializer::deserialize_range
void deserialize_range(SV &sv, const unsigned char *buf, size_type from, size_type to)
Definition: bmsparsevec_serial.h:788
bm::str_sparse_vector::optimize
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *stat=0)
run memory optimization for all vector plains
Definition: bmstrsparsevec.h:1323
bvector_type
bm::bvector bvector_type
Definition: strsvsample05.cpp:50
bm::bvector::size_type
bm::id_t size_type
Definition: bm.h:117
bm.h
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
bm::sparse_vector_serializer
Definition: bmsparsevec_serial.h:160
bm::str_sparse_vector::remap_from
void remap_from(const str_sparse_vector &str_sv)
Build remapping profile and load content from another sparse vector.
Definition: bmstrsparsevec.h:1600
bm::str_sparse_vector::statistics
Definition: bmstrsparsevec.h:72