Claw 1.7.3
string_algorithm.tpp
1/*
2 CLAW - a C++ Library Absolutely Wonderful
3
4 CLAW is a free library without any particular aim but being useful to
5 anyone.
6
7 Copyright (C) 2005-2011 Julien Jorge
8
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
13
14 This library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18
19 You should have received a copy of the GNU Lesser General Public
20 License along with this library; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22
23 contact: julien.jorge@gamned.org
24*/
25/**
26 * \file string_algorithm.tpp
27 * \brief Implementation of the algorithms on strings.
28 * \author Julien Jorge
29 */
30
31#include <claw/algorithm.hpp>
32#include <claw/glob.hpp>
33
34#include <sstream>
35#include <string>
36#include <iterator>
37
38/*----------------------------------------------------------------------------*/
39/**
40 * \brief A portable version of std::getline( is, str, '\\n' ) that removes a
41 * tailing '\\r'.
42 * \param is The stream in which we read.
43 * \param str The line read from the stream.
44 */
45template<typename StreamType, typename StringType>
46StreamType& claw::text::getline( StreamType& is, StringType& str )
47{
48 std::getline( is, str );
49
50 if ( !str.empty() )
51 if ( str[ str.size() - 1 ] == typename StringType::value_type('\r') )
52 str.erase( str.size() - 1 );
53
54 return is;
55} // getline()
56
57/*----------------------------------------------------------------------------*/
58/**
59 * \brief Remove characters at the begining of a string.
60 * \param str The string to modify.
61 * \param s The characters to remove.
62 */
63template<typename StringType>
64void claw::text::trim_left( StringType& str,
65 const typename StringType::value_type* const s )
66{
67 typename StringType::size_type p = str.find_first_not_of(s);
68
69 if (p != StringType::npos)
70 str = str.substr(p);
71} // trim_left()
72
73/*----------------------------------------------------------------------------*/
74/**
75 * \brief Remove characters at the end of a string.
76 * \param str The string to modify.
77 * \param s The characters to remove.
78 */
79template<typename StringType>
80void claw::text::trim_right( StringType& str,
81 const typename StringType::value_type* const s )
82{
83 typename StringType::size_type p = str.find_last_not_of(s);
84
85 if (p != StringType::npos)
86 str = str.substr( 0, p+1 );
87} // trim_right()
88
89/*----------------------------------------------------------------------------*/
90/**
91 * \brief Remove characters at the begining end at the end of a string.
92 * \param str The string to modify.
93 * \param s The characters to remove.
94 */
95template<typename StringType>
96void claw::text::trim( StringType& str,
97 const typename StringType::value_type* const s )
98{
99 typename StringType::size_type first = str.find_first_not_of(s);
100 typename StringType::size_type last = str.find_last_not_of(s);
101
102 if (first != StringType::npos)
103 str = str.substr( first, last - first + 1 );
104} // trim()
105
106/*----------------------------------------------------------------------------*/
107/**
108 * \brief Squeeze successive characters of a string into one character.
109 * \param str The string to modify.
110 * \param s The characters to remove.
111 *
112 * \b Example :
113 * <tt>
114 * std::string s("word aaa bbb abab");
115 * claw::squeeze( s, "ab" );
116 * std::cout << s << std::end; // result is "word a b abab"
117 * </tt>
118 */
119template<typename StringType>
120void claw::text::squeeze( StringType& str,
121 const typename StringType::value_type* const s )
122{
123 typedef typename StringType::size_type size_type;
124
125 size_type first(0);
126
127 do
128 {
129 first = str.find_first_of(s, first);
130
131 if ( first != StringType::npos )
132 {
133 size_type last = str.find_first_not_of(str[first], first+1);
134
135 if ( last == StringType::npos )
136 str = str.substr(0, first+1);
137 else if ( last - first > 1 )
138 str = str.substr(0, first+1) + str.substr(last);
139
140 ++first;
141 }
142 }
143 while ( (first != StringType::npos) && (first != str.length()) );
144} // squeeze()
145
146/*----------------------------------------------------------------------------*/
147/**
148 * \brief Replace a set of characters by other characters.
149 * \param str The string to modify.
150 * \param e1 The characters to remove.
151 * \param e2 The characters replacing the ones in \a e1.
152 *
153 * \return The number of replaced characters.
154 *
155 * Each character e1[i] will be replaced with e2[i]. If \a e1 is smaller than
156 * \a e2, the latter will be completed by repeating its last character.
157 *
158 * \b Example :
159 * <tt>
160 * std::string s("word aaa bbb abab");
161 * claw::replace( s, "ab", "ba" );
162 * std::cout << s << std::end; // result is "word bbb aaa baba"
163 * </tt>
164 */
165template<typename StringType>
166std::size_t claw::text::replace
167( StringType& str, const StringType& e1, const StringType& e2 )
168{
169 return
170 claw::replace
171 ( str.begin(), str.end(), e1.begin(), e1.end(), e2.begin(), e2.end() );
172} // replace()
173
174/*----------------------------------------------------------------------------*/
175/**
176 * \brief Test if the content of a string is immediately convertible to a type.
177 * \param str The string to test.
178 */
179template<typename T, typename StringType>
180bool claw::text::is_of_type( const StringType& str )
181{
182 std::basic_istringstream< typename StringType::value_type,
183 typename StringType::traits_type,
184 typename StringType::allocator_type > iss(str);
185
186 T val;
187 bool result = false;
188
189 if ( iss >> val )
190 result = iss.eof();
191
192 return result;
193} // is_of_type()
194
195/*----------------------------------------------------------------------------*/
196/**
197 * \brief Split a string into several substrings, according to a given
198 * separator.
199 * \param sequence A sequence in which the substrings are added.
200 * \param str The string to split.
201 * \param sep The separator on which the string is splitted.
202 */
203template<typename Sequence>
204void claw::text::split
205( Sequence& sequence, const typename Sequence::value_type& str,
206 const typename Sequence::value_type::value_type sep )
207{
208 split(sequence, str.begin(), str.end(), sep);
209} // split()
210
211/*----------------------------------------------------------------------------*/
212/**
213 * \brief Split a string into several substrings, according to a given
214 * separator.
215 * \param sequence A sequence in which the substrings are added.
216 * \param first Iterator on the beginning of the string to split.
217 * \param last Iterator just past the end of the string to split.
218 * \param sep The separator on which the string is splitted.
219 */
220template<typename Sequence>
221void claw::text::split
222( Sequence& sequence, typename Sequence::value_type::const_iterator first,
223 typename Sequence::value_type::const_iterator last,
224 const typename Sequence::value_type::value_type sep )
225{
226 typedef typename Sequence::value_type string_type;
227
228 string_type line;
229 std::basic_istringstream< typename string_type::value_type,
230 typename string_type::traits_type,
231 typename string_type::allocator_type > iss( string_type(first, last) );
232
233 while ( std::getline(iss, line, sep) )
234 *std::insert_iterator<Sequence>(sequence, sequence.end()) = line;
235} // split()
236
237/*----------------------------------------------------------------------------*/
238/**
239 * \brief Find escaped symbols in a sequence of characters and replace them by
240 * their c-equivalent.
241 *
242 * \param first Iterator on the beginning of the string to escape.
243 * \param last Iterator just past the end of the string to escape.
244 * \param out Iterator on the beginning of the output string.
245 * \pre \a out points on a range long enough to store the resulting string.
246 *
247 * \code
248 * std::string s("\\a\\t\\n\\r");
249 * std::string r;
250 *
251 * claw::text::c_escape( s.begin(), s.end(), std::insert_iterator(r, r.end()) );
252 *
253 * if ( r == "\a\t\n\r" )
254 * std::cout << "It works!" << std::endl;
255 * \endcode
256 *
257 * \remark This method has not been tested with wide chars yet.
258 */
259template<typename InputIterator, typename OutputIterator>
260void claw::text::c_escape
261( InputIterator first, InputIterator last, OutputIterator out )
262{
263 typedef typename std::iterator_traits<InputIterator>::value_type char_type;
264 typedef std::basic_string<char_type> string_type;
265
266 const string_type oct("01234567");
267 const string_type hex("0123456789ABCDEFabcdef");
268
269 bool escape(false);
270
271 for ( ; first!=last; ++out )
272 if ( escape )
273 {
274 switch( *first )
275 {
276 case 'a': *out = '\a'; ++first; break;
277 case 'b': *out = '\b'; ++first; break;
278 case 'f': *out = '\f'; ++first; break;
279 case 'n': *out = '\n'; ++first; break;
280 case 'r': *out = '\r'; ++first; break;
281 case 't': *out = '\t'; ++first; break;
282 case 'v': *out = '\v'; ++first; break;
283 case 'o':
284 {
285 ++first;
286 int v(0);
287 const InputIterator e
288 ( find_first_not_of(first, last, oct.begin(), oct.end()) );
289
290 std::basic_istringstream<char_type> iss( string_type(first, e) );
291 iss >> std::oct >> v;
292 *out = (char_type)v;
293 first = e;
294 break;
295 }
296 case 'x':
297 {
298 ++first;
299 int v(0);
300 const InputIterator e
301 ( find_first_not_of(first, last, hex.begin(), hex.end()) );
302
303 std::basic_istringstream<char_type> iss( string_type(first, e) );
304 iss >> std::hex >> v;
305 *out = (char_type)v;
306 first = e;
307 break;
308 }
309 default: *out = *first; ++first;
310 }
311
312 escape = false;
313 }
314 else if ( *first == '\\' )
315 {
316 escape = true;
317 ++first;
318 }
319 else
320 {
321 *out = *first;
322 ++first;
323 }
324} // c_escape()
325
326/*----------------------------------------------------------------------------*/
327/**
328 * \brief Check if a string matches a given pattern.
329 * \param pattern The pattern.
330 * \param text The text to check.
331 * \param any_sequence A value representing any sequence of values, empty or
332 * not.
333 * \param zero_or_one A value representing any value or no value.
334 * \param any A value representing any value.
335 */
336template<typename StringType>
337bool claw::text::glob_match
338( const StringType& pattern, const StringType& text,
339 const typename StringType::value_type any_sequence,
340 const typename StringType::value_type zero_or_one,
341 const typename StringType::value_type any )
342{
343 return claw::glob_match
344 ( pattern.begin(), pattern.end(), text.begin(), text.end(), any_sequence,
345 zero_or_one, any );
346} // glob_match()
347
348/*----------------------------------------------------------------------------*/
349/**
350 * \brief Check if a string may match a given pattern.
351 * \param pattern The pattern.
352 * \param text The text to check.
353 * \param any_sequence A value representing any sequence of values, empty or
354 * not.
355 * \param zero_or_one A value representing any value or no value.
356 * \param any A value representing any value.
357 */
358template<typename StringType>
359bool claw::text::glob_potential_match
360( const StringType& pattern, const StringType& text,
361 const typename StringType::value_type any_sequence,
362 const typename StringType::value_type zero_or_one,
363 const typename StringType::value_type any )
364{
365 return claw::glob_potential_match
366 ( pattern.begin(), pattern.end(), text.begin(), text.end(), any_sequence,
367 zero_or_one, any );
368} // glob_potential_match()