WvStreams
wvtclstring.cc
1/*
2 * Worldvisions Weaver Software:
3 * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
4 */
5#include "wvbackslash.h"
6#include "wvbuf.h"
7#include "wvstream.h"
8#include "wvstring.h"
9#include "wvstringmask.h"
10#include "wvtclstring.h"
11#include <climits>
12
// Character masks built once from the corresponding *_STR definitions.
// WVTCL_NASTY_SPACES is the default "nasties" mask and WVTCL_SPLITCHARS the
// default "splitchars" mask of the public wvtcl_* functions.
// NOTE(review): the *_STR macros are presumably defined in wvtclstring.h --
// confirm there which characters each mask contains.
const WvStringMask WVTCL_NASTY_SPACES(WVTCL_NASTY_SPACES_STR);
const WvStringMask WVTCL_NASTY_NEWLINES(WVTCL_NASTY_NEWLINES_STR);
const WvStringMask WVTCL_SPLITCHARS(WVTCL_SPLITCHARS_STR);
16
17static size_t wvtcl_escape(char *dst, const char *s, size_t s_len,
18 const WvStringMask &nasties, bool *verbatim = NULL)
19{
20 if (verbatim) *verbatim = false;
21
22 // NULL strings remain such
23 if (s == NULL)
24 return 0;
25 // empty strings are just {}
26 if (s_len == 0)
27 {
28 if (dst)
29 {
30 dst[0] = '{';
31 dst[1] = '}';
32 }
33 return 2;
34 }
35
36 bool backslashify = false, inescape = false;
37 int len = 0, unprintables = 0, bracecount = 0;
38 const char *cptr, *cptr_end = s + s_len;
39
40 // figure out which method we need to use: backslashify or embrace.
41 // also count the number of unprintable characters we'll need to
42 // backslashify, if it turns out that's necessary.
43 for (cptr = s; cptr != cptr_end; cptr++)
44 {
45 // Assume we do nothing
46 if (dst) dst[len] = *cptr;
47 ++len;
48
49 if (!inescape && *cptr == '{')
50 bracecount++;
51 else if (!inescape && *cptr == '}')
52 bracecount--;
53 if (bracecount < 0)
54 backslashify = true;
55
56 bool doit = false;
57 switch (*cptr)
58 {
59 case WVTCL_ALWAYS_NASTY_CASE:
60 doit = true;
61 break;
62 default:
63 if (nasties[*cptr])
64 doit = true;
65 }
66 if (doit)
67 unprintables++;
68
69 if (*cptr == '\\')
70 inescape = !inescape;
71 else
72 inescape = false;
73 }
74
75 // if the braces aren't balanced, backslashify
76 if (bracecount != 0 || inescape)
77 backslashify = true;
78
79 if (!backslashify && !unprintables)
80 {
81 if (verbatim) *verbatim = true;
82 return len; // no work needed!
83 }
84
85 if (backslashify)
86 {
87 if (dst)
88 {
89 len = 0;
90 for (cptr = s; cptr != cptr_end; ++cptr)
91 {
92 bool doit = false;
93 switch (*cptr)
94 {
95 case WVTCL_ALWAYS_NASTY_CASE:
96 doit = true;
97 break;
98 default:
99 if (nasties[*cptr])
100 doit = true;
101 }
102 if (doit)
103 dst[len++] = '\\';
104
105 dst[len++] = *cptr;
106 }
107 return len;
108 }
109 else return len+unprintables;
110 }
111 else
112 {
113 // the embrace method: just take the string and put braces around it
114 if (dst)
115 {
116 len = 0;
117 dst[len++] = '{';
118 for (cptr = s; cptr != cptr_end; ++cptr)
119 dst[len++] = *cptr;
120 dst[len++] = '}';
121 return len;
122 }
123 else return len+2;
124 }
125}
126
127
129{
130 size_t s_len = s.len();
131
132 bool verbatim;
133 size_t len = wvtcl_escape(NULL, s, s_len, nasties, &verbatim);
134 if (verbatim) return s;
135
136 WvString result;
137 result.setsize(len);
138 char *e = result.edit();
139 e += wvtcl_escape(e, s, s_len, nasties);
140 *e = '\0';
141 return result;
142}
143
144
// Low-level tcl-unescape of the s_len bytes at 's'.
//
// If 'dst' is NULL nothing is written and only the resulting size is
// computed; otherwise the result is written to 'dst'.  No NUL terminator is
// written or counted.
//
//   - "{...}"  : the outer braces are removed, contents copied untouched;
//   - "\"...\"": the outer quotes are removed and backslashes are processed;
//   - otherwise: backslashes are processed.
//
// Backslash processing drops each unpaired backslash and keeps the character
// that follows it; a trailing unpaired backslash is dropped.
//
// Returns the number of bytes in the unescaped form.  If 'verbatim' is
// non-NULL it is set to true only for a NULL 's' (which yields 0).
static size_t wvtcl_unescape(char *dst, const char *s, size_t s_len,
                             bool *verbatim = NULL)
{
    // NULL strings remain themselves
    if (!s)
    {
        if (verbatim) *verbatim = true;
        return 0;
    }

    if (verbatim) *verbatim = false;

    // A pair of delimiters needs at least two characters.  Without this
    // check a lone '"' would count as both the opening and the closing
    // quote and the scan below would start past its own end.
    const bool can_be_delimited = (s_len >= 2);

    // deal with embraced strings by simply removing the braces
    if (can_be_delimited && s[0] == '{' && s[s_len - 1] == '}')
    {
        if (dst) memcpy(dst, s + 1, s_len - 2);
        return s_len - 2;
    }

    // deal with quoted strings by ignoring the quotes _and_
    // unbackslashifying
    const char *start = s, *end = s + s_len;
    if (can_be_delimited && s[0] == '"' && s[s_len - 1] == '"')
    {
        ++start;
        --end;
    }

    // otherwise (or in addition), unbackslashify it
    size_t len = 0;
    bool inescape = false;
    for (; start != end; ++start)
    {
        if (*start == '\\' && !inescape)
        {
            // swallow the backslash; the next character is taken literally
            inescape = true;
            continue;
        }

        inescape = false;
        if (dst) dst[len] = *start;
        len++;
    }
    return len;
}
202
203
205{
206 size_t s_len = s.len();
207
208 bool verbatim;
209 size_t len = wvtcl_unescape(NULL, s, s_len, &verbatim);
210 if (verbatim) return s;
211
212 WvString result;
213 result.setsize(len+1);
214 char *e = result.edit();
215 e += wvtcl_unescape(e, s, s_len);
216 *e = '\0';
217 return result;
218}
219
220
222 const WvStringMask &splitchars)
223{
224 int size = 0;
225
227 int count = 0;
228 for (i.rewind(); i.next(); )
229 {
230 size += wvtcl_escape(NULL, *i, i->len(), nasties);
231 ++count;
232 }
233
234 WvString result;
235 result.setsize(size+(count-1)+1);
236
237 char *p = result.edit();
238 int j;
239 for (i.rewind(), j=0; i.next(); ++j)
240 {
241 p += wvtcl_escape(p, *i, i->len(), nasties);
242 if (j < count - 1)
243 *p++ = splitchars.first();
244 }
245 *p = '\0';
246
247 return result;
248}
249
// Sentinel returned by the low-level wvtcl_getword() when no complete word
// could be extracted from the input; callers compare its result against it.
const size_t WVTCL_GETWORD_NONE (UINT_MAX);
251
252static size_t wvtcl_getword(char *dst, const char *s, size_t s_len,
253 const WvStringMask &splitchars,
254 bool do_unescape, size_t *end = NULL)
255{
256 //printf(" used=%d\n", origsize);
257 if (!s_len) return WVTCL_GETWORD_NONE;
258
259 bool inescape = false, inquote = false, incontinuation = false;
260 int bracecount = 0;
261 const char *origend = s + s_len;
262 const char *sptr, *eptr;
263
264 // skip leading separators
265 for (sptr = s; sptr != origend; sptr++)
266 {
267 if (!splitchars[*sptr])
268 break;
269 }
270
271 if (sptr == origend) // nothing left
272 return WVTCL_GETWORD_NONE;
273
274 // detect initial quote
275 if (*sptr == '"')
276 {
277 inquote = true;
278 eptr = sptr+1;
279 }
280 else
281 eptr = sptr;
282
283 // loop over string until something satisfactory is found
284 for (; eptr != origend; eptr++)
285 {
286 char ch = *eptr;
287
288 incontinuation = false;
289
290 if (inescape)
291 {
292 if (ch == '\n')
293 {
294 // technically we've finished the line-continuation
295 // sequence, but we require at least one more character
296 // in order to prove that there's a next line somewhere
297 // in the buffer. Otherwise we might stop parsing before
298 // we're "really" done if we're given input line-by-line.
299 //
300 // A better way to do this would be for getword() to *never*
301 // return a string unless it contains a separator character;
302 // then we wouldn't need this weird special case. But it
303 // don't work like that; we'll return the last word in the
304 // buffer even if it *doesn't* end in a separator character.
305 incontinuation = true;
306 }
307 inescape = false;
308 }
309 else if (ch == '\\')
310 {
311 inescape = true;
312 // now we need a character to complete the escape
313 }
314 else // not an escape sequence
315 {
316 // detect end of a quoted/unquoted string
317 if (bracecount == 0)
318 {
319 if (inquote)
320 {
321 if (ch == '"')
322 {
323 eptr++;
324 break;
325 }
326 }
327 else if (splitchars[ch])
328 break;
329 }
330
331 // match braces
332 if (!inquote)
333 {
334 if (ch == '{')
335 bracecount++;
336 else if (bracecount > 0 && ch == '}')
337 bracecount--;
338 }
339 }
340 }
341
342 if (bracecount || sptr==eptr || inquote || inescape || incontinuation)
343 // not there yet...
344 return WVTCL_GETWORD_NONE;
345
346 //printf("len=%d, unget=%d\n", eptr - sptr, origend - eptr);
347 if (end) *end = eptr - s;
348
349 if (do_unescape)
350 return wvtcl_unescape(dst, sptr, eptr-sptr);
351 else
352 {
353 if (dst) memcpy(dst, sptr, eptr-sptr);
354 return eptr - sptr;
355 }
356}
357
358
359WvString wvtcl_getword(WvBuf &buf, const WvStringMask &splitchars,
360 bool do_unescape)
361{
362 int origsize = buf.used();
363 const char *origptr = (const char *)buf.get(origsize);
364
365 size_t end;
366 size_t len = wvtcl_getword(NULL, origptr, origsize,
367 splitchars, do_unescape, &end);
368 if (len == WVTCL_GETWORD_NONE)
369 {
370 buf.unget(origsize);
371 return WvString::null;
372 }
373
374 WvString result;
375 result.setsize(len+1);
376 char *e = result.edit();
377 e += wvtcl_getword(e, origptr, origsize, splitchars, do_unescape);
378 *e = '\0';
379
380 buf.unget(origsize - end);
381
382 return result;
383}
384
385
387 const WvStringMask &splitchars, bool do_unescape)
388{
389 const char *s = _s;
390 size_t s_len = _s.len();
391 for (;;)
392 {
393 size_t end;
394 size_t len = wvtcl_getword(NULL, s, s_len,
395 splitchars, do_unescape, &end);
396 if (len == WVTCL_GETWORD_NONE)
397 break;
398
399 WvString *word = new WvString();
400 word->setsize(len+1);
401
402 char *e = word->edit();
403 e += wvtcl_getword(e, s, s_len, splitchars, do_unescape);
404 *e = '\0';
405 l.append(word, true);
406
407 s += end;
408 s_len -= end;
409 }
410}
A WvFastString acts exactly like a WvString, but can take (const char *) strings without needing to a...
Definition wvstring.h:94
void rewind()
Rewinds the iterator to make it point to an imaginary element preceding the first element of the lis...
Definition wvlinklist.h:90
WvLink * next()
Moves the iterator along the list to point to the next element.
Definition wvlinklist.h:103
The iterator type for linked lists.
Definition wvlinklist.h:351
A linked list container class.
Definition wvlinklist.h:198
void append(T *data, bool autofree, const char *id=NULL)
Appends the element to the end of the list.
Definition wvlinklist.h:276
A class used to provide a masked lookup for characters in a string.
const char first() const
Get the first character set into the mask.
WvString is an implementation of a simple and efficient printable-string class.
Definition wvstring.h:330
char * edit()
make the string editable, and return a non-const (char*)
Definition wvstring.h:397
Functions to handle "tcl-style" strings and lists.
WvString wvtcl_encode(WvList< WvString > &l, const WvStringMask &nasties=WVTCL_NASTY_SPACES, const WvStringMask &splitchars=WVTCL_SPLITCHARS)
encode a tcl-style list.
WvString wvtcl_unescape(WvStringParm s)
tcl-unescape a string.
WvString wvtcl_escape(WvStringParm s, const WvStringMask &nasties=WVTCL_NASTY_SPACES)
tcl-escape a string.
WvString wvtcl_getword(WvBuf &buf, const WvStringMask &splitchars=WVTCL_SPLITCHARS, bool do_unescape=true)
Get a single tcl word from an input buffer, and return the rest of the buffer untouched.
void wvtcl_decode(WvList< WvString > &l, WvStringParm _s, const WvStringMask &splitchars=WVTCL_SPLITCHARS, bool do_unescape=true)
split a tcl-style list.