Eclipse SUMO - Simulation of Urban MObility
StringUtils.cpp
Go to the documentation of this file.
1/****************************************************************************/
2// Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.org/sumo
3// Copyright (C) 2001-2022 German Aerospace Center (DLR) and others.
4// This program and the accompanying materials are made available under the
5// terms of the Eclipse Public License 2.0 which is available at
6// https://www.eclipse.org/legal/epl-2.0/
7// This Source Code may also be made available under the following Secondary
8// Licenses when the conditions for such availability set forth in the Eclipse
9// Public License 2.0 are satisfied: GNU General Public License, version 2
10// or later which is available at
11// https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12// SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13/****************************************************************************/
21// Some static methods for string processing
22/****************************************************************************/
23#include <config.h>
24
25#include <string>
26#include <iostream>
27#include <cstdio>
28#include <cstring>
29#include <regex>
30#ifdef WIN32
31#define NOMINMAX
32#include <windows.h>
33#undef NOMINMAX
34#else
35#include <unistd.h>
36#endif
37#include <xercesc/util/TransService.hpp>
38#include <xercesc/util/TranscodingException.hpp>
41#include "StringUtils.h"
42
43
44// ===========================================================================
45// static member definitions
46// ===========================================================================
47std::string StringUtils::emptyString;
48XERCES_CPP_NAMESPACE::XMLLCPTranscoder* StringUtils::myLCPTranscoder = nullptr;
49
50
51// ===========================================================================
52// method definitions
53// ===========================================================================
54std::string
55StringUtils::prune(const std::string& str) {
56 const std::string::size_type endpos = str.find_last_not_of(" \t\n\r");
57 if (std::string::npos != endpos) {
58 const int startpos = (int)str.find_first_not_of(" \t\n\r");
59 return str.substr(startpos, endpos - startpos + 1);
60 }
61 return "";
62}
63
64
65std::string
66StringUtils::pruneZeros(const std::string& str, int max) {
67 const std::string::size_type endpos = str.find_last_not_of("0");
68 if (endpos != std::string::npos && str.back() == '0') {
69 std::string res = str.substr(0, MAX2((int)str.size() - max, (int)endpos + 1));
70 return res;
71 }
72 return str;
73}
74
75std::string
76StringUtils::to_lower_case(const std::string& str) {
77 std::string s = str;
78 std::transform(s.begin(), s.end(), s.begin(), [](char c) {
79 return (char)::tolower(c);
80 });
81 return s;
82}
83
84
85std::string
87 // inspired by http://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c
88 std::string result;
89 for (int i = 0; i < (int)str.length(); i++) {
90 const unsigned char c = str[i];
91 if (c < 128) {
92 result += c;
93 } else {
94 result += (char)(0xc2 + (c > 0xbf));
95 result += (char)((c & 0x3f) + 0x80);
96 }
97 }
98 return result;
99}
100
101
102std::string
104 str = replace(str, "\xE4", "ae");
105 str = replace(str, "\xC4", "Ae");
106 str = replace(str, "\xF6", "oe");
107 str = replace(str, "\xD6", "Oe");
108 str = replace(str, "\xFC", "ue");
109 str = replace(str, "\xDC", "Ue");
110 str = replace(str, "\xDF", "ss");
111 str = replace(str, "\xC9", "E");
112 str = replace(str, "\xE9", "e");
113 str = replace(str, "\xC8", "E");
114 str = replace(str, "\xE8", "e");
115 return str;
116}
117
118
119std::string
120StringUtils::replace(std::string str, const std::string& what, const std::string& by) {
121 std::string::size_type idx = str.find(what);
122 const int what_len = (int)what.length();
123 if (what_len > 0) {
124 const int by_len = (int)by.length();
125 while (idx != std::string::npos) {
126 str = str.replace(idx, what_len, by);
127 idx = str.find(what, idx + by_len);
128 }
129 }
130 return str;
131}
132
133
134std::string
135StringUtils::substituteEnvironment(const std::string& str, const std::chrono::time_point<std::chrono::system_clock>* const timeRef) {
136 std::string s = str;
137 if (timeRef != nullptr) {
138 const std::string::size_type localTimeIndex = str.find("${LOCALTIME}");
139 const std::string::size_type utcIndex = str.find("${UTC}");
140 const bool isUTC = utcIndex != std::string::npos;
141 if (localTimeIndex != std::string::npos || isUTC) {
142 const time_t rawtime = std::chrono::system_clock::to_time_t(*timeRef);
143 char buffer [80];
144 struct tm* timeinfo = isUTC ? gmtime(&rawtime) : localtime(&rawtime);
145 strftime(buffer, 80, "%Y-%m-%d-%H-%M-%S.", timeinfo);
146 auto seconds = std::chrono::time_point_cast<std::chrono::seconds>(*timeRef);
147 auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(*timeRef - seconds);
148 const std::string micro = buffer + toString(microseconds.count());
149 if (isUTC) {
150 s.replace(utcIndex, 6, micro);
151 } else {
152 s.replace(localTimeIndex, 12, micro);
153 }
154 }
155 }
156 const std::string::size_type pidIndex = str.find("${PID}");
157 if (pidIndex != std::string::npos) {
158#ifdef WIN32
159 s.replace(pidIndex, 6, toString(::GetCurrentProcessId()));
160#else
161 s.replace(pidIndex, 6, toString(::getpid()));
162#endif
163 }
164 if (std::getenv("SUMO_LOGO") == nullptr) {
165 s = replace(s, "${SUMO_LOGO}", "${SUMO_HOME}/data/logo/sumo-128x138.png");
166 }
167 const std::string::size_type tildeIndex = str.find("~");
168 if (tildeIndex == 0) {
169 s.replace(0, 1, "${HOME}");
170 }
171 s = replace(s, ",~", ",${HOME}");
172#ifdef WIN32
173 if (std::getenv("HOME") == nullptr) {
174 s = replace(s, "${HOME}", "${USERPROFILE}");
175 }
176#endif
177
178 // Expression for an environment variables, e.g. ${NAME}
179 // Note: - R"(...)" is a raw string literal syntax to simplify a regex declaration
180 // - .+? looks for the shortest match (non-greedy)
181 // - (.+?) defines a "subgroup" which is already stripped of the $ and {, }
182 std::regex envVarExpr(R"(\$\{(.+?)\})");
183
184 // Are there any variables in this string?
185 std::smatch match;
186 std::string strIter = s;
187
188 // Loop over the entire value string and look for variable names
189 while (std::regex_search(strIter, match, envVarExpr)) {
190 std::string varName = match[1];
191
192 // Find the variable in the environment and its value
193 std::string varValue;
194 if (std::getenv(varName.c_str()) != nullptr) {
195 varValue = std::getenv(varName.c_str());
196 }
197
198 // Replace the variable placeholder with its value in the original string
199 s = std::regex_replace(s, std::regex("\\$\\{" + varName + "\\}"), varValue);
200
201 // Continue the loop with the remainder of the string
202 strIter = match.suffix();
203 }
204 return s;
205}
206
207
208bool
209StringUtils::startsWith(const std::string& str, const std::string prefix) {
210 return str.compare(0, prefix.length(), prefix) == 0;
211}
212
213
214bool
215StringUtils::endsWith(const std::string& str, const std::string suffix) {
216 if (str.length() >= suffix.length()) {
217 return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
218 } else {
219 return false;
220 }
221}
222
223
224std::string
225StringUtils::padFront(const std::string& str, int length, char padding) {
226 return std::string(MAX2(0, length - (int)str.size()), padding) + str;
227}
228
229
230std::string
231StringUtils::escapeXML(const std::string& orig, const bool maskDoubleHyphen) {
232 std::string result = replace(orig, "&", "&amp;");
233 result = replace(result, ">", "&gt;");
234 result = replace(result, "<", "&lt;");
235 result = replace(result, "\"", "&quot;");
236 if (maskDoubleHyphen) {
237 result = replace(result, "--", "&#45;&#45;");
238 }
239 for (char invalid = '\1'; invalid < ' '; invalid++) {
240 result = replace(result, std::string(1, invalid).c_str(), "");
241 }
242 return replace(result, "'", "&apos;");
243}
244
245
246std::string
247StringUtils::urlEncode(const std::string& toEncode, const std::string encodeWhich) {
248 std::ostringstream out;
249
250 for (int i = 0; i < (int)toEncode.length(); ++i) {
251 const char t = toEncode.at(i);
252
253 if ((encodeWhich != "" && encodeWhich.find(t) == std::string::npos) ||
254 (encodeWhich == "" &&
255 ((t >= 45 && t <= 57) || // hyphen, period, slash, 0-9
256 (t >= 65 && t <= 90) || // A-Z
257 t == 95 || // underscore
258 (t >= 97 && t <= 122) || // a-z
259 t == 126)) // tilde
260 ) {
261 out << toEncode.at(i);
262 } else {
263 out << charToHex(toEncode.at(i));
264 }
265 }
266
267 return out.str();
268}
269
270
271std::string
272StringUtils::urlDecode(const std::string& toDecode) {
273 std::ostringstream out;
274
275 for (int i = 0; i < (int)toDecode.length(); ++i) {
276 if (toDecode.at(i) == '%') {
277 std::string str(toDecode.substr(i + 1, 2));
278 out << hexToChar(str);
279 i += 2;
280 } else {
281 out << toDecode.at(i);
282 }
283 }
284
285 return out.str();
286}
287
288std::string
289StringUtils::charToHex(unsigned char c) {
290 short i = c;
291
292 std::stringstream s;
293
294 s << "%" << std::setw(2) << std::setfill('0') << std::hex << i;
295
296 return s.str();
297}
298
299
300unsigned char
301StringUtils::hexToChar(const std::string& str) {
302 short c = 0;
303 if (!str.empty()) {
304 std::istringstream in(str);
305 in >> std::hex >> c;
306 if (in.fail()) {
307 throw NumberFormatException(str + " could not be interpreted as hex");
308 }
309 }
310 return static_cast<unsigned char>(c);
311}
312
313
314int
315StringUtils::toInt(const std::string& sData) {
316 long long int result = toLong(sData);
317 if (result > std::numeric_limits<int>::max() || result < std::numeric_limits<int>::min()) {
318 throw NumberFormatException(toString(result) + " int overflow");
319 }
320 return (int)result;
321}
322
323
324int
325StringUtils::toIntSecure(const std::string& sData, int def) {
326 if (sData.length() == 0) {
327 return def;
328 }
329 return toInt(sData);
330}
331
332
333long long int
334StringUtils::toLong(const std::string& sData) {
335 const char* const data = sData.c_str();
336 if (data == 0 || data[0] == 0) {
337 throw EmptyData();
338 }
339 char* end;
340 errno = 0;
341#ifdef WIN32
342 long long int ret = _strtoi64(data, &end, 10);
343#else
344 long long int ret = strtoll(data, &end, 10);
345#endif
346 if (errno == ERANGE) {
347 errno = 0;
348 throw NumberFormatException("(long long integer range) " + sData);
349 }
350 if ((int)(end - data) != (int)strlen(data)) {
351 throw NumberFormatException("(long long integer format) " + sData);
352 }
353 return ret;
354}
355
356
357int
358StringUtils::hexToInt(const std::string& sData) {
359 if (sData.length() == 0) {
360 throw EmptyData();
361 }
362 size_t idx = 0;
363 int result;
364 try {
365 if (sData[0] == '#') { // for html color codes
366 result = std::stoi(sData.substr(1), &idx, 16);
367 idx++;
368 } else {
369 result = std::stoi(sData, &idx, 16);
370 }
371 } catch (...) {
372 throw NumberFormatException("(hex integer format) " + sData);
373 }
374 if (idx != sData.length()) {
375 throw NumberFormatException("(hex integer format) " + sData);
376 }
377 return result;
378}
379
380
381double
382StringUtils::toDouble(const std::string& sData) {
383 if (sData.size() == 0) {
384 throw EmptyData();
385 }
386 try {
387 size_t idx = 0;
388 const double result = std::stod(sData, &idx);
389 if (idx != sData.size()) {
390 throw NumberFormatException("(double format) " + sData);
391 } else {
392 return result;
393 }
394 } catch (...) {
395 // invalid_argument or out_of_range
396 throw NumberFormatException("(double) " + sData);
397 }
398}
399
400
401double
402StringUtils::toDoubleSecure(const std::string& sData, const double def) {
403 if (sData.length() == 0) {
404 return def;
405 }
406 return toDouble(sData);
407}
408
409
410bool
411StringUtils::toBool(const std::string& sData) {
412 if (sData.length() == 0) {
413 throw EmptyData();
414 }
415 const std::string s = to_lower_case(sData);
416 if (s == "1" || s == "yes" || s == "true" || s == "on" || s == "x" || s == "t") {
417 return true;
418 }
419 if (s == "0" || s == "no" || s == "false" || s == "off" || s == "-" || s == "f") {
420 return false;
421 }
422 throw BoolFormatException(s);
423}
424
425
426std::string
427StringUtils::transcode(const XMLCh* const data, int length) {
428 if (data == 0) {
429 throw EmptyData();
430 }
431 if (length == 0) {
432 return "";
433 }
434#if _XERCES_VERSION < 30100
435 char* t = XERCES_CPP_NAMESPACE::XMLString::transcode(data);
436 std::string result(t);
437 XERCES_CPP_NAMESPACE::XMLString::release(&t);
438 return result;
439#else
440 try {
441 XERCES_CPP_NAMESPACE::TranscodeToStr utf8(data, "UTF-8");
442 return reinterpret_cast<const char*>(utf8.str());
443 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {
444 return "?";
445 }
446#endif
447}
448
449
450std::string
451StringUtils::transcodeFromLocal(const std::string& localString) {
452#if _XERCES_VERSION > 30100
453 try {
454 if (myLCPTranscoder == nullptr) {
455 myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
456 }
457 if (myLCPTranscoder != nullptr) {
458 return transcode(myLCPTranscoder->transcode(localString.c_str()));
459 }
460 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
461#endif
462 return localString;
463}
464
465
466std::string
467StringUtils::transcodeToLocal(const std::string& utf8String) {
468#if _XERCES_VERSION > 30100
469 try {
470 if (myLCPTranscoder == nullptr) {
471 myLCPTranscoder = XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgTransService->makeNewLCPTranscoder(XERCES_CPP_NAMESPACE::XMLPlatformUtils::fgMemoryManager);
472 }
473 if (myLCPTranscoder != nullptr) {
474 XERCES_CPP_NAMESPACE::TranscodeFromStr utf8(reinterpret_cast<const XMLByte*>(utf8String.c_str()), utf8String.size(), "UTF-8");
475 return myLCPTranscoder->transcode(utf8.str());
476 }
477 } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {}
478#endif
479 return utf8String;
480}
481
482
483std::string
484StringUtils::trim_left(const std::string s, const std::string& t) {
485 std::string result = s;
486 result.erase(0, s.find_first_not_of(t));
487 return result;
488}
489
490std::string
491StringUtils::trim_right(const std::string s, const std::string& t) {
492 std::string result = s;
493 result.erase(s.find_last_not_of(t) + 1);
494 return result;
495}
496
497std::string
498StringUtils::trim(const std::string s, const std::string& t) {
499 return trim_right(trim_left(s, t), t);
500}
501
502void
504 myLCPTranscoder = nullptr;
505}
506
507/****************************************************************************/
T MAX2(T a, T b)
Definition: StdDefs.h:77
std::string toString(const T &t, std::streamsize accuracy=gPrecision)
Definition: ToString.h:46
static std::string pruneZeros(const std::string &str, int max)
Removes trailing zeros (at most 'max')
Definition: StringUtils.cpp:66
static std::string urlEncode(const std::string &url, const std::string encodeWhich="")
static std::string charToHex(unsigned char c)
static std::string urlDecode(const std::string &encoded)
static long long int toLong(const std::string &sData)
converts a string into the long value described by it by calling the char-type converter,...
static double toDoubleSecure(const std::string &sData, const double def)
converts a string into the double value described by it; returns the given default if the string is empty
static std::string trim(const std::string s, const std::string &t=" \t\n")
remove leading and trailing whitespace
static std::string to_lower_case(const std::string &str)
Transfers the content to lower case.
Definition: StringUtils.cpp:76
static void resetTranscoder()
must be called when shutting down the xml subsystem
static XERCES_CPP_NAMESPACE::XMLLCPTranscoder * myLCPTranscoder
Definition: StringUtils.h:197
static std::string trim_right(const std::string s, const std::string &t=" \t\n")
remove trailing whitespace from string
static std::string trim_left(const std::string s, const std::string &t=" \t\n")
remove leading whitespace from string
static std::string replace(std::string str, const std::string &what, const std::string &by)
static int hexToInt(const std::string &sData)
converts a string with a hex value into the integer value described by it by calling the char-type co...
static double toDouble(const std::string &sData)
converts a string into the double value described by it by calling the char-type converter
static std::string escapeXML(const std::string &orig, const bool maskDoubleHyphen=false)
Replaces the standard escapes by their XML entities.
static std::string latin1_to_utf8(std::string str)
Transfers from Latin 1 (ISO-8859-1) to UTF-8.
Definition: StringUtils.cpp:86
static std::string prune(const std::string &str)
Removes leading and trailing whitespace characters.
Definition: StringUtils.cpp:55
static std::string padFront(const std::string &str, int length, char padding)
static std::string convertUmlaute(std::string str)
Converts german "Umlaute" to their latin-version.
static unsigned char hexToChar(const std::string &str)
static bool startsWith(const std::string &str, const std::string prefix)
Checks whether a given string starts with the prefix.
static std::string emptyString
An empty string.
Definition: StringUtils.h:87
static bool endsWith(const std::string &str, const std::string suffix)
Checks whether a given string ends with the suffix.
static std::string substituteEnvironment(const std::string &str, const std::chrono::time_point< std::chrono::system_clock > *const timeRef=nullptr)
static std::string transcode(const XMLCh *const data)
converts a 0-terminated XMLCh* array (usually UTF-16, stemming from Xerces) into std::string in UTF-8
Definition: StringUtils.h:140
static std::string transcodeToLocal(const std::string &utf8String)
convert a string from UTF-8 to the local codepage
static int toIntSecure(const std::string &sData, int def)
converts a string into the integer value described by it
static std::string transcodeFromLocal(const std::string &localString)
convert a string from the local codepage to UTF-8
static int toInt(const std::string &sData)
converts a string into the integer value described by it by calling the char-type converter,...
static bool toBool(const std::string &sData)
converts a string into the bool value described by it by calling the char-type converter