Coverage for larch/utils/strutils.py: 30%
219 statements
« prev ^ index » next coverage.py v7.6.0, created at 2024-10-16 21:04 +0000
« prev ^ index » next coverage.py v7.6.0, created at 2024-10-16 21:04 +0000
1#!/usr/bin/env python
2"""
3utilities for larch
4"""
5from __future__ import print_function
6import re
7import sys
8import os
9import uuid
10import hashlib
11from base64 import b64encode, b32encode
12from random import Random
13from packaging import version as pkg_version
15from .gformat import gformat
# module-level random number generator, used by random_varname() and file2groupname()
rng = Random()
def bytes2str(s):
    """Convert a bytes-like object to str; str input is returned unchanged.

    The encoding of sys.stdout is used for decoding.  When sys.stdout is
    missing or reports no encoding (for example when it has been replaced
    by an io.StringIO), UTF-8 is used instead of failing.

    Arguments
    ---------
      s   (str, bytes, or other bytes-like object)  value to convert

    Returns
    -------
      str

    Raises
    ------
      TypeError if s is neither str nor a bytes-like object.
    """
    if isinstance(s, str):
        return s
    # sys.stdout may be None, or a stream whose `encoding` attribute is None
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    if isinstance(s, bytes):
        return s.decode(encoding)
    return str(s, encoding)
def str2bytes(s):
    """Convert a str to bytes; bytes input is returned unchanged.

    The encoding of sys.stdout is used for encoding.  When sys.stdout is
    missing or reports no encoding (for example when it has been replaced
    by an io.StringIO), UTF-8 is used instead of failing.

    Arguments
    ---------
      s   (str or bytes)  value to convert

    Returns
    -------
      bytes
    """
    if isinstance(s, bytes):
        return s
    # sys.stdout may be None, or a stream whose `encoding` attribute is None
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    return bytes(s, encoding)
def strict_ascii(s, replacement='_'):
    """Force a string to pure ASCII (all characters below 128).

    The string is encoded as UTF-8 and every byte with value 128 or
    above is replaced by `replacement`, so a character that takes
    several UTF-8 bytes yields several replacements.
    """
    pieces = []
    for code in bytes(s, 'UTF-8'):
        pieces.append(replacement if code >= 128 else chr(code))
    return ''.join(pieces)
# words that isValidName() rejects as names
RESERVED_WORDS = (
    'False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await',
    'break', 'class', 'continue', 'def', 'del', 'elif', 'else', 'end',
    'enddef', 'endfor', 'endif', 'endtry', 'endwhile', 'eval', 'except',
    'exec', 'execfile', 'finally', 'for', 'from', 'global', 'group', 'if',
    'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass',
    'print', 'raise', 'return', 'try', 'while', 'with', 'yield',
)

# matcher for a name, optionally followed by '.'-separated name components
NAME_MATCH = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*$").match

# characters allowed in a name without dots, with dots, and as a first character
VALID_SNAME_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
VALID_NAME_CHARS = '.' + VALID_SNAME_CHARS
VALID_CHARS1 = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'

# characters replaced by '_' in file names, and (with four more) in variable names
BAD_FILECHARS = ';~,`!%$@$&^?*#:"/|\'\\\t\r\n (){}[]<>'
GOOD_FILECHARS = len(BAD_FILECHARS) * '_'

BAD_VARSCHARS = BAD_FILECHARS + '=+-.'
GOOD_VARSCHARS = len(BAD_VARSCHARS) * '_'

# translation tables for str.translate()
TRANS_FILE = str.maketrans(BAD_FILECHARS, GOOD_FILECHARS)
TRANS_VARS = str.maketrans(BAD_VARSCHARS, GOOD_VARSCHARS)
def PrintExceptErr(err_str, print_trace=True):
    """Print a message and the exception currently being handled.

    A banner and `err_str` are printed to stdout, then the exception
    reported by sys.exc_info() is passed to sys.excepthook.

    Arguments
    ---------
      err_str      (str)   message to print before the exception report
      print_trace  (bool)  if false, the traceback is omitted from the
                           report passed to sys.excepthook (default True)

    Raises
    ------
      Any exception raised while reporting is re-raised after printing
      'Error printing exception error!!'.
    """
    print('\n***********************************')
    print(err_str)
    try:
        # sys.exc_type was removed in Python 3; sys.exc_info() is the
        # supported way to get the exception being handled
        etype, evalue, tback = sys.exc_info()
        print('Error: %s' % (etype,))
        if not print_trace:
            # None (rather than '') is the "no traceback" value for excepthook
            tback = None
        sys.excepthook(etype, evalue, tback)
    except Exception:
        print('Error printing exception error!!')
        raise
    print('***********************************\n')
def strip_comments(sinp, char='#'):
    """Remove a trailing comment from a string, skipping over quoted text.

    The string is scanned left to right.  Text between a quote character
    (single or double) and the next occurrence of the same quote is
    skipped, so a comment character inside a quoted string is ignored.
    At the first comment character outside quotes, the text before it is
    returned with trailing whitespace removed.

    Arguments
    ---------
      sinp  (str)  input text
      char  (str)  single comment character (default '#')

    Returns
    -------
      str: the text without the comment, or `sinp` unchanged if no
      comment character is found outside quoted text.
    """
    if sinp.find(char) < 0:
        return sinp
    i = 0
    while i < len(sinp):
        tchar = sinp[i]
        if tchar in ('"', "'"):
            closing = sinp.find(tchar, i + 1)
            if closing >= 0:
                # jump to the closing quote; the increment below steps past
                # it so it is not mistaken for a new opening quote
                i = closing
            # an unterminated quote is treated as an ordinary character
        elif tchar == char:
            return sinp[:i].rstrip()
        i += 1
    return sinp
def strip_quotes(t):
    """Remove one layer of matching surrounding quotes from a string.

    Triple quotes (three double or three single quote characters) are
    checked first, then single-character quotes.  Objects without a
    `startswith` attribute, and strings that are not quoted, are
    returned unchanged.
    """
    if not hasattr(t, 'startswith'):
        return t
    for quote in ('"""', "'''"):
        if t.startswith(quote) and t.endswith(quote):
            return t[3:-3]
    for quote in ('"', "'"):
        if t.startswith(quote) and t.endswith(quote):
            return t[1:-1]
    return t
def isValidName(name):
    """Return True if `name` is a valid name.

    A name is valid when it is not one of RESERVED_WORDS and its
    lower-cased form is matched by NAME_MATCH.
    """
    if name in RESERVED_WORDS:
        return False
    lowered = name[:].lower()
    matched = NAME_MATCH(lowered)
    return matched is not None
def fixName(name, allow_dot=True):
    """Try to turn a string into a valid name.

    A name that is already valid is returned unchanged.  Otherwise a
    leading underscore is tried; if that is still not valid, every
    character outside the allowed set is replaced by '_' (dots are kept
    only when `allow_dot` is true) and an underscore is prepended if the
    result is still not a valid name.
    """
    if isValidName(name):
        return name

    prefixed = '_%s' % name
    if isValidName(prefixed):
        return prefixed

    allowed = VALID_NAME_CHARS if allow_dot else VALID_SNAME_CHARS
    cleaned = ''.join([c if c in allowed else '_' for c in name])
    # the cleaned name may still begin with a number or '.'
    if isValidName(cleaned):
        return cleaned
    return '_%s' % cleaned
def fix_filename(s):
    """Fix a string to be a 'good' filename.

    Characters in BAD_FILECHARS are replaced by '_', and when the result
    contains more than one '.', every '.' except the last is also
    replaced by '_'.  This may be more restrictive than the OS, but
    avoids nasty cases.
    """
    cleaned = str(s).translate(TRANS_FILE)
    if cleaned.count('.') <= 1:
        return cleaned
    # keep only the last dot
    stem, dot, suffix = cleaned.rpartition('.')
    return stem.replace('.', '_') + dot + suffix
def fix_varname(s):
    """Fix a string to be a 'good' variable name.

    Characters in BAD_VARSCHARS are replaced by '_', trailing
    underscores are removed, and a leading '_' is added when the first
    character is not a letter or underscore.

    Input that is empty, or that reduces to nothing but underscores
    (for example '-' or '___'), gives '_unlabeled' rather than an
    empty string, so the result is never empty.

    Arguments
    ---------
      s   (any)  value to convert; str(s) is used

    Returns
    -------
      str: a non-empty name
    """
    t = str(s).translate(TRANS_VARS).rstrip('_')
    if not t:
        return '_unlabeled'
    if t[0] not in VALID_CHARS1:
        t = '_%s' % t
    return t
def common_startstring(words):
    """Return the common starting substring for a list of words.

    Arguments
    ---------
      words  (list of str)  words to compare

    Returns
    -------
      str: the longest string that every word starts with;
      '' if `words` is empty or the words share no prefix.
    """
    if not words:
        return ''
    out = words[0]
    for tmp in words[1:]:
        i = 0
        for a, b in zip(out, tmp):
            if a != b:
                break
            i += 1
        # truncate even without a mismatch, so that a shorter word which
        # is a prefix of the current result shortens it
        out = out[:i]
    return out
def unique_name(name, nlist, max=1000):
    """Return a name that is not in a list, appending _1, _2, ... as needed.

    Suffixes from 1 up to `max` are tried in order.  If every candidate
    is already in `nlist`, the last candidate tried is returned.

    >>> unique_name('foo', ['bar', 'baz'])
    'foo'

    >>> unique_name('foo', ['foo', 'bar', 'baz'])
    'foo_1'

    """
    if name not in nlist:
        return name
    candidate = name
    for suffix in range(1, max + 1):
        candidate = "%s_%i" % (name, suffix)
        if candidate not in nlist:
            return candidate
    return candidate
def isNumber(num):
    """Return True if `num` can be converted with float(), False otherwise."""
    try:
        float(num)
    except (TypeError, ValueError):
        return False
    else:
        return True
def asfloat(x):
    """Convert a value to float, or return it unchanged if that fails.

    A value that float() rejects with TypeError or ValueError is
    returned as given.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        return x
def isLiteralStr(inp):
    """Return True if `inp` starts and ends with the same quote character.

    Both the single quote and the double quote are accepted.
    """
    return any(inp.startswith(quote) and inp.endswith(quote)
               for quote in ("'", '"'))
def find_delims(s, delim='"', match=None):
    """Find matching delimiters (quotes, braces, etc) in a string.

    returns
       True,  index1, index2   if a match is found
       False, index1, len(s)   if a match is not found
    where index1 is the position of the first `delim` (-1 if `delim`
    does not occur) and index2 is the position of the last character
    of the matching delimiter.

    The opening delimiter is set with the keyword arg delim, and the
    matching delimiter with the keyword arg match.  If match is None
    (default), match is set to delim.

    A matching delimiter preceded by a single backslash is skipped; one
    preceded by two backslashes is accepted.

    >>> find_delims(mystr, delim=":")
    >>> find_delims(mystr, delim='<', match='>')
    """
    backslash, double_backslash = "\\", "\\\\"
    closer = delim if match is None else match

    start = s.find(delim)
    if start < 0:
        return False, start, len(s)

    width = len(closer)
    for pos in range(start + 1, len(s)):
        if s[pos:pos + width] != closer:
            continue
        before1 = s[pos - 1:pos]
        # two preceding characters exist only from position 2 onwards
        before2 = s[pos - 2:pos] if pos > 1 else None
        escaped = (before1 == backslash and before2 != double_backslash)
        if not escaped:
            return True, start, pos + width - 1
    return False, start, len(s)
def version_ge(v1, v2):
    """Return whether version string v1 >= version string v2.

    Both strings are parsed with packaging.version.parse before the
    comparison.
    """
    first = pkg_version.parse(v1)
    second = pkg_version.parse(v2)
    return first >= second
def b32hash(s):
    """Return the base32-encoded SHA-256 hash of a string, as a str."""
    digest = hashlib.sha256(str2bytes(s)).digest()
    return bytes2str(b32encode(digest))
def b64hash(s):
    """Return the base64-encoded SHA-256 hash of a string, as a str."""
    digest = hashlib.sha256(str2bytes(s)).digest()
    return bytes2str(b64encode(digest))
def get_sessionid():
    """Get an 8 character string derived from uuid.getnode() and the process id.

    The two numbers are hashed with SHA-256, the digest is base64
    encoded, and characters 3 through 10 of that text are returned with
    '/' replaced by '-' and '+' replaced by '='.
    """
    seed_text = f"{uuid.getnode():d} {os.getpid():d}"
    digest = hashlib.sha256(seed_text.encode('ASCII')).digest()
    encoded = b64encode(digest).decode('ASCII')
    return encoded[3:11].replace('/', '-').replace('+', '=')
def random_varname(n, rng_seed=None):
    """Return a random name of lowercase letters and digits.

    The first character is always a lowercase letter; the remaining
    n-1 characters are lowercase letters or digits.

    Arguments
    ---------
      n         (int)          length of the name
      rng_seed  (None or seed) if not None, the module-level random
                number generator is seeded with this value first, so
                that the result is reproducible.  If None (default),
                the generator is used in its current state.
    """
    L = 'abcdefghijklmnopqrstuvwxyz0123456789'

    # seed only when a seed is supplied: seeding with None would discard
    # the requested seed and reseed the generator on every call
    if rng_seed is not None:
        rng.seed(rng_seed)
    return rng.choice(L[:26]) + ''.join([rng.choice(L) for _ in range(n-1)])
def file2groupname(filename, slen=9, minlen=2, symtable=None, rng_seed=None):
    """Create a group name based on a filename.

    The filename is converted with fix_varname(), lower-cased, and
    stripped of underscores.  A random letter is prepended if the result
    is empty or does not start with a lowercase letter, random
    characters are appended if it is shorter than `minlen`, and it is
    truncated to `slen` characters.

    If `symtable` is given and already has an attribute with that name,
    a 2 digit number is appended (and, after 99 attempts, random
    characters as well) until an unused name is found.

    Arguments
    ---------
      filename  (str)  filename to use
      slen      (int)  maximum length of string portion (default 9)
      minlen    (int)  minimum length of string portion (default 2)
      symtable  (None or larch symbol table)  symbol table for
                checking that the group name is unique
      rng_seed  (None or seed)  if not None, the module-level random
                number generator is seeded with this value first
    """
    # seed only when a seed is supplied (seeding with None would ignore it)
    if rng_seed is not None:
        rng.seed(rng_seed)

    gname = fix_varname(filename).lower().replace('_', '')

    # an empty name (filename with no usable characters) also needs a
    # leading letter, and must not be indexed
    if not gname or gname[0] not in 'abcdefghijklmnopqrstuvwxyz':
        gname = rng.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g']) + gname
    if len(gname) < minlen:
        gname = gname + random_varname(minlen-len(gname))

    gname = gname[:slen]
    if symtable is None:
        return gname

    gbase = gname
    scount, count, n = 0, 0, 2
    while hasattr(symtable, gname):
        count += 1
        if count == 100:
            # 2 digit suffixes are used up: extend the base with random
            # characters and restart the numbering
            count = 1
            scount += 1
            if scount > 200:
                scount = 0
                n = n + 1
            # NOTE(review): this extends the current candidate (which already
            # carries a numeric suffix), not the original base -- confirm intent
            gbase = gname + random_varname(n)
        gname = f"{gbase}{count:02d}"
    return gname
def break_longstring(s, maxlen=90, n1=20):
    """Break a long string into a list of smaller strings.

    Each piece is at most `maxlen` characters.  A break is placed just
    after a comma, space, tab, period, or slash found in the last `n1`
    characters of the allowed length (delimiters are preferred in that
    order); if none is found there, the string is cut at `maxlen`.
    Joining the pieces gives back the original string.

    Arguments
    ---------
      s       (str)  string to break
      maxlen  (int)  maximum length of each piece (default 90)
      n1      (int)  size of the window before `maxlen` that is searched
                     for a delimiter (default 20)

    Returns
    -------
      list of str, even if of length 1
    """
    maxlen = max(maxlen, 1)          # a cut must always consume text
    minlen = max(maxlen - n1, 0)

    if len(s) <= maxlen:
        return [s]
    out = []
    while len(s) > maxlen:
        window = s[minlen:maxlen]
        cut = maxlen                 # hard break when no delimiter is found
        for delim in (',', ' ', '\t', '.', '/'):
            idx = window.find(delim)
            if idx >= 0:             # index 0 is a valid hit
                cut = minlen + idx + 1
                break
        out.append(s[:cut])
        s = s[cut:]
    out.append(s)
    return out
def array_hash(arr, len=12):
    """Generate a hash for an array, to tell if an array has changed.

    Each element is formatted with gformat(x, length=16); the joined
    text is hashed with b32hash() and the first `len` characters are
    returned in lower case.
    """
    text = ''.join([gformat(value, length=16) for value in arr])
    digest = b32hash(text)
    return digest[:len].lower()