Coverage for larch/utils/strutils.py: 30%

219 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-10-16 21:04 +0000

1#!/usr/bin/env python 

2""" 

3utilities for larch 

4""" 

5from __future__ import print_function 

6import re 

7import sys 

8import os 

9import uuid 

10import hashlib 

11from base64 import b64encode, b32encode 

12from random import Random 

13from packaging import version as pkg_version 

14 

15from .gformat import gformat 

16 

# Module-level random number generator; random_varname() and
# file2groupname() draw from this instance and may reseed it.
rng = Random()

18 

def bytes2str(s):
    """Convert bytes-like data to str; a str is returned unchanged.

    Decoding uses the encoding reported by sys.stdout, falling back to
    UTF-8 when sys.stdout is None or reports no encoding (as can happen
    in GUI or embedded interpreters).

    Raises TypeError (from str()) if s is neither str nor bytes-like.
    """
    if isinstance(s, str):
        return s
    # sys.stdout may be None, or its encoding attribute may be None
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    if isinstance(s, bytes):
        return s.decode(encoding)
    return str(s, encoding)

25 

def str2bytes(s):
    """Convert a str to bytes; bytes input is returned unchanged.

    Encoding uses the encoding reported by sys.stdout, falling back to
    UTF-8 when sys.stdout is None or reports no encoding (as can happen
    in GUI or embedded interpreters).

    Raises TypeError (from bytes()) if s is neither bytes nor str.
    """
    if isinstance(s, bytes):
        return s
    # sys.stdout may be None, or its encoding attribute may be None
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    return bytes(s, encoding)

31 

32 

def strict_ascii(s, replacement='_'):
    """Return s reduced to pure ASCII (all characters below 128).

    The string is encoded as UTF-8 and every byte with value 128 or
    above is replaced by `replacement`.  A non-ASCII character occupies
    several UTF-8 bytes, so it produces one replacement per byte.
    """
    pieces = []
    for byte in bytes(s, 'UTF-8'):
        if byte >= 128:
            pieces.append(replacement)
        else:
            pieces.append(chr(byte))
    return ''.join(pieces)

37 

38 

# Names rejected by isValidName(): Python keywords together with
# block-ending words such as 'end', 'enddef', 'endfor', 'endif'.
RESERVED_WORDS = (
    'False', 'None', 'True', 'and', 'as', 'assert', 'async',
    'await', 'break', 'class', 'continue', 'def', 'del', 'elif',
    'else', 'end', 'enddef', 'endfor', 'endif', 'endtry',
    'endwhile', 'eval', 'except', 'exec', 'execfile', 'finally',
    'for', 'from', 'global', 'group', 'if', 'import', 'in', 'is',
    'lambda', 'nonlocal', 'not', 'or', 'pass', 'print', 'raise',
    'return', 'try', 'while', 'with', 'yield',
)

# Matcher for a (possibly dotted) identifier such as 'a' or 'a.b_1.c'.
NAME_MATCH = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*$").match

# Characters allowed in a simple name, in a dotted name, and as the
# first character of a name, respectively.
VALID_SNAME_CHARS = ('abcdefghijklmnopqrstuvwxyz'
                     'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                     '0123456789_')
VALID_NAME_CHARS = '.' + VALID_SNAME_CHARS
VALID_CHARS1 = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '_')

# Characters replaced by '_' when building file names ...
BAD_FILECHARS = ';~,`!%$@$&^?*#:"/|\'\\\t\r\n (){}[]<>'
GOOD_FILECHARS = len(BAD_FILECHARS) * '_'

# ... and when building variable names (additionally '=', '+', '-', '.').
BAD_VARSCHARS = BAD_FILECHARS + '=+-.'
GOOD_VARSCHARS = len(BAD_VARSCHARS) * '_'

# Translation tables for str.translate(), used by fix_filename() and
# fix_varname().
TRANS_FILE = str.maketrans(BAD_FILECHARS, GOOD_FILECHARS)
TRANS_VARS = str.maketrans(BAD_VARSCHARS, GOOD_VARSCHARS)

61 

62 

def PrintExceptErr(err_str, print_trace=True):
    """Print a banner, a message, and the exception being handled.

    Meant to be called from inside an ``except`` block: the active
    exception is taken from sys.exc_info() and reported through
    sys.excepthook.

    Arguments
    ---------
      err_str      message printed before the exception details
      print_trace  whether the traceback is passed on to
                   sys.excepthook (default True)
    """
    print('\n***********************************')
    print(err_str)
    try:
        # sys.exc_type no longer exists in Python 3; sys.exc_info() is
        # the supported way to get the exception being handled.
        etype, evalue, tback = sys.exc_info()
        print('Error: %s' % (etype,))
        if not print_trace:
            # None (rather than '') is the valid "no traceback" value
            tback = None
        sys.excepthook(etype, evalue, tback)
    except Exception:
        print('Error printing exception error!!')
        raise
    print('***********************************\n')

78 

def strip_comments(sinp, char='#'):
    """Remove a trailing comment from a string, skipping quoted text.

    Returns sinp up to (not including) the first occurrence of the
    single-character comment marker `char` that lies outside a
    single- or double-quoted section, with trailing whitespace removed.
    If there is no such marker, sinp is returned unchanged.

    Backslash-escaped quotes are not treated specially, and a quote
    without a closing partner does not protect the text after it.
    """
    if sinp.find(char) < 0:
        return sinp
    i = 0
    while i < len(sinp):
        tchar = sinp[i]
        if tchar in ('"', "'"):
            eoc = sinp.find(tchar, i + 1)
            if eoc > i:
                # jump onto the closing quote; the increment below then
                # moves past it, so it is not mistaken for an opening one
                i = eoc
        elif tchar == char:
            return sinp[:i].rstrip()
        i = i + 1
    return sinp

94 

def strip_quotes(t):
    """Strip one pair of matching outer quotes from a string.

    Triple quotes (three double or three single quote characters) are
    tried before single-character quotes; only the outermost pair is
    removed.  Objects without a `startswith` attribute, and strings
    that are not quoted, are returned unchanged.
    """
    if not hasattr(t, 'startswith'):
        return t
    for quote in ('"""', "'''", '"', "'"):
        if t.startswith(quote) and t.endswith(quote):
            width = len(quote)
            return t[width:-width]
    return t

105 

def isValidName(name):
    """Return True if name is a valid, optionally dotted, symbol name.

    A name is rejected when it is one of RESERVED_WORDS (compared
    case-sensitively) or when its lower-cased form does not match
    NAME_MATCH.
    """
    return (name not in RESERVED_WORDS
            and NAME_MATCH(name[:].lower()) is not None)

112 

def fixName(name, allow_dot=True):
    """Try to turn a string into a valid name.

    Arguments
    ---------
      name       string to fix
      allow_dot  whether '.' may appear in the result (default True)

    The name is returned unchanged if it is already acceptable, or with
    a leading '_' if that alone makes it acceptable.  Otherwise every
    character outside the allowed set is replaced by '_', and a leading
    '_' is added if the result is still not a valid name.
    """
    def _acceptable(candidate):
        # isValidName() accepts dotted names, so the allow_dot
        # restriction has to be enforced here as well
        return isValidName(candidate) and (allow_dot or '.' not in candidate)

    if _acceptable(name):
        return name

    prefixed = '_%s' % name
    if _acceptable(prefixed):
        return prefixed

    valid_chars = VALID_NAME_CHARS if allow_dot else VALID_SNAME_CHARS
    name = ''.join([s if s in valid_chars else '_' for s in name])

    # last check (name may begin with a number or .)
    if not isValidName(name):
        name = '_%s' % name
        if not isValidName(name):
            # what remains invalid is a misplaced dot (leading, trailing,
            # doubled, or followed by a digit), so drop the dots
            name = name.replace('.', '_')
    return name

133 

134 

def fix_filename(s):
    """Fix a string to be a 'good' filename.

    This may be more restrictive than the OS, but avoids nasty cases:
    characters in BAD_FILECHARS become '_' (via TRANS_FILE), and every
    '.' except the last one is replaced by '_'.
    """
    cleaned = str(s).translate(TRANS_FILE)
    # split at the last dot; any dots left in the stem are replaced
    stem, dot, ext = cleaned.rpartition('.')
    return stem.replace('.', '_') + dot + ext

145 

def fix_varname(s):
    """Fix a string to be a 'good' variable name.

    Characters in BAD_VARSCHARS become '_' (via TRANS_VARS), trailing
    underscores are removed, and a leading '_' is added when the first
    character is not in VALID_CHARS1.  Input that reduces to nothing
    (empty, or only underscores after translation) gives '_unlabeled'
    rather than an empty string.
    """
    t = str(s).translate(TRANS_VARS).rstrip('_')
    if not t:
        return '_unlabeled'
    if t[0] not in VALID_CHARS1:
        t = '_%s' % t
    return t

157 

def common_startstring(words):
    """Return the longest common starting substring of a list of words.

    Returns '' for an empty list, and the word itself for a list of
    one word.
    """
    if not words:
        return ''
    out = words[0]
    for tmp in words[1:]:
        nmatch = 0
        for a, b in zip(out, tmp):
            if a != b:
                break
            nmatch += 1
        # truncate even when zip() ran out without a mismatch, so that a
        # shorter word which is a prefix of `out` shortens the result
        out = out[:nmatch]
        if not out:
            break
    return out

169 

170 

def unique_name(name, nlist, max=1000):
    """Return a name that is not in nlist, by appending _1, _2, ... as
    necessary, up to the suffix `max`.

    If every candidate up to `max` is already in nlist, the last
    candidate tried is returned even though it is not unique.

    >>> unique_name('foo', ['bar', 'baz'])
    'foo'

    >>> unique_name('foo', ['foo', 'bar', 'baz'])
    'foo_1'

    """
    if name not in nlist:
        return name
    candidate = name
    for suffix in range(1, max+1):
        candidate = "%s_%i" % (name, suffix)
        if candidate not in nlist:
            return candidate
    return candidate

189 

190 

def isNumber(num):
    """Return True if float(num) succeeds, and False if it raises
    TypeError or ValueError."""
    try:
        float(num)
    except (TypeError, ValueError):
        return False
    return True

198 

def asfloat(x):
    """Return float(x) if x can be converted to a float; otherwise
    return x itself unchanged."""
    try:
        return float(x)
    except (TypeError, ValueError):
        # not a number: hand the value back as it came in
        return x

202 

203 

204 

def isLiteralStr(inp):
    """Return True if inp both starts and ends with a single quote, or
    both starts and ends with a double quote."""
    for quote in ("'", '"'):
        if inp.startswith(quote) and inp.endswith(quote):
            return True
    return False

209 

210 

def find_delims(s, delim='"',match=None):
    """find matching delimiters (quotes, braces, etc) in a string.

    returns
       True, index1, index2      if a match is found
       False, index1, len(s)     if a match is not found

    index1 is the position of the first occurrence of delim in s (-1 if
    delim does not occur), and index2 is the position of the last
    character of the matching delimiter.

    the delimiter can be set with the keyword arg delim,
    and the matching delimiter with keyword arg match.

    if match is None (default), match is set to delim.

    a candidate match that is preceded by a single backslash is skipped,
    unless the two characters before it are both backslashes.

    >>> find_delims(mystr, delim=":")
    >>> find_delims(mystr, delim='<', match='>')
    """
    esc, dbesc = "\\", "\\\\"
    if match is None:
        match = delim
    j = s.find(delim)
    if j > -1 and s[j:j+len(delim)] == delim:
        p1, p2, k = None, None, j
        # j+len(s[j+1:]) equals len(s)-1, so k stops at the last index of s
        while k < j+len(s[j+1:]):
            k = k+1
            # p1: the one character before position k,
            # p2: the two characters before position k
            if k > 0: p1 = s[k-1:k]
            if k > 1: p2 = s[k-2:k]
            # accept a match at k unless it is escaped by a single backslash
            if (s[k:k+len(match)] == match and not (p1 == esc and p2 != dbesc)):
                return True, j, k+len(match)-1
            # this value is overwritten at the top of the next iteration
            p1 = s[k:k+1]
    return False, j, len(s)

238 

def version_ge(v1, v2):
    """Return whether version string v1 >= version string v2, comparing
    the values parsed by pkg_version.parse()."""
    first = pkg_version.parse(v1)
    second = pkg_version.parse(v2)
    return first >= second

242 

def b32hash(s):
    """Return the base32-encoded SHA-256 hash of a string, as a str."""
    digest = hashlib.sha256(str2bytes(s)).digest()
    return bytes2str(b32encode(digest))

248 

def b64hash(s):
    """Return the base64-encoded SHA-256 hash of a string, as a str."""
    digest = hashlib.sha256(str2bytes(s)).digest()
    return bytes2str(b64encode(digest))

254 

def get_sessionid():
    """Return an 8 character string encoding machine name and process id.

    The value is a slice of the base64-encoded SHA-256 hash of
    uuid.getnode() and os.getpid(), with '/' replaced by '-' and '+'
    replaced by '='.
    """
    seed = f"{uuid.getnode():d} {os.getpid():d}".encode('ASCII')
    encoded = b64encode(hashlib.sha256(seed).digest()).decode('ASCII')
    return encoded[3:11].translate(str.maketrans('/+', '-='))

261 

262 

def random_varname(n, rng_seed=None):
    """Return a random name of n characters.

    The first character is a lowercase letter; the remaining n-1
    characters are lowercase letters or digits.  At least one character
    is always returned, even for n < 1.

    Arguments
    ---------
      n         (int) length of the name
      rng_seed  (None or seed value) if not None, the module-level
                random generator is reseeded with this value first,
                making the result reproducible
    """
    letters = 'abcdefghijklmnopqrstuvwxyz'
    alnum = letters + '0123456789'

    # reseed only when a seed is actually supplied; the test used to be
    # inverted, so a supplied seed was never applied
    if rng_seed is not None:
        rng.seed(rng_seed)
    return rng.choice(letters) + ''.join([rng.choice(alnum) for _ in range(n-1)])

270 

271 

def file2groupname(filename, slen=9, minlen=2, symtable=None, rng_seed=None):
    """create a group name based on filename

    the group name has a string component of at most slen characters;
    when symtable already has an attribute of that name, a 2 digit
    number is appended (and random characters are added if needed)
    until the name is not an attribute of symtable.

    Arguments
    ---------
      filename  (str)  filename to use
      slen      (int)  maximum length of string portion (default 9)
      minlen    (int)  minimum length of string portion (default 2);
                       shorter names are padded with random characters
      symtable  (None or larch symbol table) symbol table for
                       checking that the group name is unique
      rng_seed  (None or seed value) if not None, the module-level
                       random generator is reseeded with this value
    """
    # reseed only when a seed is actually supplied; the test used to be
    # inverted, so a supplied seed was never applied
    if rng_seed is not None:
        rng.seed(rng_seed)

    gname = fix_varname(filename).lower().replace('_', '')

    # gname can be empty here (all underscores were removed), so test
    # for that before indexing the first character
    if not gname or gname[0] not in 'abcdefghijklmnopqrstuvwxyz':
        gname = rng.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g']) + gname
    if len(gname) < minlen:
        gname = gname + random_varname(minlen-len(gname))

    gname = gname[:slen]
    if symtable is None:
        return gname

    gbase = gname
    scount, count, n = 0, 0, 2
    while hasattr(symtable, gname):
        count += 1
        if count == 100:
            # 99 numbered names are taken: restart the numbering with a
            # new base extended by n random characters
            count = 1
            scount += 1
            if scount > 200:
                scount = 0
                n = n + 1
            # NOTE(review): gname already carries the previous 2 digit
            # suffix here, so the new base includes it - confirm intent
            gbase = gname + random_varname(n)
        gname = f"{gbase}{count:02d}"
    return gname

312 

313 

def break_longstring(s, maxlen=90, n1=20):
    """break a long string into a list of smaller strings,
    broken at comma, space, tab, period, or slash

    Each piece is at most maxlen characters long.  A break point is
    looked for in the last n1 characters of that span, trying the
    separators in the order listed above and using the first
    occurrence of the first separator found; the separator stays at
    the end of the piece.  If no separator is found there, the string
    is cut at maxlen characters.  Joining the pieces gives back s.

    Arguments
    ---------
      s       (str) string to break
      maxlen  (int) maximum length of each piece (default 90)
      n1      (int) size of the window before maxlen that is searched
                    for a break point (default 20)

    returns a list of strings, even if length 1
    """
    if maxlen < 1 or len(s) <= maxlen:
        # nothing to break (maxlen < 1 cannot make progress)
        return [s]

    minlen = max(maxlen - n1, 0)
    separators = (',', ' ', '\t', '.', '/')
    out = []
    while len(s) > maxlen:
        # search only inside the allowed span, so that a separator far
        # beyond maxlen cannot produce an over-long piece
        window = s[minlen:maxlen]
        cut = maxlen
        for sep in separators:
            idx = window.find(sep)
            if idx >= 0:
                cut = minlen + idx + 1
                break
        out.append(s[:cut])
        s = s[cut:]
    out.append(s)
    return out

343 

344 

def array_hash(arr, len=12):
    """Generate a hash for an array, to tell if an array has changed.

    Each element is formatted with gformat(x, length=16); the joined
    text is hashed with b32hash() and the first `len` characters of the
    hash are returned in lower case.
    """
    formatted = ''.join(gformat(x, length=16) for x in arr)
    digest = b32hash(formatted)
    return digest[:len].lower()