Coverage for larch/io/mergegroups.py: 11%

146 statements  

« prev     ^ index     » next       coverage.py v7.6.0, created at 2024-10-16 21:04 +0000

1#!/usr/bin/env python 

2""" 

3merge groups, interpolating if necessary 

4""" 

5import os 

6import numpy as np 

7from larch import Group 

8from larch.math import interp, interp1d, index_of, remove_dups 

9from larch.utils.logging import getLogger 

10 

11_logger = getLogger("larch.io.mergegroups") 

12 

def merge_groups(grouplist, master=None, xarray='energy', yarray='mu',
                 kind='cubic', trim=True, calc_yerr=True):
    """merge arrays from a list of groups.

    Arguments
    ---------
      grouplist   list of groups to merge
      master      group to use for common x array [None -> 1st group]
      xarray      name of x-array for merge ['energy']
      yarray      name of y-array for merge ['mu']
      kind        interpolation kind ['cubic']
      trim        whether to trim to the shortest energy range [True]
      calc_yerr   whether to use the variance in the input as yerr [True]

    Returns
    --------
      group with x-array and y-array containing merged data.

    Notes
    -----
    The standard deviation across the interpolated inputs is always stored
    on the output as ``<yarray>_std``, regardless of ``calc_yerr``; the
    flag is currently only forwarded to the recursive linear fallback.
    """
    if master is None:
        master = grouplist[0]

    # common x-axis: the master's x array with duplicate values removed
    xout = remove_dups(getattr(master, xarray))
    xmins = [min(xout)]
    xmaxs = [max(xout)]
    yvals = []

    # interpolate every group's y onto the common axis, recording each
    # group's own x-range for the optional trim below
    for g in grouplist:
        x = getattr(g, xarray)
        y = getattr(g, yarray)
        yvals.append(interp(x, y, xout, kind=kind))
        xmins.append(min(x))
        xmaxs.append(max(x))

    # merged signal is the pointwise mean; spread is the pointwise std
    yvals = np.array(yvals)
    yave = yvals.mean(axis=0)
    ystd = yvals.std(axis=0)

    # True when xout is already strictly increasing: argsort is then the
    # identity permutation, so its first difference is all ones
    xout_increasing = len(np.where(np.diff(np.argsort(xout))!=1)[0]) == 0
    if trim and xout_increasing:
        # NOTE(review): xmin uses min() of the minima, so the low side is
        # effectively never trimmed; for the common overlap one would
        # expect max(xmins) -- confirm intent
        xmin = min(xmins)
        xmax = min(xmaxs)
        ixmin = index_of(xout, xmin)
        ixmax = index_of(xout, xmax)
        # NOTE(review): the slice excludes index ixmax, which may drop the
        # last common point; verify against larch.math.index_of semantics
        xout = xout[ixmin:ixmax]
        yave = yave[ixmin:ixmax]
        ystd = ystd[ixmin:ixmax]

    grp = Group()
    setattr(grp, xarray, xout)
    setattr(grp, yarray, yave)
    setattr(grp, yarray + '_std', ystd)

    if kind == 'cubic' and xout_increasing:
        y0 = getattr(master, yarray)
        # if the derivative gets much worse, use linear interpolation
        if max(np.diff(yave)) > 50*max(np.diff(y0)):
            grp = merge_groups(grouplist, master=master, xarray=xarray,
                               yarray=yarray, trim=trim,
                               calc_yerr=calc_yerr, kind='linear')
    return grp

74 

def imin(arr, debug=False):
    """Return the index of the minimum value of *arr*."""
    idx = np.argmin(arr)
    if debug:
        # sanity check: the value at the found index equals the minimum
        _logger.debug("Check: {0} = {1}".format(np.min(arr), arr[idx]))
    return idx

81 

82 

def imax(arr, debug=False):
    """Return the index of the maximum value of *arr*."""
    idx = np.argmax(arr)
    if debug:
        # sanity check: the value at the found index equals the maximum
        _logger.debug("Check: {0} = {1}".format(np.max(arr), arr[idx]))
    return idx

89 

90 

def lists_to_matrix(data, axis=None, **kws):
    """Convert two parallel lists of 1D arrays to a 2D matrix

    Parameters
    ----------
    data : list of two lists of 1D arrays
        ``[[x1, ..., xN], [z1, ..., zN]]``
    axis : None or array 1D, optional
        a reference axis used for the interpolation [None -> xdats[0]]
    **kws : optional
        keyword arguments for scipy.interpolate.interp1d()

    Returns
    -------
    axis, outmat : arrays
    """
    assert len(data) == 2, "'data' should be a list of two lists"
    xdats, zdats = data
    assert isinstance(xdats, list), "'xdats' not a list"
    assert isinstance(zdats, list), "'zdats' not a list"
    assert len(xdats) == len(zdats), "lists of data not of the same length"
    assert all(isinstance(z, np.ndarray) for z in zdats), "data in list must be arrays"
    if axis is None:
        axis = xdats[0]
    assert isinstance(axis, np.ndarray), "axis must be array"
    if all(z.size == axis.size for z in zdats):
        # every z already matches the axis length: stack directly
        return axis, np.array(zdats)
    # otherwise interpolate each curve onto the reference axis
    matrix = np.zeros((len(zdats), axis.size))
    for row, (xarr, zarr) in enumerate(zip(xdats, zdats)):
        matrix[row] = interp1d(xarr, zarr, **kws)(axis)
    return axis, matrix

130 

131 

def curves_to_matrix(curves, axis=None, **kws):
    """Convert a list of curves to a 2D data matrix

    Parameters
    ----------
    curves : list of lists
        Curves format is the following:
        [
            [x1, y1, label1, info1],
            ...
            [xN, yN, labelN, infoN]
        ]
    axis : None or array 1D, optional
        a reference axis used for the interpolation [None -> curves[0][0]]
    **kws : optional
        keyword arguments for func:`scipy.interpolate.interp1d`

    Returns
    -------
    axis, outmat : arrays
        Curves whose x/y are not arrays contribute a row of zeros.
    """
    assert isinstance(curves, list), "'curves' not a list"
    assert all(
        (isinstance(curve, list) and len(curve) == 4) for curve in curves
    ), "curves should be lists of four elements"
    if axis is None:
        axis = curves[0][0]
    assert isinstance(axis, np.ndarray), "axis must be array"
    outmat = np.zeros((len(curves), axis.size))
    for icurve, curve in enumerate(curves):
        x, y, label, info = curve
        # explicit check instead of assert/except AssertionError: asserts
        # are stripped under `python -O`, which would have disabled the
        # zero-fill fallback and crashed on non-array data below
        if not (isinstance(x, np.ndarray) and isinstance(y, np.ndarray)):
            _logger.error(
                "[curve_to_matrix] Curve %d (%s) not containing arrays -> ADDING ZEROS",
                icurve,
                label,
            )
            continue
        if (x.size == axis.size) and (y.size == axis.size):
            #: all same length
            outmat[icurve] = y
        else:
            #: interpolate onto the reference axis
            fdat = interp1d(x, y, **kws)
            outmat[icurve] = fdat(axis)
            _logger.debug("[curve_to_matrix] Curve %d (%s) interpolated", icurve, label)
    return axis, outmat

184 

185 

def sum_arrays_1d(data, axis=None, **kws):
    """Sum list of 1D arrays or curves by interpolation on a reference axis

    Parameters
    ----------
    data : lists of lists
    data_fmt : str
        define data format
        - "curves" -> :func:`curves_to_matrix`
        - "lists" -> :func:`lists_to_matrix`

    Returns
    -------
    axis, zsum : 1D arrays
    """
    data_fmt = kws.pop("data_fmt", "curves")
    converters = {"curves": curves_to_matrix, "lists": lists_to_matrix}
    if data_fmt not in converters:
        raise NameError("'data_fmt' not understood")
    ax, mat = converters[data_fmt](data, axis=axis, **kws)
    return ax, mat.sum(axis=0)

209 

210 

def avg_arrays_1d(data, axis=None, weights=None, **kws):
    """Average list of 1D arrays or curves by interpolation on a reference axis

    Parameters
    ----------
    data : lists of lists
    data_fmt : str
        define data format
        - "curves" -> :func:`curves_to_matrix`
        - "lists" -> :func:`lists_to_matrix`
    weights : None or array
        weights for the average

    Returns
    -------
    axis, zavg : 1D arrays
        np.average(zdats)
    """
    data_fmt = kws.pop("data_fmt", "curves")
    converters = {"curves": curves_to_matrix, "lists": lists_to_matrix}
    if data_fmt not in converters:
        raise NameError("'data_fmt' not understood")
    ax, mat = converters[data_fmt](data, axis=axis, **kws)
    return ax, np.average(mat, axis=0, weights=weights)

237 

238 

def merge_arrays_1d(data, method="average", axis=None, weights=None, **kws):
    """Merge a list of 1D arrays by interpolation on a reference axis

    Parameters
    ----------
    data : lists of lists
    data_fmt : str
        define data format
        - "curves" -> :func:`curves_to_matrix`
        - "lists" -> :func:`lists_to_matrix`
    axis : None or array 1D, optional
        a reference axis used for the interpolation [None -> xdats[0]]
    method : str, optional
        method used to merge, available methods are:
        - "average" : uses np.average()
        - "sum" : uses np.sum()
    weights : None or array 1D, optional
        used if method == "average"

    Returns
    -------
    axis, zmrg : 1D arrays
        merge(zdats)
    """
    if method == "average":
        return avg_arrays_1d(data, axis=axis, weights=weights, **kws)
    if method == "sum":
        return sum_arrays_1d(data, axis=axis, **kws)
    raise NameError("wrong 'method': %s" % method)

269 

270 

def rebin_piecewise_constant(x1, y1, x2):
    """Rebin histogram values y1 from old bin edges x1 to new edges x2.

    Code taken from: https://github.com/jhykes/rebin/blob/master/rebin.py

    It follows the procedure described in Figure 18.13 (chapter 18.IV.B.
    Spectrum Alignment, page 703) of Knoll [1]

    References
    ----------
    [1] Glenn Knoll, Radiation Detection and Measurement, third edition,
        Wiley, 2000.

    Parameters
    ----------
    - x1 : m+1 array of old bin edges.
    - y1 : m array of old histogram values.
        This is the total number in each bin, not an average.
    - x2 : n+1 array of new bin edges.

    Returns
    -------
    - y2 : n array of rebinned histogram values.
    """
    old_edges = np.asarray(x1)
    old_counts = np.asarray(y1)
    new_edges = np.asarray(x2)

    # fractional position of every new edge within the old binning
    pos = np.interp(new_edges, old_edges, np.arange(len(old_edges)))
    lo, hi = pos[:-1], pos[1:]

    # cumulative counts with a leading zero, so csum[j] - csum[i] is the
    # total content of old bins i..j-1
    csum = np.r_[[0], np.cumsum(old_counts)]

    # contribution from old bins fully contained in a new bin (the
    # 'intact' bins, excluding the fractional start and end parts)
    spans_whole = np.floor(hi) - np.ceil(lo) >= 1.0
    whole = csum[np.floor(hi).astype(int)] - csum[np.ceil(lo).astype(int)]
    out = np.where(spans_whole, whole, 0.0)

    # index of the old bin containing each new edge, clipped to valid range
    cell = np.clip(np.floor(pos).astype(int), 0, len(old_counts) - 1)

    # both edges of a new bin fall in the same old bin: one fractional piece
    same_cell = np.floor(hi) == np.floor(lo)
    out += np.where(same_cell, (hi - lo) * old_counts[cell[:-1]], 0.0)

    # edges fall in different old bins: fractional pieces at both ends
    straddles = np.floor(hi) > np.floor(lo)
    ends = (np.ceil(lo) - lo) * old_counts[cell[:-1]] \
           + (hi - np.floor(hi)) * old_counts[cell[1:]]
    out += np.where(straddles, ends, 0.0)

    return out

335 

336 

def reject_outliers(data, m=5.189, return_ma=False):
    """Reject outliers by median absolute deviation.

    Modified from: https://stackoverflow.com/questions/11686720/is-there-a-numpy-builtin-to-reject-outliers-from-a-list
    See also: https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm

    Parameters
    ----------
    data : array-like
    m : float, optional
        cutoff on the median-normalized absolute deviation [5.189]
    return_ma : bool, optional
        if True, return a masked array (outliers masked) instead of the
        filtered data [False]

    Returns
    -------
    array (return_ma=False) or numpy masked array (return_ma=True)
    """
    if not isinstance(data, np.ndarray):
        data = np.array(data)
    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    # guard against mdev == 0 (more than half the points identical)
    s = d / (mdev if mdev else 1.0)
    mask = s < m
    if return_ma:
        # BUGFIX: previously the masked-array path masked s > m, so points
        # with s == m were kept there but dropped by the filter path below;
        # mask the complement of `mask` so both paths agree
        return np.ma.masked_array(data=data, mask=~mask)
    else:
        return data[mask]