Coverage for larch/io/mergegroups.py: 11%
146 statements
« prev ^ index » next coverage.py v7.6.0, created at 2024-10-16 21:04 +0000
1#!/usr/bin/env python
2"""
3merge groups, interpolating if necessary
4"""
5import os
6import numpy as np
7from larch import Group
8from larch.math import interp, interp1d, index_of, remove_dups
9from larch.utils.logging import getLogger
11_logger = getLogger("larch.io.mergegroups")
def merge_groups(grouplist, master=None, xarray='energy', yarray='mu',
                 kind='cubic', trim=True, calc_yerr=True):
    """merge arrays from a list of groups.

    Arguments
    ---------
     grouplist   list of groups to merge
     master      group to use for common x arrary [None -> 1st group]
     xarray      name of x-array for merge ['energy']
     yarray      name of y-array for merge ['mu']
     kind        interpolation kind ['cubic']
     trim        whether to trim to the shortest energy range [True]
     calc_yerr   whether to use the variance in the input as yerr [True]

    Returns
    --------
     group with x-array and y-array containing merged data.

    Notes
    -----
    The merged y is the plain mean over groups of y interpolated onto the
    master x-array; the standard deviation is stored as '<yarray>_std'.
    """
    # NOTE(review): calc_yerr is accepted and forwarded on recursion, but the
    # std array is always computed regardless -- confirm intended behavior.
    if master is None:
        master = grouplist[0]

    xout = remove_dups(getattr(master, xarray))
    xmins = [min(xout)]
    xmaxs = [max(xout)]
    yvals = []

    for g in grouplist:
        x = getattr(g, xarray)
        y = getattr(g, yarray)
        yvals.append(interp(x, y, xout, kind=kind))
        xmins.append(min(x))
        xmaxs.append(max(x))

    yvals = np.array(yvals)
    yave = yvals.mean(axis=0)
    ystd = yvals.std(axis=0)

    # only trim when xout is monotonically increasing; index_of slicing is
    # meaningless otherwise
    xout_increasing = len(np.where(np.diff(np.argsort(xout))!=1)[0]) == 0
    if trim and xout_increasing:
        # trim to the intersection of all ranges (the "shortest energy range"
        # promised by the docstring): largest of the minima, smallest of the
        # maxima.  The previous min(xmins) kept regions where some groups had
        # no measured data and relied on extrapolated values.
        xmin = max(xmins)
        xmax = min(xmaxs)
        ixmin = index_of(xout, xmin)
        ixmax = index_of(xout, xmax)
        xout = xout[ixmin:ixmax]
        yave = yave[ixmin:ixmax]
        ystd = ystd[ixmin:ixmax]

    grp = Group()
    setattr(grp, xarray, xout)
    setattr(grp, yarray, yave)
    setattr(grp, yarray + '_std', ystd)

    if kind == 'cubic' and xout_increasing:
        y0 = getattr(master, yarray)
        # if the derivative gets much worse, use linear interpolation
        if max(np.diff(yave)) > 50*max(np.diff(y0)):
            grp = merge_groups(grouplist, master=master, xarray=xarray,
                               yarray=yarray, trim=trim,
                               calc_yerr=calc_yerr, kind='linear')
    return grp
def imin(arr, debug=False):
    """Return the index of the minimum value of *arr*."""
    idx = np.argmin(arr)
    if debug:
        # sanity check: value at idx equals the minimum
        _logger.debug("Check: {0} = {1}".format(np.min(arr), arr[idx]))
    return idx
def imax(arr, debug=False):
    """Return the index of the maximum value of *arr*."""
    idx = np.argmax(arr)
    if debug:
        # sanity check: value at idx equals the maximum
        _logger.debug("Check: {0} = {1}".format(np.max(arr), arr[idx]))
    return idx
def lists_to_matrix(data, axis=None, **kws):
    """Stack two parallel lists of 1D arrays into a 2D matrix.

    Parameters
    ----------
    data : list of two lists of 1D arrays
        [
            [x1, ... xN]
            [z1, ... zN]
        ]
    axis : None or array 1D, optional
        reference axis used for the interpolation [None -> xdats[0]]
    **kws : optional
        keyword arguments for scipy.interpolate.interp1d()

    Returns
    -------
    axis, outmat : arrays
    """
    assert len(data) == 2, "'data' should be a list of two lists"
    xdats, zdats = data
    assert isinstance(xdats, list), "'xdats' not a list"
    assert isinstance(zdats, list), "'zdats' not a list"
    assert len(xdats) == len(zdats), "lists of data not of the same length"
    assert all(isinstance(z, np.ndarray) for z in zdats), "data in list must be arrays"
    if axis is None:
        axis = xdats[0]
    assert isinstance(axis, np.ndarray), "axis must be array"
    if all(z.size == axis.size for z in zdats):
        # every z already matches the axis length -> stack directly
        return axis, np.array(zdats)
    # otherwise interpolate each row onto the reference axis
    matrix = np.zeros((len(zdats), axis.size))
    for row, (xv, zv) in enumerate(zip(xdats, zdats)):
        interpolator = interp1d(xv, zv, **kws)
        matrix[row] = interpolator(axis)
    return axis, matrix
def curves_to_matrix(curves, axis=None, **kws):
    """Build a 2D data matrix from a list of curves.

    Parameters
    ----------
    curves : list of lists
        Curves format is the following:
        [
            [x1, y1, label1, info1],
            ...
            [xN, yN, labelN, infoN]
        ]
    axis : None or array 1D, optional
        a reference axis used for the interpolation [None -> curves[0][0]]
    **kws : optional
        keyword arguments for func:`scipy.interpolate.interp1d`

    Returns
    -------
    axis, outmat : arrays
    """
    assert isinstance(curves, list), "'curves' not a list"
    assert all(
        (isinstance(curve, list) and len(curve) == 4) for curve in curves
    ), "curves should be lists of four elements"
    if axis is None:
        axis = curves[0][0]
    assert isinstance(axis, np.ndarray), "axis must be array"
    matrix = np.zeros((len(curves), axis.size))
    for row, curve in enumerate(curves):
        assert len(curve) == 4, "wrong curve format, should contain four elements"
        xval, yval, label, info = curve
        if not (isinstance(xval, np.ndarray) and isinstance(yval, np.ndarray)):
            # non-array data: leave this row as zeros and report it
            _logger.error(
                "[curve_to_matrix] Curve %d (%s) not containing arrays -> ADDING ZEROS",
                row,
                label,
            )
            continue
        if (xval.size == axis.size) and (yval.size == axis.size):
            # already on the reference axis, use as-is
            matrix[row] = yval
        else:
            # interpolate onto the reference axis
            matrix[row] = interp1d(xval, yval, **kws)(axis)
            _logger.debug("[curve_to_matrix] Curve %d (%s) interpolated", row, label)
    return axis, matrix
def sum_arrays_1d(data, axis=None, **kws):
    """Sum a list of 1D arrays or curves by interpolation on a reference axis.

    Parameters
    ----------
    data : lists of lists
    data_fmt : str
        define data format
        - "curves" -> :func:`curves_to_matrix`
        - "lists" -> :func:`lists_to_matrix`

    Returns
    -------
    axis, zsum : 1D arrays
    """
    fmt = kws.pop("data_fmt", "curves")
    if fmt == "curves":
        refax, matrix = curves_to_matrix(data, axis=axis, **kws)
    elif fmt == "lists":
        refax, matrix = lists_to_matrix(data, axis=axis, **kws)
    else:
        raise NameError("'data_fmt' not understood")
    return refax, matrix.sum(axis=0)
def avg_arrays_1d(data, axis=None, weights=None, **kws):
    """Average a list of 1D arrays or curves by interpolation on a reference axis.

    Parameters
    ----------
    data : lists of lists
    data_fmt : str
        define data format
        - "curves" -> :func:`curves_to_matrix`
        - "lists" -> :func:`lists_to_matrix`
    weights : None or array
        weights for the average

    Returns
    -------
    axis, zavg : 1D arrays
        np.average(zdats)
    """
    fmt = kws.pop("data_fmt", "curves")
    # dispatch on the declared input format
    converters = {"curves": curves_to_matrix, "lists": lists_to_matrix}
    if fmt not in converters:
        raise NameError("'data_fmt' not understood")
    refax, matrix = converters[fmt](data, axis=axis, **kws)
    return refax, np.average(matrix, axis=0, weights=weights)
def merge_arrays_1d(data, method="average", axis=None, weights=None, **kws):
    """Merge a list of 1D arrays by interpolation on a reference axis.

    Parameters
    ----------
    data : lists of lists
    data_fmt : str
        define data format
        - "curves" -> :func:`curves_to_matrix`
        - "lists" -> :func:`lists_to_matrix`
    axis : None or array 1D, optional
        a reference axis used for the interpolation [None -> xdats[0]]
    method : str, optional
        method used to merge, available methods are:
        - "average" : uses np.average()
        - "sum" : uses np.sum()
    weights : None or array 1D, optional
        used if method == "average"

    Returns
    -------
    axis, zmrg : 1D arrays
        merge(zdats)
    """
    if method == "average":
        return avg_arrays_1d(data, axis=axis, weights=weights, **kws)
    if method == "sum":
        return sum_arrays_1d(data, axis=axis, **kws)
    raise NameError("wrong 'method': %s" % method)
def rebin_piecewise_constant(x1, y1, x2):
    """Rebin histogram values y1 from old bin edges x1 to new edges x2.

    Code taken from: https://github.com/jhykes/rebin/blob/master/rebin.py

    It follows the procedure described in Figure 18.13 (chapter 18.IV.B.
    Spectrum Alignment, page 703) of Knoll [1]

    References
    ----------
    [1] Glenn Knoll, Radiation Detection and Measurement, third edition,
    Wiley, 2000.

    Parameters
    ----------
    - x1 : m+1 array of old bin edges.
    - y1 : m array of old histogram values.
         This is the total number in each bin, not an average.
    - x2 : n+1 array of new bin edges.

    Returns
    -------
    - y2 : n array of rebinned histogram values.
    """
    x1 = np.asarray(x1)
    y1 = np.asarray(y1)
    x2 = np.asarray(x2)

    # fractional positions of the new edges, measured in old-bin index units
    pos = np.interp(x2, x1, np.arange(len(x1)))
    lo, hi = pos[:-1], pos[1:]
    floor_lo, ceil_lo = np.floor(lo), np.ceil(lo)
    floor_hi = np.floor(hi)

    # cumulative counts with a leading zero, so differences give bin sums
    counts = np.r_[[0], np.cumsum(y1)]

    # contribution from old bins fully contained inside each new bin
    spans_whole = floor_hi - ceil_lo >= 1.0
    y2 = np.where(spans_whole,
                  counts[floor_hi.astype(int)] - counts[ceil_lo.astype(int)],
                  0.0)

    # old-bin index holding each new edge, clipped into the valid range
    edge_bin = np.clip(np.floor(pos).astype(int), 0, len(y1) - 1)

    # both edges of a new bin fall within the same old bin
    one_cell = floor_hi == floor_lo
    y2 += np.where(one_cell, (hi - lo) * y1[edge_bin[:-1]], 0.0)

    # edges fall in different old bins: add the two fractional pieces
    split_cell = floor_hi > floor_lo
    partial = (ceil_lo - lo) * y1[edge_bin[:-1]]
    partial += (hi - floor_hi) * y1[edge_bin[1:]]
    y2 += np.where(split_cell, partial, 0.0)

    return y2
def reject_outliers(data, m=5.189, return_ma=False):
    """Reject outliers via the median-absolute-deviation criterion.

    Modified from: https://stackoverflow.com/questions/11686720/is-there-a-numpy-builtin-to-reject-outliers-from-a-list
    See also: https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
    """
    if not isinstance(data, np.ndarray):
        data = np.array(data)
    deviation = np.abs(data - np.median(data))
    med_dev = np.median(deviation)
    # avoid division by zero when the median deviation vanishes
    score = deviation / (med_dev if med_dev else 1.0)
    if return_ma:
        # mask the points whose score exceeds the cutoff
        return np.ma.masked_array(data=data, mask=(score > m))
    return data[score < m]