NFFT 3.5.3alpha
fastsum.c
Go to the documentation of this file.
1/*
2 * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
25#include "config.h"
26
27#include <stdlib.h>
28#include <math.h>
29#ifdef HAVE_COMPLEX_H
30#include <complex.h>
31#endif
32
33#include "nfft3.h"
34#include "fastsum.h"
35#include "infft.h"
36
37// Required for test if (ths->k == one_over_x)
38#include "kernels.h"
39
46static int max_i(int a, int b)
47{
48 return a >= b ? a : b;
49}
50
52static R fak(int n)
53{
54 if (n <= 1)
55 return K(1.0);
56 else
57 return (R)(n) * fak(n - 1);
58}
59
61static R binom(int n, int m)
62{
63 return fak(n) / fak(m) / fak(n - m);
64}
65
67static R BasisPoly(int m, int r, R xx)
68{
69 int k;
70 R sum = K(0.0);
71
72 for (k = 0; k <= m - r; k++)
73 {
74 sum += binom(m + k, k) * POW((xx + K(1.0)) / K(2.0), (R) k);
75 }
76 return sum * POW((xx + K(1.0)), (R) r) * POW(K(1.0) - xx, (R) (m + 1))
77 / (R)(1 << (m + 1)) / fak(r); /* 1<<(m+1) = 2^(m+1) */
78}
79
81C regkern(kernel k, R xx, int p, const R *param, R a, R b)
82{
83 int r;
84 C sum = K(0.0);
85
86 if (xx < -K(0.5))
87 xx = -K(0.5);
88 if (xx > K(0.5))
89 xx = K(0.5);
90 if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b))
91 {
92 return k(xx, 0, param);
93 }
94 else if (xx < -K(0.5) + b)
95 {
96 sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0)
97 * BasisPoly(p - 1, 0, K(2.0) * xx / b + (K(1.0) - b) / b);
98 for (r = 0; r < p; r++)
99 {
100 sum += POW(-b / K(2.0), (R) r) * k(-K(0.5) + b, r, param)
101 * BasisPoly(p - 1, r, -K(2.0) * xx / b + (b - K(1.0)) / b);
102 }
103 return sum;
104 }
105 else if ((xx > -a) && (xx < a))
106 {
107 for (r = 0; r < p; r++)
108 {
109 sum +=
110 POW(a, (R) r)
111 * (k(-a, r, param) * BasisPoly(p - 1, r, xx / a)
112 + k(a, r, param) * BasisPoly(p - 1, r, -xx / a)
113 * (r & 1 ? -1 : 1));
114 }
115 return sum;
116 }
117 else if (xx > K(0.5) - b)
118 {
119 sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0)
120 * BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b);
121 for (r = 0; r < p; r++)
122 {
123 sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param)
124 * BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b);
125 }
126 return sum;
127 }
128 return k(xx, 0, param);
129}
130
134static C regkern1(kernel k, R xx, int p, const R *param, R a, R b)
135{
136 int r;
137 C sum = K(0.0);
138
139 if (xx < -K(0.5))
140 xx = -K(0.5);
141 if (xx > K(0.5))
142 xx = K(0.5);
143 if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b))
144 {
145 return k(xx, 0, param);
146 }
147 else if ((xx > -a) && (xx < a))
148 {
149 for (r = 0; r < p; r++)
150 {
151 sum +=
152 POW(a, (R) r)
153 * (k(-a, r, param) * BasisPoly(p - 1, r, xx / a)
154 + k(a, r, param) * BasisPoly(p - 1, r, -xx / a)
155 * (r & 1 ? -1 : 1));
156 }
157 return sum;
158 }
159 else if (xx < -K(0.5) + b)
160 {
161 for (r = 0; r < p; r++)
162 {
163 sum += POW(b, (R) r)
164 * (k(K(0.5) - b, r, param) * BasisPoly(p - 1, r, (xx + K(0.5)) / b)
165 + k(-K(0.5) + b, r, param) * BasisPoly(p - 1, r, -(xx + K(0.5)) / b)
166 * (r & 1 ? -1 : 1));
167 }
168 return sum;
169 }
170 else if (xx > K(0.5) - b)
171 {
172 for (r = 0; r < p; r++)
173 {
174 sum += POW(b, (R) r)
175 * (k(K(0.5) - b, r, param) * BasisPoly(p - 1, r, (xx - K(0.5)) / b)
176 + k(-K(0.5) + b, r, param) * BasisPoly(p - 1, r, -(xx - K(0.5)) / b)
177 * (r & 1 ? -1 : 1));
178 }
179 return sum;
180 }
181 return k(xx, 0, param);
182}
183
185//static C regkern2(kernel k, R xx, int p, const R *param, R a, R b)
186//{
187// int r;
188// C sum = K(0.0);
189//
190// xx = FABS(xx);
191//
192// if (xx > K(0.5))
193// {
194// for (r = 0; r < p; r++)
195// {
196// sum += POW(b, (R) r) * k(K(0.5) - b, r, param)
197// * (BasisPoly(p - 1, r, 0) + BasisPoly(p - 1, r, 0));
198// }
199// return sum;
200// }
201// else if ((a <= xx) && (xx <= K(0.5) - b))
202// {
203// return k(xx, 0, param);
204// }
205// else if (xx < a)
206// {
207// for (r = 0; r < p; r++)
208// {
209// sum += POW(-a, (R) r) * k(a, r, param)
210// * (BasisPoly(p - 1, r, xx / a) + BasisPoly(p - 1, r, -xx / a));
211// }
212// return sum;
213// }
214// else if ((K(0.5) - b < xx) && (xx <= K(0.5)))
215// {
216// for (r = 0; r < p; r++)
217// {
218// sum += POW(b, (R) r) * k(K(0.5) - b, r, param)
219// * (BasisPoly(p - 1, r, (xx - K(0.5)) / b)
220// + BasisPoly(p - 1, r, -(xx - K(0.5)) / b));
221// }
222// return sum;
223// }
224// return K(0.0);
225//}
226
230static C regkern3(kernel k, R xx, int p, const R *param, R a, R b)
231{
232 int r;
233 C sum = K(0.0);
234
235 xx = FABS(xx);
236
237 if (xx >= K(0.5))
238 {
239 /*return kern(typ,c,0,K(0.5));*/
240 xx = K(0.5);
241 }
242 /* else */
243 if ((a <= xx) && (xx <= K(0.5) - b))
244 {
245 return k(xx, 0, param);
246 }
247 else if (xx < a)
248 {
249 for (r = 0; r < p; r++)
250 {
251 sum += POW(-a, (R) r) * k(a, r, param)
252 * (BasisPoly(p - 1, r, xx / a) + BasisPoly(p - 1, r, -xx / a));
253 }
254 /*sum=kern(typ,c,0,xx); */
255 return sum;
256 }
257 else if ((K(0.5) - b < xx) && (xx <= K(0.5)))
258 {
259 sum = k(K(0.5), 0, param) * BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b);
260 /* sum=regkern2(typ,c,p,a,b, K(0.5))*BasisPoly(p-1,0,-K(2.0)*xx/b+(K(1.0)-b)/b); */
261 for (r = 0; r < p; r++)
262 {
263 sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param)
264 * BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b);
265 }
266 return sum;
267 }
268 return K(0.0);
269}
270
272//static C linintkern(const R x, const C *Add, const int Ad, const R a)
273//{
274// R c, c1, c3;
275// int r;
276// C f1, f2;
277//
278// c = x * Ad / a;
279// r = (int)(LRINT(c));
280// r = abs(r);
281// f1 = Add[r];
282// f2 = Add[r + 1];
283// c = FABS(c);
284// c1 = c - r;
285// c3 = c1 - K(1.0);
286// return (-f1 * c3 + f2 * c1);
287//}
288//
289//static C quadrintkern(const R x, const C *Add, const int Ad, const R a)
290//{
291// R c, c1, c2, c3;
292// int r;
293// C f0, f1, f2;
294//
295// c = x * Ad / a;
296// r = (int)(LRINT(c));
297// r = abs(r);
298// if (r == 0)
299// {
300// f0 = Add[r + 1];
301// f1 = Add[r];
302// f2 = Add[r + 1];
303// }
304// else
305// {
306// f0 = Add[r - 1];
307// f1 = Add[r];
308// f2 = Add[r + 1];
309// }
310// c = FABS(c);
311// c1 = c - r;
312// c2 = c1 + K(1.0);
313// c3 = c1 - K(1.0);
314// return (f0 * c1 * c3 / K(2.0) - f1 * c2 * c3 + f2 * c2 * c1 / K(2.0));
315//}
316
318C kubintkern(const R x, const C *Add, const int Ad, const R a)
319{
320 R c, c1, c2, c3, c4;
321 int r;
322 C f0, f1, f2, f3;
323 c = x * (R)(Ad) / a;
324 r = (int)(LRINT(c));
325 r = abs(r);
326 if (r == 0)
327 {
328 f0 = Add[r + 1];
329 f1 = Add[r];
330 f2 = Add[r + 1];
331 f3 = Add[r + 2];
332 }
333 else
334 {
335 f0 = Add[r - 1];
336 f1 = Add[r];
337 f2 = Add[r + 1];
338 f3 = Add[r + 2];
339 }
340 c = FABS(c);
341 c1 = c - (R)(r);
342 c2 = c1 + K(1.0);
343 c3 = c1 - K(1.0);
344 c4 = c1 - K(2.0);
345 /* return(-f0*(c-r)*(c-r-K(1.0))*(c-r-K(2.0))/K(6.0)+f1*(c-r+K(1.0))*(c-r-K(1.0))*(c-r-K(2.0))/2-
346 f2*(c-r+K(1.0))*(c-r)*(c-r-K(2.0))/2+f3*(c-r+K(1.0))*(c-r)*(c-r-K(1.0))/K(6.0)); */
347 return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0)
348 - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0));
349}
350
352static C kubintkern1(const R x, const C *Add, const int Ad, const R a)
353{
354 R c, c1, c2, c3, c4;
355 int r;
356 C f0, f1, f2, f3;
357 Add += 2;
358 c = (x + a) * (R)(Ad) / K(2.0) / a;
359 r = (int)(LRINT(c));
360 r = abs(r);
361 /*if (r==0) {f0=Add[r];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
362 else */
363 {
364 f0 = Add[r - 1];
365 f1 = Add[r];
366 f2 = Add[r + 1];
367 f3 = Add[r + 2];
368 }
369 c = FABS(c);
370 c1 = c - (R)(r);
371 c2 = c1 + K(1.0);
372 c3 = c1 - K(1.0);
373 c4 = c1 - K(2.0);
374 /* return(-f0*(c-r)*(c-r-K(1.0))*(c-r-K(2.0))/K(6.0)+f1*(c-r+K(1.0))*(c-r-K(1.0))*(c-r-K(2.0))/2-
375 f2*(c-r+K(1.0))*(c-r)*(c-r-K(2.0))/2+f3*(c-r+K(1.0))*(c-r)*(c-r-K(1.0))/K(6.0)); */
376 return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0)
377 - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0));
378}
379
381static void quicksort(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
382{
383 int lpos = 0;
384 int rpos = N - 1;
385 /*R pivot=x[((N-1)/2)*d+t];*/
386 R pivot = x[(N / 2) * d + t];
387
388 int k;
389 R temp1;
390 C temp2;
391 int temp_int;
392
393 while (lpos <= rpos)
394 {
395 while (x[lpos * d + t] < pivot)
396 lpos++;
397 while (x[rpos * d + t] > pivot)
398 rpos--;
399 if (lpos <= rpos)
400 {
401 for (k = 0; k < d; k++)
402 {
403 temp1 = x[lpos * d + k];
404 x[lpos * d + k] = x[rpos * d + k];
405 x[rpos * d + k] = temp1;
406 }
407 temp2 = alpha[lpos];
408 alpha[lpos] = alpha[rpos];
409 alpha[rpos] = temp2;
410
411 if (permutation_x_alpha)
412 {
413 temp_int = permutation_x_alpha[lpos];
414 permutation_x_alpha[lpos] = permutation_x_alpha[rpos];
415 permutation_x_alpha[rpos] = temp_int;
416 }
417
418 lpos++;
419 rpos--;
420 }
421 }
422 if (0 < rpos)
423 quicksort(d, t, x, alpha, permutation_x_alpha, rpos + 1);
424 if (lpos < N - 1)
425 quicksort(d, t, x + lpos * d, alpha + lpos, permutation_x_alpha ? permutation_x_alpha + lpos : NULL, N - lpos);
426}
427
429static void BuildBox(fastsum_plan *ths)
430{
431 int t, l;
432 int *box_index;
433 R val[ths->d];
434
435 box_index = (int *) NFFT(malloc)((size_t)(ths->box_count) * sizeof(int));
436 for (t = 0; t < ths->box_count; t++)
437 box_index[t] = 0;
438
439 for (l = 0; l < ths->N_total; l++)
440 {
441 int ind = 0;
442 for (t = 0; t < ths->d; t++)
443 {
444 val[t] = ths->x[ths->d * l + t] + K(0.25) - ths->eps_B / K(2.0);
445 ind *= ths->box_count_per_dim;
446 ind += (int) (val[t] / ths->eps_I);
447 }
448 box_index[ind]++;
449 }
450
451 ths->box_offset[0] = 0;
452 for (t = 1; t <= ths->box_count; t++)
453 {
454 ths->box_offset[t] = ths->box_offset[t - 1] + box_index[t - 1];
455 box_index[t - 1] = ths->box_offset[t - 1];
456 }
457
458 for (l = 0; l < ths->N_total; l++)
459 {
460 int ind = 0;
461 for (t = 0; t < ths->d; t++)
462 {
463 val[t] = ths->x[ths->d * l + t] + K(0.25) - ths->eps_B / K(2.0);
464 ind *= ths->box_count_per_dim;
465 ind += (int) (val[t] / ths->eps_I);
466 }
467
468 ths->box_alpha[box_index[ind]] = ths->alpha[l];
469
470 for (t = 0; t < ths->d; t++)
471 {
472 ths->box_x[ths->d * box_index[ind] + t] = ths->x[ths->d * l + t];
473 }
474 box_index[ind]++;
475 }
476 NFFT(free)(box_index);
477}
478
480static inline C calc_SearchBox(int d, R *y, R *x, C *alpha, int start,
481 int end_lt, const C *Add, const int Ad, int p, R a, const kernel k,
482 const R *param, const unsigned flags)
483{
484 C result = K(0.0);
485
486 int m, l;
487 R r;
488
489 for (m = start; m < end_lt; m++)
490 {
491 if (d == 1)
492 {
493 r = y[0] - x[m];
494 }
495 else
496 {
497 r = K(0.0);
498 for (l = 0; l < d; l++)
499 r += (y[l] - x[m * d + l]) * (y[l] - x[m * d + l]);
500 r = SQRT(r);
501 }
502 if (FABS(r) < a)
503 {
504 result += alpha[m] * k(r, 0, param); /* alpha*(kern-regkern) */
505 if (d == 1)
506 {
507 if (flags & EXACT_NEARFIELD)
508 result -= alpha[m] * regkern1(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in 1D) */
509 else
510 result -= alpha[m] * kubintkern1(r, Add, Ad, a); /* spline approximation */
511 }
512 else
513 {
514 if (flags & EXACT_NEARFIELD)
515 result -= alpha[m] * regkern(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in dD) */
516 else
517#if defined(NF_KUB)
518 result -= alpha[m] * kubintkern(r, Add, Ad, a); /* spline approximation */
519#elif defined(NF_QUADR)
520 result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */
521#elif defined(NF_LIN)
522 result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */
523#else
524#error define interpolation method
525#endif
526 }
527 }
528 }
529 return result;
530}
531
533static C SearchBox(R *y, fastsum_plan *ths)
534{
535 C val = K(0.0);
536 int t;
537 int y_multiind[ths->d];
538 int multiindex[ths->d];
539 int y_ind;
540
541 for (t = 0; t < ths->d; t++)
542 {
543 y_multiind[t] = (int)(LRINT((y[t] + K(0.25) - ths->eps_B / K(2.0)) / ths->eps_I));
544 }
545
546 if (ths->d == 1)
547 {
548 for (y_ind = max_i(0, y_multiind[0] - 1);
549 y_ind < ths->box_count_per_dim && y_ind <= y_multiind[0] + 1; y_ind++)
550 {
551 val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
552 ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add, ths->Ad,
553 ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
554 }
555 }
556 else if (ths->d == 2)
557 {
558 for (multiindex[0] = max_i(0, y_multiind[0] - 1);
559 multiindex[0] < ths->box_count_per_dim
560 && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++)
561 for (multiindex[1] = max_i(0, y_multiind[1] - 1);
562 multiindex[1] < ths->box_count_per_dim
563 && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++)
564 {
565 y_ind = (ths->box_count_per_dim * multiindex[0]) + multiindex[1];
566 val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
567 ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add,
568 ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
569 }
570 }
571 else if (ths->d == 3)
572 {
573 for (multiindex[0] = max_i(0, y_multiind[0] - 1);
574 multiindex[0] < ths->box_count_per_dim
575 && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++)
576 for (multiindex[1] = max_i(0, y_multiind[1] - 1);
577 multiindex[1] < ths->box_count_per_dim
578 && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++)
579 for (multiindex[2] = max_i(0, y_multiind[2] - 1);
580 multiindex[2] < ths->box_count_per_dim
581 && multiindex[2] <= y_multiind[2] + 1; multiindex[2]++)
582 {
583 y_ind = ((ths->box_count_per_dim * multiindex[0]) + multiindex[1])
584 * ths->box_count_per_dim + multiindex[2];
585 val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
586 ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add,
587 ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param,
588 ths->flags);
589 }
590 }
591 else
592 {
593 return 0.0/0.0; //exit(EXIT_FAILURE);
594 }
595 return val;
596}
597
599static void BuildTree(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
600{
601 if (N > 1)
602 {
603 int m = N / 2;
604
605 quicksort(d, t, x, alpha, permutation_x_alpha, N);
606
607 BuildTree(d, (t + 1) % d, x, alpha, permutation_x_alpha, m);
608 BuildTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), permutation_x_alpha ? permutation_x_alpha + (m + 1) : NULL, N - m - 1);
609 }
610}
611
613static C SearchTree(const int d, const int t, const R *x, const C *alpha,
614 const R *xmin, const R *xmax, const int N, const kernel k, const R *param,
615 const int Ad, const C *Add, const int p, const unsigned flags)
616{
617 if (N == 0)
618 {
619 return K(0.0);
620 }
621 else
622 {
623 int m = N / 2;
624 R Min = xmin[t];
625 R Max = xmax[t];
626 R Median = x[m * d + t];
627 R a = FABS(Max - Min) / 2;
628 int l;
629 int E = 0;
630 R r;
631
632 if (Min > Median)
633 return SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin,
634 xmax, N - m - 1, k, param, Ad, Add, p, flags);
635 else if (Max < Median)
636 return SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad,
637 Add, p, flags);
638 else
639 {
640 C result = K(0.0);
641 E = 0;
642
643 for (l = 0; l < d; l++)
644 {
645 if (x[m * d + l] > xmin[l] && x[m * d + l] < xmax[l])
646 E++;
647 }
648
649 if (E == d)
650 {
651 if (d == 1)
652 {
653 r = xmin[0] + a - x[m]; /* remember: xmin+a = y */
654 }
655 else
656 {
657 r = K(0.0);
658 for (l = 0; l < d; l++)
659 r += (xmin[l] + a - x[m * d + l]) * (xmin[l] + a - x[m * d + l]); /* remember: xmin+a = y */
660 r = SQRT(r);
661 }
662 if (FABS(r) < a)
663 {
664 result += alpha[m] * k(r, 0, param); /* alpha*(kern-regkern) */
665 if (d == 1)
666 {
667 if (flags & EXACT_NEARFIELD)
668 result -= alpha[m] * regkern1(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in 1D) */
669 else
670 result -= alpha[m] * kubintkern1(r, Add, Ad, a); /* spline approximation */
671 }
672 else
673 {
674 if (flags & EXACT_NEARFIELD)
675 result -= alpha[m] * regkern(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in dD) */
676 else
677#if defined(NF_KUB)
678 result -= alpha[m] * kubintkern(r, Add, Ad, a); /* spline approximation */
679#elif defined(NF_QUADR)
680 result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */
681#elif defined(NF_LIN)
682 result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */
683#else
684#error define interpolation method
685#endif
686 }
687 }
688 }
689 result += SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin,
690 xmax, N - m - 1, k, param, Ad, Add, p, flags);
691 result += SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad, Add,
692 p, flags);
693 return result;
694 }
695 }
696}
697
698static void fastsum_precompute_kernel(fastsum_plan *ths)
699{
700 int j, k, t;
701 INT N[ths->d];
702 int n_total;
703#ifdef MEASURE_TIME
704 ticks t0, t1;
705#endif
706
707 ths->MEASURE_TIME_t[0] = K(0.0);
708
709#ifdef MEASURE_TIME
710 t0 = getticks();
711#endif
713 if (ths->eps_I > 0.0 && !(ths->flags & EXACT_NEARFIELD))
714 {
715 if (ths->d == 1)
716#ifdef _OPENMP
717 #pragma omp parallel for default(shared) private(k)
718#endif
719 for (k = -ths->Ad / 2 - 2; k <= ths->Ad / 2 + 2; k++)
720 ths->Add[k + ths->Ad / 2 + 2] = regkern1(ths->k,
721 ths->eps_I * (R) k / (R)(ths->Ad) * K(2.0), ths->p, ths->kernel_param,
722 ths->eps_I, ths->eps_B);
723 else
724#ifdef _OPENMP
725 #pragma omp parallel for default(shared) private(k)
726#endif
727 for (k = 0; k <= ths->Ad + 2; k++)
728 ths->Add[k] = regkern3(ths->k, ths->eps_I * (R) k / (R)(ths->Ad), ths->p,
729 ths->kernel_param, ths->eps_I, ths->eps_B);
730 }
731#ifdef MEASURE_TIME
732 t1 = getticks();
733 ths->MEASURE_TIME_t[0] += NFFT(elapsed_seconds)(t1,t0);
734#endif
735
736#ifdef MEASURE_TIME
737 t0 = getticks();
738#endif
740 n_total = 1;
741 for (t = 0; t < ths->d; t++)
742 n_total *= ths->n;
743
744#ifdef _OPENMP
745 #pragma omp parallel for default(shared) private(j,k,t)
746#endif
747 for (j = 0; j < n_total; j++)
748 {
749 if (ths->d == 1)
750 ths->b[j] = regkern1(ths->k, (R) - (j / (R)(ths->n) - K(0.5)), ths->p,
751 ths->kernel_param, ths->eps_I, ths->eps_B) / (R)(n_total);
752 else
753 {
754 k = j;
755 ths->b[j] = K(0.0);
756 for (t = 0; t < ths->d; t++)
757 {
758 ths->b[j] += ((R) (k % (ths->n)) / (R)(ths->n) - K(0.5))
759 * ((R) (k % (ths->n)) / (R)(ths->n) - K(0.5));
760 k = k / (ths->n);
761 }
762 ths->b[j] = regkern3(ths->k, SQRT(CREAL(ths->b[j])), ths->p, ths->kernel_param,
763 ths->eps_I, ths->eps_B) / (R)(n_total);
764 }
765 }
766
767 for (t = 0; t < ths->d; t++)
768 N[t] = ths->n;
769
770 NFFT(fftshift_complex)(ths->b, (int)(ths->d), N);
771 FFTW(execute)(ths->fft_plan);
772 NFFT(fftshift_complex)(ths->b, (int)(ths->d), N);
773#ifdef MEASURE_TIME
774 t1 = getticks();
775 ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
776#endif
777}
778
779void fastsum_init_guru_kernel(fastsum_plan *ths, int d, kernel k, R *param,
780 unsigned flags, int nn, int p, R eps_I, R eps_B)
781{
782 int t;
783 int N[d];
784 int n_total;
785#ifdef _OPENMP
786 int nthreads = NFFT(get_num_threads)();
787#endif
788
789 ths->d = d;
790
791 ths->k = k;
792 ths->kernel_param = param;
793
794 ths->flags = flags;
795
796 ths->p = p;
797 ths->eps_I = eps_I; /* =(R)ths->p/(R)nn; */
798 ths->eps_B = eps_B; /* =K(1.0)/K(16.0); */
801 if (ths->eps_I > 0.0 && !(ths->flags & EXACT_NEARFIELD))
802 {
803 if (ths->d == 1)
804 {
805 ths->Ad = 4 * (ths->p) * (ths->p);
806 ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 5) * (sizeof(C)));
807 }
808 else
809 {
810 if (ths->k == one_over_x)
811 {
812 R delta = K(1e-8);
813 switch (p)
814 {
815 case 2:
816 delta = K(1e-3);
817 break;
818 case 3:
819 delta = K(1e-4);
820 break;
821 case 4:
822 delta = K(1e-5);
823 break;
824 case 5:
825 delta = K(1e-6);
826 break;
827 case 6:
828 delta = K(1e-6);
829 break;
830 case 7:
831 delta = K(1e-7);
832 break;
833 default:
834 delta = K(1e-8);
835 }
836
837#if defined(NF_KUB)
838 ths->Ad = max_i(10, (int)(LRINT(CEIL(K(1.4) / POW(delta, K(1.0) / K(4.0))))));
839 ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 3) * (sizeof(C)));
840#elif defined(NF_QUADR)
841 ths->Ad = (int)(LRINT(CEIL(K(2.2)/POW(delta,K(1.0)/K(3.0)))));
842 ths->Add = (C *)NFFT(malloc)((size_t)(ths->Ad+3)*(sizeof(C)));
843#elif defined(NF_LIN)
844 ths->Ad = (int)(LRINT(CEIL(K(1.7)/pow(delta,K(1.0)/K(2.0)))));
845 ths->Add = (C *)NFFT(malloc)((size_t)(ths->Ad+3)*(sizeof(C)));
846#else
847#error define NF_LIN or NF_QUADR or NF_KUB
848#endif
849 }
850 else
851 {
852 ths->Ad = 2 * (ths->p) * (ths->p);
853 ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 3) * (sizeof(C)));
854 }
855 } /* multi-dimensional case */
856 } /* !EXACT_NEARFIELD == spline approximation in near field AND eps_I > 0 */
857
858 ths->n = nn;
859 for (t = 0; t < d; t++)
860 {
861 N[t] = nn;
862 }
863
865 n_total = 1;
866 for (t = 0; t < d; t++)
867 n_total *= nn;
868
869 ths->b = (C*) NFFT(malloc)((size_t)(n_total) * sizeof(C));
870 ths->f_hat = (C*) NFFT(malloc)((size_t)(n_total) * sizeof(C));
871#ifdef _OPENMP
872 #pragma omp critical (nfft_omp_critical_fftw_plan)
873 {
874 FFTW(plan_with_nthreads)(nthreads);
875#endif
876
877 ths->fft_plan = FFTW(plan_dft)(d, N, ths->b, ths->b, FFTW_FORWARD,
878 FFTW_ESTIMATE);
879
880#ifdef _OPENMP
881}
882#endif
883
884 fastsum_precompute_kernel(ths);
885}
886
887void fastsum_init_guru_source_nodes(fastsum_plan *ths, int N_total, int nn_oversampled, int m)
888{
889 int t;
890 int N[ths->d], n[ths->d];
891 unsigned sort_flags_adjoint = 0U;
892
893 if (ths->d > 1)
894 {
895#ifdef _OPENMP
896 sort_flags_adjoint = NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT;
897#else
898 sort_flags_adjoint = NFFT_SORT_NODES;
899#endif
900 }
901
902 ths->N_total = N_total;
903
904 ths->x = (R *) NFFT(malloc)((size_t)(ths->d * N_total) * (sizeof(R)));
905 ths->alpha = (C *) NFFT(malloc)((size_t)(N_total) * (sizeof(C)));
906
908 for (t = 0; t < ths->d; t++)
909 {
910 N[t] = ths->n;
911 n[t] = nn_oversampled;
912 }
913
914 NFFT(init_guru)(&(ths->mv1), ths->d, N, N_total, n, m,
915 sort_flags_adjoint |
916 PRE_PHI_HUT | PRE_PSI | /*MALLOC_X | MALLOC_F_HAT | MALLOC_F |*/ FFTW_INIT
917 | ((ths->d == 1) ? FFT_OUT_OF_PLACE : 0U),
918 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
919 ths->mv1.x = ths->x;
920 ths->mv1.f = ths->alpha;
921 ths->mv1.f_hat = ths->f_hat;
922
923 ths->box_offset = NULL;
924 ths->box_alpha = NULL;
925 ths->box_x = NULL;
926 ths->permutation_x_alpha = NULL;
927
928 if (ths->flags & NEARFIELD_BOXES)
929 {
930 if (ths->eps_I > 0.0)
931 {
932 ths->box_count_per_dim = (int)(LRINT(FLOOR((K(0.5) - ths->eps_B) / ths->eps_I))) + 1;
933 ths->box_count = 1;
934 for (t = 0; t < ths->d; t++)
935 ths->box_count *= ths->box_count_per_dim;
936
937 ths->box_offset = (int *) NFFT(malloc)((size_t)(ths->box_count + 1) * sizeof(int));
938
939 ths->box_alpha = (C *) NFFT(malloc)((size_t)(ths->N_total) * (sizeof(C)));
940
941 ths->box_x = (R *) NFFT(malloc)((size_t)(ths->d * ths->N_total) * sizeof(R));
942 } /* eps_I > 0 */
943 } /* NEARFIELD_BOXES */
944 else
945 {
946 if ((ths->flags & STORE_PERMUTATION_X_ALPHA) && (ths->eps_I > 0.0))
947 {
948 ths->permutation_x_alpha = (int *) NFFT(malloc)((size_t)(ths->N_total) * (sizeof(int)));
949 for (int i=0; i<ths->N_total; i++)
950 ths->permutation_x_alpha[i] = i;
951 }
952 } /* search tree */
953}
954
955void fastsum_init_guru_target_nodes(fastsum_plan *ths, int M_total, int nn_oversampled, int m)
956{
957 int t;
958 int N[ths->d], n[ths->d];
959 unsigned sort_flags_trafo = 0U;
960
961 if (ths->d > 1)
962 sort_flags_trafo = NFFT_SORT_NODES;
963
964 ths->M_total = M_total;
965
966 ths->y = (R *) NFFT(malloc)((size_t)(ths->d * M_total) * (sizeof(R)));
967 ths->f = (C *) NFFT(malloc)((size_t)(M_total) * (sizeof(C)));
968
970 for (t = 0; t < ths->d; t++)
971 {
972 N[t] = ths->n;
973 n[t] = nn_oversampled;
974 }
975
976 NFFT(init_guru)(&(ths->mv2), ths->d, N, M_total, n, m,
977 sort_flags_trafo |
978 PRE_PHI_HUT | PRE_PSI | /*MALLOC_X | MALLOC_F_HAT | MALLOC_F |*/ FFTW_INIT
979 | ((ths->d == 1) ? FFT_OUT_OF_PLACE : 0U),
980 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
981 ths->mv2.x = ths->y;
982 ths->mv2.f = ths->f;
983 ths->mv2.f_hat = ths->f_hat;
984}
985
987void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total,
988 kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B)
989{
990 fastsum_init_guru_kernel(ths, d, k, param, flags, nn, p, eps_I, eps_B);
991 fastsum_init_guru_source_nodes(ths, N_total, 2*nn, m);
992 fastsum_init_guru_target_nodes(ths, M_total, 2*nn, m);
993}
994
997{
998 NFFT(free)(ths->x);
999 NFFT(free)(ths->alpha);
1000
1001 NFFT(finalize)(&(ths->mv1));
1002
1003 if (ths->flags & NEARFIELD_BOXES)
1004 {
1005 if (ths->eps_I > 0.0)
1006 {
1007 NFFT(free)(ths->box_offset);
1008 NFFT(free)(ths->box_alpha);
1009 NFFT(free)(ths->box_x);
1010 }
1011 } /* NEARFIELD_BOXES */
1012 else
1013 {
1014 if (ths->permutation_x_alpha)
1015 NFFT(free)(ths->permutation_x_alpha);
1016 } /* search tree */
1017}
1018
1021{
1022 NFFT(free)(ths->y);
1023 NFFT(free)(ths->f);
1024
1025 NFFT(finalize)(&(ths->mv2));
1026}
1027
1030{
1031 if (ths->eps_I > 0.0 && !(ths->flags & EXACT_NEARFIELD))
1032 NFFT(free)(ths->Add);
1033
1034#ifdef _OPENMP
1035 #pragma omp critical (nfft_omp_critical_fftw_plan)
1036 {
1037#endif
1038 FFTW(destroy_plan)(ths->fft_plan);
1039#ifdef _OPENMP
1040}
1041#endif
1042
1043 NFFT(free)(ths->b);
1044 NFFT(free)(ths->f_hat);
1045}
1046
1054
1057{
1058 int j, k;
1059 int t;
1060 R r;
1061
1062#ifdef _OPENMP
1063 #pragma omp parallel for default(shared) private(j,k,t,r)
1064#endif
1065 for (j = 0; j < ths->M_total; j++)
1066 {
1067 ths->f[j] = K(0.0);
1068 for (k = 0; k < ths->N_total; k++)
1069 {
1070 if (ths->d == 1)
1071 r = ths->y[j] - ths->x[k];
1072 else
1073 {
1074 r = K(0.0);
1075 for (t = 0; t < ths->d; t++)
1076 r += (ths->y[j * ths->d + t] - ths->x[k * ths->d + t])
1077 * (ths->y[j * ths->d + t] - ths->x[k * ths->d + t]);
1078 r = SQRT(r);
1079 }
1080 ths->f[j] += ths->alpha[k] * ths->k(r, 0, ths->kernel_param);
1081 }
1082 }
1083}
1084
1087{
1088#ifdef MEASURE_TIME
1089 ticks t0, t1;
1090#endif
1091
1092 ths->MEASURE_TIME_t[1] = K(0.0);
1093 ths->MEASURE_TIME_t[3] = K(0.0);
1094
1095#ifdef MEASURE_TIME
1096 t0 = getticks();
1097#endif
1098
1099 if (ths->eps_I > 0.0)
1100 {
1101 if (ths->flags & NEARFIELD_BOXES)
1102 BuildBox(ths);
1103 else
1105 BuildTree(ths->d, 0, ths->x, ths->alpha, ths->permutation_x_alpha, ths->N_total);
1106 } /* eps_I > 0 */
1107
1108#ifdef MEASURE_TIME
1109 t1 = getticks();
1110 ths->MEASURE_TIME_t[3] += nfft_elapsed_seconds(t1,t0);
1111#endif
1112
1113#ifdef MEASURE_TIME
1114 t0 = getticks();
1115#endif
1117// for (k = 0; k < ths->mv1.M_total; k++)
1118// for (t = 0; t < ths->mv1.d; t++)
1119// ths->mv1.x[ths->mv1.d * k + t] = -ths->x[ths->mv1.d * k + t]; /* note the factor -1 for transposed transform instead of adjoint*/
1120
1122 if (ths->mv1.flags & PRE_LIN_PSI)
1123 NFFT(precompute_lin_psi)(&(ths->mv1));
1124
1125 if (ths->mv1.flags & PRE_PSI)
1126 NFFT(precompute_psi)(&(ths->mv1));
1127
1128 if (ths->mv1.flags & PRE_FULL_PSI)
1129 NFFT(precompute_full_psi)(&(ths->mv1));
1130#ifdef MEASURE_TIME
1131 t1 = getticks();
1132 ths->MEASURE_TIME_t[1] += nfft_elapsed_seconds(t1,t0);
1133#endif
1134
1135// /** init Fourier coefficients */
1136// for (k = 0; k < ths->mv1.M_total; k++)
1137// ths->mv1.f[k] = ths->alpha[k];
1138}
1139
1142{
1143#ifdef MEASURE_TIME
1144 ticks t0, t1;
1145#endif
1146
1147 ths->MEASURE_TIME_t[2] = K(0.0);
1148
1149#ifdef MEASURE_TIME
1150 t0 = getticks();
1151#endif
1153// for (j = 0; j < ths->mv2.M_total; j++)
1154// for (t = 0; t < ths->mv2.d; t++)
1155// ths->mv2.x[ths->mv2.d * j + t] = -ths->y[ths->mv2.d * j + t]; /* note the factor -1 for conjugated transform instead of standard*/
1156
1158 if (ths->mv2.flags & PRE_LIN_PSI)
1159 NFFT(precompute_lin_psi)(&(ths->mv2));
1160
1161 if (ths->mv2.flags & PRE_PSI)
1162 NFFT(precompute_psi)(&(ths->mv2));
1163
1164 if (ths->mv2.flags & PRE_FULL_PSI)
1165 NFFT(precompute_full_psi)(&(ths->mv2));
1166#ifdef MEASURE_TIME
1167 t1 = getticks();
1168 ths->MEASURE_TIME_t[2] += NFFT(elapsed_seconds)(t1,t0);
1169#endif
1170}
1171
1178
1181{
1182 int j, k, t;
1183#ifdef MEASURE_TIME
1184 ticks t0, t1;
1185#endif
1186
1187 ths->MEASURE_TIME_t[4] = K(0.0);
1188 ths->MEASURE_TIME_t[5] = K(0.0);
1189 ths->MEASURE_TIME_t[6] = K(0.0);
1190 ths->MEASURE_TIME_t[7] = K(0.0);
1191
1192#ifdef MEASURE_TIME
1193 t0 = getticks();
1194#endif
1196 NFFT(adjoint)(&(ths->mv1));
1197#ifdef MEASURE_TIME
1198 t1 = getticks();
1199 ths->MEASURE_TIME_t[4] += NFFT(elapsed_seconds)(t1,t0);
1200#endif
1201
1202#ifdef MEASURE_TIME
1203 t0 = getticks();
1204#endif
1206#ifdef _OPENMP
1207 #pragma omp parallel for default(shared) private(k)
1208#endif
1209 for (k = 0; k < ths->mv2.N_total; k++)
1210 ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k];
1211#ifdef MEASURE_TIME
1212 t1 = getticks();
1213 ths->MEASURE_TIME_t[5] += nfft_elapsed_seconds(t1,t0);
1214#endif
1215
1216#ifdef MEASURE_TIME
1217 t0 = getticks();
1218#endif
1220 NFFT(trafo)(&(ths->mv2));
1221#ifdef MEASURE_TIME
1222 t1 = getticks();
1223 ths->MEASURE_TIME_t[6] += nfft_elapsed_seconds(t1,t0);
1224#endif
1225
1226#ifdef MEASURE_TIME
1227 t0 = getticks();
1228#endif
1229
1231#ifdef _OPENMP
1232 #pragma omp parallel for default(shared) private(j)
1233#endif
1234 for (j = 0; j < ths->M_total; j++)
1235 ths->f[j] = ths->mv2.f[j];
1236
1237 if (ths->eps_I > 0.0)
1238 {
1240 #ifdef _OPENMP
1241 #pragma omp parallel for default(shared) private(j,k,t)
1242 #endif
1243 for (j = 0; j < ths->M_total; j++)
1244 {
1245 R ymin[ths->d], ymax[ths->d];
1247 if (ths->flags & NEARFIELD_BOXES)
1248 ths->f[j] += SearchBox(ths->y + ths->d * j, ths);
1249 else
1250 {
1251 for (t = 0; t < ths->d; t++)
1252 {
1253 ymin[t] = ths->y[ths->d * j + t] - ths->eps_I;
1254 ymax[t] = ths->y[ths->d * j + t] + ths->eps_I;
1255 }
1256 ths->f[j]
1257 += SearchTree(ths->d, 0, ths->x, ths->alpha, ymin, ymax, ths->N_total,
1258 ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags);
1259 }
1260 }
1261 }
1262
1263#ifdef MEASURE_TIME
1264 t1 = getticks();
1265 ths->MEASURE_TIME_t[7] += NFFT(elapsed_seconds)(t1,t0);
1266#endif
1267}
1268/* \} */
1269
1270/* fastsum.c */
Header file for the fast NFFT-based summation algorithm.
int N_total
number of source knots
Definition fastsum.h:88
void fastsum_precompute(fastsum_plan *ths)
precomputation for fastsum
Definition fastsum.c:1173
static void BuildTree(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
recursive sort of source knots dimension by dimension to get tree structure
Definition fastsum.c:599
int M_total
number of target knots
Definition fastsum.h:89
void fastsum_init_guru_target_nodes(fastsum_plan *ths, int M_total, int nn_oversampled, int m)
initialize target nodes dependent part of fast summation plan
Definition fastsum.c:955
static R binom(int n, int m)
binomial coefficient
Definition fastsum.c:61
int p
degree of smoothness of regularization
Definition fastsum.h:112
int n
FS__ - fast summation.
Definition fastsum.h:108
int Ad
near field
Definition fastsum.h:120
R * kernel_param
parameters for kernel function
Definition fastsum.h:98
#define STORE_PERMUTATION_X_ALPHA
If this flag is set, and eps_I > 0.0 and NEARFIELD_BOXES is not set, then the vector permutation_x_al...
Definition fastsum.h:79
void fastsum_finalize_target_nodes(fastsum_plan *ths)
finalization of fastsum plan
Definition fastsum.c:1020
void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B)
initialization of fastsum plan
Definition fastsum.c:987
R * x
source knots in d-ball with radius 1/4-eps_b/2
Definition fastsum.h:94
void fastsum_init_guru_kernel(fastsum_plan *ths, int d, kernel k, R *param, unsigned flags, int nn, int p, R eps_I, R eps_B)
initialize node independent part of fast summation plan
Definition fastsum.c:779
static C kubintkern1(const R x, const C *Add, const int Ad, const R a)
cubic spline interpolation in near field with arbitrary kernels
Definition fastsum.c:352
static int max_i(int a, int b)
max
Definition fastsum.c:46
C * f_hat
Fourier coefficients of nfft plans.
Definition fastsum.h:110
static C regkern1(kernel k, R xx, int p, const R *param, R a, R b)
regularized kernel with K_I arbitrary and K_B periodized (used in 1D)
Definition fastsum.c:134
R MEASURE_TIME_t[8]
Measured time for each step if MEASURE_TIME is set.
Definition fastsum.h:134
C * b
expansion coefficients
Definition fastsum.h:109
static void BuildBox(fastsum_plan *ths)
initialize box-based search data structures
Definition fastsum.c:429
C * f
target evaluations
Definition fastsum.h:92
C * alpha
source coefficients
Definition fastsum.h:91
static C SearchTree(const int d, const int t, const R *x, const C *alpha, const R *xmin, const R *xmax, const int N, const kernel k, const R *param, const int Ad, const C *Add, const int p, const unsigned flags)
fast search in tree of source knots for near field computation
Definition fastsum.c:613
R eps_B
outer boundary
Definition fastsum.h:114
static C SearchBox(R *y, fastsum_plan *ths)
box-based near field correction
Definition fastsum.c:533
void fastsum_precompute_target_nodes(fastsum_plan *ths)
precomputation for fastsum
Definition fastsum.c:1141
static C regkern3(kernel k, R xx, int p, const R *param, R a, R b)
regularized kernel for even kernels with K_I even and K_B mirrored
Definition fastsum.c:230
void fastsum_precompute_source_nodes(fastsum_plan *ths)
precomputation for fastsum
Definition fastsum.c:1086
void fastsum_trafo(fastsum_plan *ths)
fast NFFT-based summation
Definition fastsum.c:1180
void fastsum_exact(fastsum_plan *ths)
direct computation of sums
Definition fastsum.c:1056
kernel k
kernel function
Definition fastsum.h:97
void fastsum_finalize_kernel(fastsum_plan *ths)
finalization of fastsum plan
Definition fastsum.c:1029
static R fak(int n)
factorial
Definition fastsum.c:52
unsigned flags
flags precomp.
Definition fastsum.h:100
void fastsum_finalize(fastsum_plan *ths)
finalization of fastsum plan
Definition fastsum.c:1048
C * Add
spline values
Definition fastsum.h:121
int * permutation_x_alpha
permutation vector of source nodes if STORE_PERMUTATION_X_ALPHA is set
Definition fastsum.h:132
#define EXACT_NEARFIELD
Constant symbols.
Definition fastsum.h:73
C regkern(kernel k, R xx, int p, const R *param, R a, R b)
regularized kernel with K_I arbitrary and K_B smooth to zero
Definition fastsum.c:81
static void quicksort(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
quicksort algorithm for source knots and associated coefficients
Definition fastsum.c:381
static C calc_SearchBox(int d, R *y, R *x, C *alpha, int start, int end_lt, const C *Add, const int Ad, int p, R a, const kernel k, const R *param, const unsigned flags)
inner computation function for box-based near field correction
Definition fastsum.c:480
int d
api
Definition fastsum.h:86
void fastsum_init_guru_source_nodes(fastsum_plan *ths, int N_total, int nn_oversampled, int m)
initialize source nodes dependent part of fast summation plan
Definition fastsum.c:887
C one_over_x(R x, int der, const R *param)
K(x) = 1/x.
Definition kernels.c:233
void fastsum_finalize_source_nodes(fastsum_plan *ths)
finalization of fastsum plan
Definition fastsum.c:996
static R BasisPoly(int m, int r, R xx)
basis polynomial for regularized kernel
Definition fastsum.c:67
C kubintkern(const R x, const C *Add, const int Ad, const R a)
linear spline interpolation in near field with even kernels
Definition fastsum.c:318
R * y
target knots in d-ball with radius 1/4-eps_b/2
Definition fastsum.h:95
R eps_I
inner boundary
Definition fastsum.h:113
#define PRE_FULL_PSI
Definition nfft3.h:192
#define FFT_OUT_OF_PLACE
Definition nfft3.h:196
#define PRE_PSI
Definition nfft3.h:191
#define PRE_LIN_PSI
Definition nfft3.h:189
#define FFTW_INIT
Definition nfft3.h:197
#define PRE_PHI_HUT
Definition nfft3.h:187
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
Internal header file for auxiliary definitions and functions.
Header file with predefined kernels for the fast summation algorithm.
Header file for the nfft3 library.
plan for fast summation algorithm
Definition fastsum.h:83