Actual source code: filtlan.c

slepc-3.18.2 2023-01-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    This file is an adaptation of several subroutines from FILTLAN, the
 12:    Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.

 14:    More information at:
 15:    https://www-users.cs.umn.edu/~saad/software/filtlan

 17:    References:

 19:        [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
 20:            eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
 21: */

 23: #include <slepc/private/stimpl.h>
 24: #include "filter.h"

 26: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
 27: static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
 28: static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);

 30: /* ////////////////////////////////////////////////////////////////////////////
 31:    //    Newton - Hermite Polynomial Interpolation
 32:    //////////////////////////////////////////////////////////////////////////// */

 34: /*
 35:    FILTLAN function NewtonPolynomial

 37:    build P(z) by Newton's divided differences in the form
 38:        P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
 39:    such that P(x(i)) = y(i) for i=1,...,n, where
 40:        x,y are input vectors of length n, and a is the output vector of length n
 41:    if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
 42:        and the Hermite polynomial interpolation is applied
 43:    in general, if there are k x(i)'s with the same value x0, then
 44:        the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
 45: */
 46: static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf)
 47: {
 48:   PetscReal      d,*sx=x,*sy=y;
 49:   PetscInt       j,k;

 51:   PetscArraycpy(sf,sy,n);

 53:   /* apply Newton's finite difference method */
 54:   sa[0] = sf[0];
 55:   for (j=1;j<n;j++) {
 56:     for (k=n-1;k>=j;k--) {
 57:       d = sx[k]-sx[k-j];
 58:       if (PetscUnlikely(d == 0.0)) sf[k] = 0.0;  /* assume that the derivative is 0.0 and apply the Hermite interpolation */
 59:       else sf[k] = (sf[k]-sf[k-1]) / d;
 60:     }
 61:     sa[j] = sf[j];
 62:   }
 63:   return 0;
 64: }

 66: /*
 67:    FILTLAN function HermiteBaseFilterInChebyshevBasis

 69:    compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
 70:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval

 72:    The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
 73:    P_j(z) a Hermite interpolating polynomial

 75:    input:
 76:    intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
 77:    HiLowFlags determines the shape of the base filter P(z)
 78:    Consider the j-th interval [intv(j), intv(j+1)]
 79:    HighLowFlag[j-1]==1,  P(z)==1 for z in [intv(j), intv(j+1)]
 80:                    ==0,  P(z)==0 for z in [intv(j), intv(j+1)]
 81:                    ==-1, [intv(j), intv(j+1)] is a transition interval;
 82:                          P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
 83:    baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
 84:    to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
 85:    end points z=intv(j) for i=1,...,baseDeg

 87:    output:
 88:    P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
 89:    to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
 90:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
 91:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
 92:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
 93:    with T_i(z) the Chebyshev polynomial of the first kind,
 94:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
 95:    the return matrix is the matrix of Chebyshev coefficients pp just described

 97:    note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
 98:    let n be the length of intv; then there are n-1 intervals
 99:    therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
100: */
101: static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)
102: {
103:   PetscInt       m,ii,jj;
104:   PetscReal      flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
105:   const PetscInt *hilo = HighLowFlags;

107:   m = 2*baseDeg+2;
108:   jj = npoints-1;  /* jj is initialized as the number of intervals */
109:   PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work);
110:   sbf = baseFilter;

112:   while (jj--) {  /* the main loop to compute the Chebyshev coefficients */

114:     flag = (PetscReal)(*hilo++);  /* get the flag of the current interval */
115:     if (flag == -1.0) {  /* flag == -1 means that the current interval is a transition polynomial */

117:       flag2 = (PetscReal)(*hilo);  /* get flag2, the flag of the next interval */
118:       flag0 = 1.0-flag2;       /* the flag of the previous interval is 1-flag2 */

120:       /* two pointers for traversing x[] and y[] */
121:       sx = px;
122:       sy = py;

124:       /* find the current interval [aa,bb] */
125:       aa = *currentPoint++;
126:       bb = *currentPoint;

128:       /* now left-hand side */
129:       ii = baseDeg+1;
130:       while (ii--) {
131:         *sy++ = flag0;
132:         *sx++ = aa;
133:       }

135:       /* now right-hand side */
136:       ii = baseDeg+1;
137:       while (ii--) {
138:         *sy++ = flag2;
139:         *sx++ = bb;
140:       }

142:       /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
143:       FILTLAN_NewtonPolynomial(m,px,py,pp,work);

145:       /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
146:          P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */

148:       /* translate the Newton coefficients to the Chebyshev coefficients */
149:       FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work);
150:       /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */

152:       /* copy the Chebyshev coefficients to baseFilter
153:          OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
154:       sq = qq;
155:       ii = 2*baseDeg+2;
156:       while (ii--) *sbf++ = *sq++;

158:     } else {

160:       /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
161:        OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
162:       *sbf++ = flag;

164:       /* the other coefficients are all zero, since P(z) is a constant
165:        OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
166:       ii = 2*baseDeg+1;
167:       while (ii--) *sbf++ = 0.0;

169:       /* for the next point */
170:       currentPoint++;
171:     }
172:   }
173:   PetscFree5(px,py,pp,qq,work);
174:   return 0;
175: }

177: /* ////////////////////////////////////////////////////////////////////////////
178:    //    Base Filter
179:    //////////////////////////////////////////////////////////////////////////// */

181: /*
182:    FILTLAN function GetIntervals

184:    this routine determines the intervals (including the transition one(s)) by an iterative process

186:    frame is a vector consisting of 4 ordered elements:
187:        [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
188:        [frame(2),frame(3)] is the interval in which the eigenvalues are sought
189:    baseDeg is the left-and-right degree of the base filter for each interval
190:    polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
191:    intv is the output; the j-th interval is [intv(j),intv(j+1))
192:    opts is a collection of interval options

194:    the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
195:    at each end point of intervals

197:    the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
198:    minimized with s(z) a polynomial of degree up to polyDeg

200:    the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
201:    y in [frame[0],frame[3]] but not in [frame[1],frame[2]]

203:    the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
204: */
205: static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)
206: {
207:   PetscReal       intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
208:   PetscReal       yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
209:   PetscReal       yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
210:   PetscInt        i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
211:   PetscBool       leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
212:   const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
213:   const PetscInt  HighLowFlags[5] = { 1, -1, 0, -1, 1 };  /* if filterType is 1, only first 3 elements will be used */
214:   const PetscInt  numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));

218:   filterInfo->filterType = 2;      /* mid-pass filter, for interior eigenvalues */
219:   if (b == b1) {
221:     filterInfo->filterType = 1;    /* high-pass filter, for largest eigenvalues */

224:   /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
225:   halfPlateau = 0.5*(b1-a1)*opts->initialPlateau;    /* half length of the "plateau" of the (dual) base filter */
226:   leftDelta = (b1-a1)*opts->initialShiftStep;        /* initial left shift */
227:   rightDelta = leftDelta;                            /* initial right shift */
228:   opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
229:   gridSize = (b-a) / (PetscReal)(opts->numGridPoints);

231:   for (i=0;i<6;i++) intv[i] = 0.0;
232:   if (filterInfo->filterType == 2) {  /* for interior eigenvalues */
233:     npoints = 6;
234:     intv[0] = a;
235:     intv[5] = b;
236:     /* intv[1], intv[2], intv[3], and intv[4] to be determined */
237:   } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
238:     npoints = 4;
239:     intv[0] = a;
240:     intv[3] = b;
241:     /* intv[1], and intv[2] to be determined */
242:   }
243:   z1 = a1 - leftDelta;
244:   z2 = b1 + rightDelta;
245:   filterInfo->filterOK = 0;  /* not yet found any OK filter */

247:   /* allocate matrices */
248:   PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter);

250:   /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
251:   intervals[0] = intv[0];
252:   intervals[3] = intv[3];
253:   intervals[5] = intv[5];
254:   intervals[1] = z1;
255:   if (filterInfo->filterType == 2) {  /* a mid-pass filter for interior eigenvalues */
256:     intervals[4] = z2;
257:     c = (a1+b1) / 2.0;
258:     intervals[2] = c - halfPlateau;
259:     intervals[3] = c + halfPlateau;
260:   } else {  /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
261:     intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
262:   }

264:   /* the main loop */
265:   for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
266:     if (z1 <= a) {  /* outer loop updates z1 and z2 */
267:       z1 = a;
268:       leftBoundaryMet = PETSC_TRUE;
269:     }
270:     if (filterInfo->filterType == 2) {  /* a <= z1 < (a1) */
271:       if (z2 >= b) {  /* a mid-pass filter for interior eigenvalues */
272:         z2 = b;
273:         rightBoundaryMet = PETSC_TRUE;
274:       }
275:       /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
276:       /* [z1, c-h] and [c+h, z2] are transition interval */
277:       intv[1] = z1;
278:       intv[4] = z2;
279:       c1 = z1 + halfPlateau;
280:       intv[2] = z1;           /* i.e. c1 - halfPlateau */
281:       intv[3] = c1 + halfPlateau;
282:       FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
283:       FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
284:       /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
285:       c2 = z2 - halfPlateau;
286:       intv[2] = c2 - halfPlateau;
287:       intv[3] = z2;           /* i.e. c2 + halfPlateau */
288:       FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
289:       FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
290:       fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
291:       yLimitGap = PETSC_MAX_REAL;
292:       ii = opts->maxInnerIter;
293:       while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
294:         /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
295:         c = (c1+c2) / 2.0;
296:         intv[2] = c - halfPlateau;
297:         intv[3] = c + halfPlateau;
298:         FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
299:         FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
300:         fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
301:         if (fc*fc2 < 0.0) {
302:           c1 = c;
303:           /* fc1 = fc; */
304:         } else {
305:           c2 = c;
306:           fc2 = fc;
307:         }
308:         yLimitGap = PetscAbsReal(fc);
309:       }
310:     } else {  /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
311:       intv[1] = z1;
312:       intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
313:       FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg);
314:       FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg);
315:     }
316:     /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
317:        expanded in the `translated' Chebyshev basis */
318:     /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
319:     yLeftLimit  = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
320:     yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
321:     yLimit  = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
322:     ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
323:     x = a1;
324:     while ((x+=gridSize) < b1) {
325:       y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
326:       if (y < yLimit)  yLimit  = y;
327:       if (y > ySummit) ySummit = y;
328:     }
329:     /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
330:     stepLeft  = PETSC_FALSE;
331:     stepRight = PETSC_FALSE;
332:     if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
333:       /* very bad, step to see what will happen */
334:       stepLeft = PETSC_TRUE;
335:       if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
336:     } else if (filterInfo->filterType == 2) {
337:       if (yLeftLimit < yRightLimit) {
338:         if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
339:       } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
340:     }
341:     if (!stepLeft) {
342:       yLeftBottom = yLeftLimit;
343:       x = a1;
344:       while ((x-=gridSize) >= a) {
345:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
346:         if (y < yLeftBottom) yLeftBottom = y;
347:         else if (y > yLeftBottom) break;
348:       }
349:       yLeftSummit = yLeftBottom;
350:       while ((x-=gridSize) >= a) {
351:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
352:         if (y > yLeftSummit) {
353:           yLeftSummit = y;
354:           if (yLeftSummit > yLimit*opts->yRippleLimit) {
355:             stepLeft = PETSC_TRUE;
356:             break;
357:           }
358:         }
359:         if (y < yLeftBottom) yLeftBottom = y;
360:       }
361:     }
362:     if (filterInfo->filterType == 2 && !stepRight) {
363:       yRightBottom = yRightLimit;
364:       x = b1;
365:       while ((x+=gridSize) <= b) {
366:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
367:         if (y < yRightBottom) yRightBottom = y;
368:         else if (y > yRightBottom) break;
369:       }
370:       yRightSummit = yRightBottom;
371:       while ((x+=gridSize) <= b) {
372:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
373:         if (y > yRightSummit) {
374:           yRightSummit = y;
375:           if (yRightSummit > yLimit*opts->yRippleLimit) {
376:             stepRight = PETSC_TRUE;
377:             break;
378:           }
379:         }
380:         if (y < yRightBottom) yRightBottom = y;
381:       }
382:     }
383:     if (!stepLeft && !stepRight) {
384:       if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
385:       else bottom = yLeftBottom;
386:       qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
387:       if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
388:         /* found the first OK filter or a better filter */
389:         for (i=0;i<6;i++) intervals[i] = intv[i];
390:         filterInfo->filterOK           = 1;
391:         filterInfo->filterQualityIndex = qIndex;
392:         filterInfo->numIter            = numIter;
393:         filterInfo->yLimit             = yLimit;
394:         filterInfo->ySummit            = ySummit;
395:         filterInfo->numLeftSteps       = numLeftSteps;
396:         filterInfo->yLeftSummit        = yLeftSummit;
397:         filterInfo->yLeftBottom        = yLeftBottom;
398:         if (filterInfo->filterType == 2) {
399:           filterInfo->yLimitGap        = yLimitGap;
400:           filterInfo->numRightSteps    = numRightSteps;
401:           filterInfo->yRightSummit     = yRightSummit;
402:           filterInfo->yRightBottom     = yRightBottom;
403:         }
404:         numMoreLooked = 0;
405:       } else if (++numMoreLooked == numLookMore) {
406:         /* filter has been optimal */
407:         filterInfo->filterOK = 2;
408:         break;
409:       }
410:       /* try stepping further to see whether it can improve */
411:       stepLeft = PETSC_TRUE;
412:       if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
413:     }
414:     /* check whether we can really "step" */
415:     if (leftBoundaryMet) {
416:       if (filterInfo->filterType == 1 || rightBoundaryMet) break;  /* cannot step further, so break the loop */
417:       if (stepLeft) {
418:         /* cannot step left, so try stepping right */
419:         stepLeft  = PETSC_FALSE;
420:         stepRight = PETSC_TRUE;
421:       }
422:     }
423:     if (rightBoundaryMet && stepRight) {
424:       /* cannot step right, so try stepping left */
425:       stepRight = PETSC_FALSE;
426:       stepLeft  = PETSC_TRUE;
427:     }
428:     /* now "step" */
429:     if (stepLeft) {
430:       numLeftSteps++;
431:       if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
432:       z1 -= leftDelta;
433:     }
434:     if (stepRight) {
435:       numRightSteps++;
436:       rightDelta *= opts->shiftStepExpanRate;  /* expand the step for faster convergence */
437:       z2 += rightDelta;
438:     }
439:     if (filterInfo->filterType == 2) {
440:       /* shrink the "plateau" of the (dual) base filter */
441:       if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
442:       else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
443:     }
444:   }

447:   filterInfo->totalNumIter = numIter;
448:   PetscFree2(polyFilter,baseFilter);
449:   return 0;
450: }

452: /* ////////////////////////////////////////////////////////////////////////////
453:    //    Chebyshev Polynomials
454:    //////////////////////////////////////////////////////////////////////////// */

456: /*
457:    FILTLAN function ExpandNewtonPolynomialInChebyshevBasis

459:    translate the coefficients of a Newton polynomial to the coefficients
460:    in a basis of the `translated' (scale-and-shift) Chebyshev polynomials

462:    input:
463:    a Newton polynomial defined by vectors a and x:
464:        P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
465:    the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
466:        where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
467:    note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2

469:    output:
470:    a vector q containing the Chebyshev coefficients:
471:        P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
472: */
473: static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)
474: {
475:   PetscInt  m,mm;
476:   PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;

478:   sa = a+n;    /* pointers for traversing a and x */
479:   sx = x+n-1;
480:   *q = *--sa;  /* set q[0] = a(n) */

482:   c = (aa+bb)/2.0;
483:   h = (bb-aa)/2.0;
484:   h2 = h/2.0;

486:   for (m=1;m<=n-1;m++) {  /* the main loop for translation */

488:     /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
489:     mm = m;
490:     sq = q;
491:     sq2 = q2;
492:     c2 = c-(*--sx);
493:     while (mm--) *(sq2++) = c2*(*sq++);
494:     *sq2 = 0.0;         /* q2[m] = 0.0 */
495:     *(q2+1) += h*(*q);  /* q2[1] = q2[1] + h*q[0] */

497:     /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
498:     mm = m-1;
499:     sq2 = q2;
500:     sq = q+1;
501:     while (mm--) *(sq2++) += h2*(*sq++);

503:     /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
504:     mm = m-1;
505:     sq2 = q2+2;
506:     sq = q+1;
507:     while (mm--) *(sq2++) += h2*(*sq++);

509:     /* compute q[0:m] = q2[0:m] */
510:     mm = m+1;
511:     sq2 = q2;
512:     sq = q;
513:     while (mm--) *sq++ = *sq2++;
514:     *q += (*--sa);      /* q[0] = q[0] + p[n-m-1] */
515:   }
516:   return 0;
517: }

519: /*
520:    FILTLAN function PolynomialEvaluationInChebyshevBasis

522:    evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
523:    the `translated' (i.e. scale-and-shift) Chebyshev polynomials

525:    input:
526:    c is a vector of Chebyshev coefficients which defines the polynomial
527:        P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
528:    where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
529:        c = (intv(j)+intv(j+1)) / 2,  h = (intv(j+1)-intv(j)) / 2
530:    note that T_i(z) is the Chebyshev polynomial of the first kind,
531:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

533:    output:
534:    the evaluated value of P(z) at z=z0
535: */
536: static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)
537: {
538:   PetscInt  ii,deg1;
539:   PetscReal y,zz,t0,t1,t2,*sc;

541:   deg1 = m;
542:   if (aa==-1.0 && bb==1.0) zz = z0;  /* treat it as a special case to reduce rounding errors */
543:   else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);

545:   /* compute y = P(z0), where we utilize the Chebyshev recursion */
546:   sc = pp+(idx-1)*m;   /* sc points to column idx of pp */
547:   y = *sc++;
548:   t1 = 1.0; t2 = zz;
549:   ii = deg1-1;
550:   while (ii--) {
551:     /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
552:        the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
553:     t0 = 2*zz*t1 - t2;
554:     /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
555:     t2 = t1;
556:     t1 = t0;
557:     y += (*sc++)*t0;
558:   }
559:   return y;
560: }

562: #define basisTranslated PETSC_TRUE
563: /*
564:    FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis

566:    evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
567:    in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval

569:    input:
570:    intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
571:    pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
572:    in a basis of the `translated' Chebyshev polynomials in each interval
573:    the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
574:        if basisTranslated == false, then
575:            P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
576:        where T_i(z) is the Chebyshev polynomial of the first kind,
577:            T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
578:        if basisTranslated == true, then
579:            P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
580:        where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
581:            c = (intv(j)+intv(j+1)) / 2,  h = (intv(j+1)-intv(j)) / 2

583:    output:
584:    the evaluated value of P(z) at z=z0

586:    note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
587:              if z0 flies over  the last  interval, then the polynomial in the last  interval will be used to evaluate P(z0)
588: */
589: static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)
590: {
591:   PetscReal *sintv,aa,bb,resul;
592:   PetscInt  idx;

594:   /* determine the interval which contains z0 */
595:   sintv = &intv[1];
596:   idx = 1;
597:   if (npoints>2 && z0 >= *sintv) {
598:     sintv++;
599:     while (++idx < npoints-1) {
600:       if (z0 < *sintv) break;
601:       sintv++;
602:     }
603:   }
604:   /* idx==1 if npoints<=2; otherwise idx satisfies:
605:          intv(idx) <= z0 < intv(idx+1),  if 2 <= idx <= npoints-2
606:          z0 < intv(idx+1),               if idx == 1
607:          intv(idx) <= z0,                if idx == npoints-1
608:      in addition, sintv points to &intv(idx+1) */
609:   if (basisTranslated) {
610:     /* the basis consists of `translated' Chebyshev polynomials */
611:     /* find the interval of concern, [aa,bb] */
612:     aa = *(sintv-1);
613:     bb = *sintv;
614:     resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
615:   } else {
616:     /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
617:     resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
618:   }
619:   return resul;
620: }

622: /*
623:    FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis

625:    compute the weighted inner product of two piecewise polynomials expanded
626:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval

628:    pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
629:    for z in the j-th interval, P(z) equals
630:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
631:    and Q(z) equals
632:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
633:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
634:        S_i((z-c)/h) = T_i(z),  c = (aa+bb)) / 2,  h = (bb-aa) / 2,
635:    with T_i(z) the Chebyshev polynomial of the first kind
636:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

638:    the (scaled) j-th interval inner product is defined by
639:        <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)),
640:    which comes from the property
641:        <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j

643:    the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
644:    which is the return value

646:    note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
647: */
648: static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)
649: {
650:   PetscInt  nintv,deg1,i,k;
651:   PetscReal *sp,*sq,ans=0.0,ans2;

653:   deg1 = PetscMin(prows,qrows);  /* number of effective coefficients, one more than the effective polynomial degree */
654:   if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0);
655:   nintv = PetscMin(pcols,qcols);  /* number of intervals */

657:   /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
658:      compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
659:   for (i=0;i<nintv;i++) {   /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
660:     sp = pp+i*ldp;
661:     sq = qq+i*ldq;
662:     ans2 = (*sp) * (*sq);  /* the first term pp(1,i)*qq(1,i) is being added twice */
663:     for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++);  /* add pp(k,i)*qq(k,i) */
664:     ans += ans2*intervalWeights[i];  /* compute ans += ans2*intervalWeights(i) */
665:   }
666:   PetscFunctionReturn(ans*PETSC_PI/2.0);
667: }

669: /*
670:    FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX

672:    compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
673:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval

675:    P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively

677:    For z in the j-th interval, P(z) equals
678:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
679:    and Q(z) equals
680:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
681:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
682:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
683:    with T_i(z) the Chebyshev polynomial of the first kind
684:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

686:    the returned matrix is qq which represents Q(z) = z*P(z)
687: */
688: static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)
689: {
690:   PetscInt  i,j;
691:   PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;

693:   for (j=0;j<nintv;j++) {    /* consider interval between intv(j) and intv(j+1) */
694:     sp = pp+j*ldp;
695:     sq = qq+j*ldq;
696:     sp2 = sp;
697:     sq2 = sq+1;
698:     c = 0.5*(intv[j] + intv[j+1]);   /* compute c = (intv(j) + intv(j+1))/2 */
699:     h = 0.5*(intv[j+1] - intv[j]);   /* compute h = (intv(j+1) - intv(j))/2  and  h2 = h/2 */
700:     h2 = 0.5*h;
701:     i = deg1;
702:     while (i--) *sq++ = c*(*sp++);    /* compute q(1:deg1,j) = c*p(1:deg1,j) */
703:     *sq++ = 0.0;                      /* set q(deg1+1,j) = 0.0 */
704:     *(sq2++) += h*(*sp2++);           /* compute q(2,j) = q(2,j) + h*p(1,j) */
705:     i = deg1-1;
706:     while (i--) {       /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
707:       tmp = h2*(*sp2++);
708:       *(sq2-2) += tmp;
709:       *(sq2++) += tmp;
710:     }
711:   }
712:   return 0;
713: }

715: /* ////////////////////////////////////////////////////////////////////////////
716:    //    Conjugate Residual Method in the Polynomial Space
717:    //////////////////////////////////////////////////////////////////////////// */

719: /*
720:     B := alpha*A + beta*B

722:     A,B are nxk
723: */
724: static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)
725: {
726:   PetscInt       i,j;

728:   if (beta==(PetscReal)1.0) {
729:     for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
730:     PetscLogFlops(2.0*n*k);
731:   } else {
732:     for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
733:     PetscLogFlops(3.0*n*k);
734:   }
735:   return 0;
736: }

738: /*
739:    FILTLAN function FilteredConjugateResidualPolynomial

741:    ** Conjugate Residual Method in the Polynomial Space

743:    this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
744:    where P(z), the base filter, is the input piecewise polynomial, and
745:          Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
746:    niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
747:    both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
748:    and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively

750:    input:
751:    intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
752:    w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
753:        the interval weights define the inner product of two continuous functions and then
754:        the derived w-norm ||P(z)-Q(z)||_w
755:    pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
756:    to be specific, for z in [intv(j), intv(j+1)), P(z) equals
757:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
758:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
759:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
760:    with T_i(z) the Chebyshev polynomial of the first kind,
761:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

763:    output:
764:    the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
765:    and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
766:    this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
767:    to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
768:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)

770:    note:
771:    1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
772:       for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
773:    2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
774:       in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
775: */
776: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)
777: {
778:   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
779:   PetscReal      rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;

781:   nintv = m-1;
782:   ld = niter+2;  /* leading dimension */
783:   PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol);
784:   PetscArrayzero(cpol,ld*nintv);
785:   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
786:   sppol = 2;
787:   srpol = 2;
788:   scpol = 2;
789:   for (j=0;j<nintv;j++) {
790:     ppol[j*ld] = 1.0;
791:     rpol[j*ld] = 1.0;
792:     cpol[j*ld] = 1.0;
793:   }
794:   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
795:      rpol is the r-polynomial (corresponding to the residual vector r in CG)
796:      cpol is the "corrected" residual polynomial (result of this function) */
797:   sappol = 3;
798:   sarpol = 3;
799:   FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
800:   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
801:   rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
802:   for (i=0;i<niter;i++) {
803:     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
804:     alp0 = rho/den;
805:     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
806:     alp  = (rho-rho1)/den;
807:     srpol++;
808:     scpol++;
809:     Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
810:     Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
811:     if (i+1 == niter) break;
812:     sarpol++;
813:     FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
814:     rho0 = rho;
815:     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
816:     bet  = rho / rho0;
817:     sppol++;
818:     sappol++;
819:     Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
820:     Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);
821:   }
822:   PetscFree4(ppol,rpol,appol,arpol);
823:   return 0;
824: }

826: /*
827:    FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct

829:    this routine employs a conjugate-residual-type algorithm in polynomial space to compute
830:    x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
831:    P(z) is a given piecewise polynomial, called the base filter,
832:    s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
833:    and b and x0 are given vectors

835:    note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
836:    and presented as a matrix of Chebyshev coefficients pp

838:    input:
839:    A is a sparse matrix
840:    x0, b are vectors
841:    niter is the number of conjugate-residual iterations
842:    intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
843:    w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
844:        the interval weights define the inner product of two continuous functions and then
845:        the derived w-norm ||P(z)-Q(z)||_w
846:    pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
847:    to be specific, for z in [intv(j), intv(j+1)), P(z) equals
848:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
849:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
850:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
851:    with T_i(z) the Chebyshev polynomial of the first kind,
852:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
853:    tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
854:        than tol, then stop the conjugate-residual iteration

856:    output:
857:    the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
858:    subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
859:    in short, z*s(z) approximates 1-P(z)

861:    note:
862:    1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
863:       for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
864:    2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
865:       in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
866:    3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
867:       in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
868:       z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
869: */
870: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)
871: {
872:   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld;
873:   PetscReal      rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
874:   Vec            r=work[0],p=work[1],ap=work[2],w=work[3];
875:   PetscScalar    alpha;

877:   ld = niter+3;  /* leading dimension */
878:   PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol);
879:   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
880:   sppol = 2;
881:   srpol = 2;
882:   scpol = 2;
883:   for (j=0;j<nintv;j++) {
884:     ppol[j*ld] = 1.0;
885:     rpol[j*ld] = 1.0;
886:     cpol[j*ld] = 1.0;
887:   }
888:   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
889:      rpol is the r-polynomial (corresponding to the residual vector r in CG)
890:      cpol is the "corrected" residual polynomial */
891:   sappol = 3;
892:   sarpol = 3;
893:   FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
894:   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
895:   rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
896:   rho = rho00;

898:   /* corrected CR in vector space */
899:   /* we assume x0 is always 0 */
900:   VecSet(x,0.0);
901:   VecCopy(b,r);     /* initial residual r = b-A*x0 */
902:   VecCopy(r,p);     /* p = r */
903:   MatMult(A,p,ap);  /* ap = A*p */

905:   for (i=0;i<niter;i++) {
906:     /* iteration in the polynomial space */
907:     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
908:     alp0 = rho/den;
909:     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
910:     alp  = (rho-rho1)/den;
911:     srpol++;
912:     scpol++;
913:     Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
914:     Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
915:     sarpol++;
916:     FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
917:     rho0 = rho;
918:     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);

920:     /* update x in the vector space */
921:     alpha = alp;
922:     VecAXPY(x,alpha,p);   /* x += alp*p */
923:     if (rho < tol*rho00) break;

925:     /* finish the iteration in the polynomial space */
926:     bet = rho / rho0;
927:     sppol++;
928:     sappol++;
929:     Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
930:     Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);

932:     /* finish the iteration in the vector space */
933:     alpha = -alp0;
934:     VecAXPY(r,alpha,ap);    /* r -= alp0*ap */
935:     alpha = bet;
936:     VecAYPX(p,alpha,r);     /* p = r + bet*p */
937:     MatMult(A,r,w);         /* ap = A*r + bet*ap */
938:     VecAYPX(ap,alpha,w);
939:   }
940:   PetscFree5(ppol,rpol,cpol,appol,arpol);
941:   return 0;
942: }

944: /*
945:    Gateway to FILTLAN for evaluating y=p(A)*x
946: */
947: PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y)
948: {
949:   ST             st;
950:   ST_FILTER      *ctx;
951:   PetscInt       npoints;

953:   MatShellGetContext(A,&st);
954:   ctx = (ST_FILTER*)st->data;
955:   npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
956:   FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work);
957:   VecCopy(y,st->work[0]);
958:   MatMult(ctx->T,st->work[0],y);
959:   return 0;
960: }

962: /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */
963: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W)
964: {
965:   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld;
966:   PetscReal      rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
967:   PetscScalar    alpha;

969:   ld = niter+3;  /* leading dimension */
970:   PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol);
971:   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
972:   sppol = 2;
973:   srpol = 2;
974:   scpol = 2;
975:   for (j=0;j<nintv;j++) {
976:     ppol[j*ld] = 1.0;
977:     rpol[j*ld] = 1.0;
978:     cpol[j*ld] = 1.0;
979:   }
980:   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
981:      rpol is the r-polynomial (corresponding to the residual vector r in CG)
982:      cpol is the "corrected" residual polynomial */
983:   sappol = 3;
984:   sarpol = 3;
985:   FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
986:   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
987:   rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
988:   rho = rho00;

990:   /* corrected CR in vector space */
991:   /* we assume x0 is always 0 */
992:   MatZeroEntries(C);
993:   MatCopy(B,R,SAME_NONZERO_PATTERN);     /* initial residual r = b-A*x0 */
994:   MatCopy(R,P,SAME_NONZERO_PATTERN);     /* p = r */
995:   MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP);  /* ap = A*p */

997:   for (i=0;i<niter;i++) {
998:     /* iteration in the polynomial space */
999:     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
1000:     alp0 = rho/den;
1001:     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
1002:     alp  = (rho-rho1)/den;
1003:     srpol++;
1004:     scpol++;
1005:     Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
1006:     Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
1007:     sarpol++;
1008:     FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
1009:     rho0 = rho;
1010:     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);

1012:     /* update x in the vector space */
1013:     alpha = alp;
1014:     MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN);   /* x += alp*p */
1015:     if (rho < tol*rho00) break;

1017:     /* finish the iteration in the polynomial space */
1018:     bet = rho / rho0;
1019:     sppol++;
1020:     sappol++;
1021:     Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
1022:     Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);

1024:     /* finish the iteration in the vector space */
1025:     alpha = -alp0;
1026:     MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN);    /* r -= alp0*ap */
1027:     alpha = bet;
1028:     MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN);     /* p = r + bet*p */
1029:     MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W);         /* ap = A*r + bet*ap */
1030:     MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN);
1031:   }
1032:   PetscFree5(ppol,rpol,cpol,appol,arpol);
1033:   return 0;
1034: }

1036: /*
1037:    Gateway to FILTLAN for evaluating C=p(A)*B
1038: */
1039: PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx)
1040: {
1041:   ST             st;
1042:   ST_FILTER      *ctx;
1043:   PetscInt       i,m1,m2,npoints;

1045:   MatShellGetContext(A,&st);
1046:   ctx = (ST_FILTER*)st->data;
1047:   npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1048:   if (ctx->nW) {  /* check if work matrices must be resized */
1049:     MatGetSize(B,NULL,&m1);
1050:     MatGetSize(ctx->W[0],NULL,&m2);
1051:     if (m1!=m2) {
1052:       MatDestroyMatrices(ctx->nW,&ctx->W);
1053:       ctx->nW = 0;
1054:     }
1055:   }
1056:   if (!ctx->nW) {  /* allocate work matrices */
1057:     ctx->nW = 4;
1058:     PetscMalloc1(ctx->nW,&ctx->W);
1059:     for (i=0;i<ctx->nW;i++) MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i]);
1060:   }
1061:   FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3]);
1062:   MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);
1063:   return 0;
1064: }

1066: /*
1067:    FILTLAN function PolynomialFilterInterface::setFilter

1069:    Creates the shifted (and scaled) matrix and the base filter P(z).
1070:    M is a shell matrix whose MatMult() applies the filter.
1071: */
1072: PetscErrorCode STFilter_FILTLAN_setFilter(ST st,Mat *G)
1073: {
1074:   ST_FILTER      *ctx = (ST_FILTER*)st->data;
1075:   PetscInt       i,npoints,n,m,N,M;
1076:   PetscReal      frame2[4];
1077:   PetscScalar    alpha;
1078:   const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };

1080:   if (ctx->frame[0] == ctx->frame[1]) {  /* low pass filter, convert it to high pass filter */
1081:     /* T = frame[3]*eye(n) - A */
1082:     MatDestroy(&ctx->T);
1083:     MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T);
1084:     MatScale(ctx->T,-1.0);
1085:     alpha = ctx->frame[3];
1086:     MatShift(ctx->T,alpha);
1087:     for (i=0;i<4;i++) frame2[i] = ctx->frame[3] - ctx->frame[3-i];
1088:   } else {  /* it can be a mid-pass filter or a high-pass filter */
1089:     if (ctx->frame[0] == 0.0) {
1090:       PetscObjectReference((PetscObject)st->A[0]);
1091:       MatDestroy(&ctx->T);
1092:       ctx->T = st->A[0];
1093:       for (i=0;i<4;i++) frame2[i] = ctx->frame[i];
1094:     } else {
1095:       /* T = A - frame[0]*eye(n) */
1096:       MatDestroy(&ctx->T);
1097:       MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T);
1098:       alpha = -ctx->frame[0];
1099:       MatShift(ctx->T,alpha);
1100:       for (i=0;i<4;i++) frame2[i] = ctx->frame[i] - ctx->frame[0];
1101:     }
1102:   }

1104:   /* no need to recompute filter if the parameters did not change */
1105:   if (st->state==ST_STATE_INITIAL || ctx->filtch) {
1106:     FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1107:     /* translate the intervals back */
1108:     if (ctx->frame[0] == ctx->frame[1]) {  /* low pass filter, convert it to high pass filter */
1109:       for (i=0;i<4;i++) ctx->intervals2[i] = ctx->frame[3] - ctx->intervals[3-i];
1110:     } else {  /* it can be a mid-pass filter or a high-pass filter */
1111:       if (ctx->frame[0] == 0.0) {
1112:         for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i];
1113:       } else {
1114:         for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i] + ctx->frame[0];
1115:       }
1116:     }

1118:     npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1119:     PetscFree(ctx->baseFilter);
1120:     PetscMalloc1((2*ctx->baseDegree+2)*(npoints-1),&ctx->baseFilter);
1121:     FILTLAN_HermiteBaseFilterInChebyshevBasis(ctx->baseFilter,ctx->intervals,npoints,HighLowFlags,ctx->baseDegree);
1122:     PetscInfo(st,"Computed value of yLimit = %g\n",(double)ctx->filterInfo->yLimit);
1123:   }
1124:   ctx->filtch = PETSC_FALSE;

1126:   /* create shell matrix*/
1127:   if (!*G) {
1128:     MatGetSize(ctx->T,&N,&M);
1129:     MatGetLocalSize(ctx->T,&n,&m);
1130:     MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G);
1131:     MatShellSetOperation(*G,MATOP_MULT,(void(*)(void))MatMult_FILTLAN);
1132:     MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE);
1133:     MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA);
1134:   }
1135:   return 0;
1136: }