Actual source code: dshep.c
slepc-3.18.2 2023-01-26
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
15: {
16: DSAllocateMat_Private(ds,DS_MAT_A);
17: DSAllocateMat_Private(ds,DS_MAT_Q);
18: DSAllocateMat_Private(ds,DS_MAT_T);
19: PetscFree(ds->perm);
20: PetscMalloc1(ld,&ds->perm);
21: return 0;
22: }
24: /* 0 l k n-1
25: -----------------------------------------
26: |* . . |
27: | * . . |
28: | * . . |
29: | * . . |
30: |. . . . o o |
31: | o o |
32: | o o |
33: | o o |
34: | o o |
35: | o o |
36: |. . . . o o o o o o o x |
37: | x x x |
38: | x x x |
39: | x x x |
40: | x x x |
41: | x x x |
42: | x x x |
43: | x x x |
44: | x x x|
45: | x x|
46: -----------------------------------------
47: */
49: static PetscErrorCode DSSwitchFormat_HEP(DS ds)
50: {
51: PetscReal *T;
52: PetscScalar *A;
53: PetscInt i,n=ds->n,k=ds->k,ld=ds->ld;
55: /* switch from compact (arrow) to dense storage */
56: MatDenseGetArrayWrite(ds->omat[DS_MAT_A],&A);
57: DSGetArrayReal(ds,DS_MAT_T,&T);
58: PetscArrayzero(A,ld*ld);
59: for (i=0;i<k;i++) {
60: A[i+i*ld] = T[i];
61: A[k+i*ld] = T[i+ld];
62: A[i+k*ld] = T[i+ld];
63: }
64: A[k+k*ld] = T[k];
65: for (i=k+1;i<n;i++) {
66: A[i+i*ld] = T[i];
67: A[i-1+i*ld] = T[i-1+ld];
68: A[i+(i-1)*ld] = T[i-1+ld];
69: }
70: if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
71: MatDenseRestoreArrayWrite(ds->omat[DS_MAT_A],&A);
72: DSRestoreArrayReal(ds,DS_MAT_T,&T);
73: return 0;
74: }
76: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
77: {
78: PetscViewerFormat format;
79: PetscInt i,j,r,c,rows;
80: PetscReal *T,value;
81: const char *methodname[] = {
82: "Implicit QR method (_steqr)",
83: "Relatively Robust Representations (_stevr)",
84: "Divide and Conquer method (_stedc)",
85: "Block Divide and Conquer method (dsbtdc)"
86: };
87: const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);
89: PetscViewerGetFormat(viewer,&format);
90: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
91: if (ds->bs>1) PetscViewerASCIIPrintf(viewer,"block size: %" PetscInt_FMT "\n",ds->bs);
92: if (ds->method<nmeth) PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
93: return 0;
94: }
95: if (ds->compact) {
96: DSGetArrayReal(ds,DS_MAT_T,&T);
97: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
98: rows = ds->extrarow? ds->n+1: ds->n;
99: if (format == PETSC_VIEWER_ASCII_MATLAB) {
100: PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rows,ds->n);
101: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",3*ds->n);
102: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
103: for (i=0;i<ds->n;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,i+1,(double)T[i]);
104: for (i=0;i<rows-1;i++) {
105: r = PetscMax(i+2,ds->k+1);
106: c = i+1;
107: PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",r,c,(double)T[i+ds->ld]);
108: if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
109: PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",c,r,(double)T[i+ds->ld]);
110: }
111: }
112: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
113: } else {
114: for (i=0;i<rows;i++) {
115: for (j=0;j<ds->n;j++) {
116: if (i==j) value = T[i];
117: else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = T[PetscMin(i,j)+ds->ld];
118: else if (i==j+1 && i>ds->k) value = T[i-1+ds->ld];
119: else if (i+1==j && j>ds->k) value = T[j-1+ds->ld];
120: else value = 0.0;
121: PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
122: }
123: PetscViewerASCIIPrintf(viewer,"\n");
124: }
125: }
126: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
127: PetscViewerFlush(viewer);
128: DSRestoreArrayReal(ds,DS_MAT_T,&T);
129: } else DSViewMat(ds,viewer,DS_MAT_A);
130: if (ds->state>DS_STATE_INTERMEDIATE) DSViewMat(ds,viewer,DS_MAT_Q);
131: return 0;
132: }
134: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
135: {
136: PetscScalar *Z;
137: const PetscScalar *Q;
138: PetscInt ld = ds->ld;
140: switch (mat) {
141: case DS_MAT_X:
142: case DS_MAT_Y:
143: if (j) {
144: MatDenseGetArray(ds->omat[mat],&Z);
145: if (ds->state>=DS_STATE_CONDENSED) {
146: MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q);
147: PetscArraycpy(Z+(*j)*ld,Q+(*j)*ld,ld);
148: if (rnorm) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
149: MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q);
150: } else {
151: PetscArrayzero(Z+(*j)*ld,ld);
152: Z[(*j)+(*j)*ld] = 1.0;
153: if (rnorm) *rnorm = 0.0;
154: }
155: MatDenseRestoreArray(ds->omat[mat],&Z);
156: } else {
157: if (ds->state>=DS_STATE_CONDENSED) MatCopy(ds->omat[DS_MAT_Q],ds->omat[mat],SAME_NONZERO_PATTERN);
158: else DSSetIdentity(ds,mat);
159: }
160: break;
161: case DS_MAT_U:
162: case DS_MAT_V:
163: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
164: default:
165: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
166: }
167: return 0;
168: }
170: /*
171: ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form
173: [ d 0 0 0 e ]
174: [ 0 d 0 0 e ]
175: A = [ 0 0 d 0 e ]
176: [ 0 0 0 d e ]
177: [ e e e e d ]
179: to tridiagonal form
181: [ d e 0 0 0 ]
182: [ e d e 0 0 ]
183: T = Q'*A*Q = [ 0 e d e 0 ],
184: [ 0 0 e d e ]
185: [ 0 0 0 e d ]
187: where Q is an orthogonal matrix. Rutishauser's algorithm is used to
188: perform the reduction, which requires O(n**2) flops. The accumulation
189: of the orthogonal factor Q, however, requires O(n**3) flops.
191: Arguments
192: =========
194: N (input) INTEGER
195: The order of the matrix A. N >= 0.
197: D (input/output) DOUBLE PRECISION array, dimension (N)
198: On entry, the diagonal entries of the matrix A to be
199: reduced.
200: On exit, the diagonal entries of the reduced matrix T.
202: E (input/output) DOUBLE PRECISION array, dimension (N-1)
203: On entry, the off-diagonal entries of the matrix A to be
204: reduced.
205: On exit, the subdiagonal entries of the reduced matrix T.
207: Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
208: On exit, the orthogonal matrix Q.
210: LDQ (input) INTEGER
211: The leading dimension of the array Q.
213: Note
214: ====
215: Based on Fortran code contributed by Daniel Kressner
216: */
217: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
218: {
219: PetscBLASInt i,j,j2,one=1;
220: PetscReal c,s,p,off,temp;
222: if (n<=2) return 0;
224: for (j=0;j<n-2;j++) {
226: /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
227: temp = e[j+1];
228: PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&e[j],&c,&s,&e[j+1]));
229: s = -s;
231: /* Apply rotation to diagonal elements */
232: temp = d[j+1];
233: e[j] = c*s*(temp-d[j]);
234: d[j+1] = s*s*d[j] + c*c*temp;
235: d[j] = c*c*d[j] + s*s*temp;
237: /* Apply rotation to Q */
238: j2 = j+2;
239: PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));
241: /* Chase newly introduced off-diagonal entry to the top left corner */
242: for (i=j-1;i>=0;i--) {
243: off = -s*e[i];
244: e[i] = c*e[i];
245: temp = e[i+1];
246: PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&off,&c,&s,&e[i+1]));
247: s = -s;
248: temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
249: p = s*temp;
250: d[i+1] += p;
251: d[i] -= p;
252: e[i] = -e[i] - c*temp;
253: j2 = j+2;
254: PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
255: }
256: }
257: return 0;
258: }
260: /*
261: Reduce to tridiagonal form by means of ArrowTridiag.
262: */
263: static PetscErrorCode DSIntermediate_HEP(DS ds)
264: {
265: PetscInt i;
266: PetscBLASInt n1 = 0,n2,lwork,info,l = 0,n = 0,ld,off;
267: PetscScalar *Q,*work,*tau;
268: const PetscScalar *A;
269: PetscReal *d,*e;
270: Mat At,Qt; /* trailing submatrices */
272: PetscBLASIntCast(ds->n,&n);
273: PetscBLASIntCast(ds->l,&l);
274: PetscBLASIntCast(ds->ld,&ld);
275: PetscBLASIntCast(PetscMax(0,ds->k-l+1),&n1); /* size of leading block, excl. locked */
276: n2 = n-l; /* n2 = n1 + size of trailing block */
277: off = l+l*ld;
278: DSGetArrayReal(ds,DS_MAT_T,&d);
279: e = d+ld;
280: DSSetIdentity(ds,DS_MAT_Q);
281: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
283: if (ds->compact) {
285: if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);
287: } else {
289: MatDenseGetArrayRead(ds->omat[DS_MAT_A],&A);
290: for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }
292: if (ds->state<DS_STATE_INTERMEDIATE) {
293: MatDenseGetSubMatrix(ds->omat[DS_MAT_A],ds->l,ds->n,ds->l,ds->n,&At);
294: MatDenseGetSubMatrix(ds->omat[DS_MAT_Q],ds->l,ds->n,ds->l,ds->n,&Qt);
295: MatCopy(At,Qt,SAME_NONZERO_PATTERN);
296: MatDenseRestoreSubMatrix(ds->omat[DS_MAT_A],&At);
297: MatDenseRestoreSubMatrix(ds->omat[DS_MAT_Q],&Qt);
298: DSAllocateWork_Private(ds,ld+ld*ld,0,0);
299: tau = ds->work;
300: work = ds->work+ld;
301: lwork = ld*ld;
302: PetscCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n2,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
303: SlepcCheckLapackInfo("sytrd",info);
304: PetscCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n2,Q+off,&ld,tau,work,&lwork,&info));
305: SlepcCheckLapackInfo("orgtr",info);
306: } else {
307: /* copy tridiagonal to d,e */
308: for (i=l;i<n;i++) d[i] = PetscRealPart(A[i+i*ld]);
309: for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
310: }
311: MatDenseRestoreArrayRead(ds->omat[DS_MAT_A],&A);
312: }
313: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
314: DSRestoreArrayReal(ds,DS_MAT_T,&d);
315: return 0;
316: }
318: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
319: {
320: PetscInt n,l,i,*perm,ld=ds->ld;
321: PetscScalar *A;
322: PetscReal *d;
324: if (!ds->sc) return 0;
325: n = ds->n;
326: l = ds->l;
327: DSGetArrayReal(ds,DS_MAT_T,&d);
328: perm = ds->perm;
329: if (!rr) DSSortEigenvaluesReal_Private(ds,d,perm);
330: else DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
331: for (i=l;i<n;i++) wr[i] = d[perm[i]];
332: DSPermuteColumns_Private(ds,l,n,n,DS_MAT_Q,perm);
333: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
334: if (!ds->compact) {
335: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
336: for (i=l;i<n;i++) A[i+i*ld] = wr[i];
337: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
338: }
339: DSRestoreArrayReal(ds,DS_MAT_T,&d);
340: return 0;
341: }
343: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
344: {
345: PetscInt i;
346: PetscBLASInt n,ld,incx=1;
347: PetscScalar *A,*x,*y,one=1.0,zero=0.0;
348: PetscReal *T,*e,beta;
349: const PetscScalar *Q;
351: PetscBLASIntCast(ds->n,&n);
352: PetscBLASIntCast(ds->ld,&ld);
353: MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q);
354: if (ds->compact) {
355: DSGetArrayReal(ds,DS_MAT_T,&T);
356: e = T+ld;
357: beta = e[n-1]; /* in compact, we assume all entries are zero except the last one */
358: for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
359: DSRestoreArrayReal(ds,DS_MAT_T,&T);
360: ds->k = n;
361: } else {
362: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
363: DSAllocateWork_Private(ds,2*ld,0,0);
364: x = ds->work;
365: y = ds->work+ld;
366: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
367: PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
368: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
369: ds->k = n;
370: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
371: }
372: MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q);
373: return 0;
374: }
376: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
377: {
378: PetscInt i;
379: PetscBLASInt n1,info,l = 0,n = 0,ld,off;
380: PetscScalar *Q,*A;
381: PetscReal *d,*e;
384: PetscBLASIntCast(ds->n,&n);
385: PetscBLASIntCast(ds->l,&l);
386: PetscBLASIntCast(ds->ld,&ld);
387: n1 = n-l; /* n1 = size of leading block, excl. locked + size of trailing block */
388: off = l+l*ld;
389: DSGetArrayReal(ds,DS_MAT_T,&d);
390: e = d+ld;
392: /* Reduce to tridiagonal form */
393: DSIntermediate_HEP(ds);
395: /* Solve the tridiagonal eigenproblem */
396: for (i=0;i<l;i++) wr[i] = d[i];
398: DSAllocateWork_Private(ds,0,2*ld,0);
399: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
400: PetscCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&info));
401: SlepcCheckLapackInfo("steqr",info);
402: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
403: for (i=l;i<n;i++) wr[i] = d[i];
405: /* Create diagonal matrix as a result */
406: if (ds->compact) PetscArrayzero(e,n-1);
407: else {
408: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
409: for (i=l;i<n;i++) PetscArrayzero(A+l+i*ld,n-l);
410: for (i=l;i<n;i++) A[i+i*ld] = d[i];
411: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
412: }
413: DSRestoreArrayReal(ds,DS_MAT_T,&d);
415: /* Set zero wi */
416: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
417: return 0;
418: }
420: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
421: {
422: Mat At,Qt; /* trailing submatrices */
423: PetscInt i;
424: PetscBLASInt n1 = 0,n2 = 0,n3,lrwork,liwork,info,l = 0,n = 0,m = 0,ld,off,il,iu,*isuppz;
425: PetscScalar *A,*Q,*W=NULL,one=1.0,zero=0.0;
426: PetscReal *d,*e,abstol=0.0,vl,vu;
427: #if defined(PETSC_USE_COMPLEX)
428: PetscInt j;
429: PetscReal *Qr,*ritz;
430: #endif
433: PetscBLASIntCast(ds->n,&n);
434: PetscBLASIntCast(ds->l,&l);
435: PetscBLASIntCast(ds->ld,&ld);
436: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
437: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
438: n3 = n1+n2;
439: off = l+l*ld;
440: DSGetArrayReal(ds,DS_MAT_T,&d);
441: e = d+ld;
443: /* Reduce to tridiagonal form */
444: DSIntermediate_HEP(ds);
446: /* Solve the tridiagonal eigenproblem */
447: for (i=0;i<l;i++) wr[i] = d[i];
449: if (ds->state<DS_STATE_INTERMEDIATE) { /* Q contains useful info */
450: DSAllocateMat_Private(ds,DS_MAT_W);
451: MatCopy(ds->omat[DS_MAT_Q],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN);
452: }
453: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
454: lrwork = 20*ld;
455: liwork = 10*ld;
456: #if defined(PETSC_USE_COMPLEX)
457: DSAllocateWork_Private(ds,0,lrwork+ld+ld*ld,liwork+2*ld);
458: #else
459: DSAllocateWork_Private(ds,0,lrwork+ld,liwork+2*ld);
460: #endif
461: isuppz = ds->iwork+liwork;
462: #if defined(PETSC_USE_COMPLEX)
463: ritz = ds->rwork+lrwork;
464: Qr = ds->rwork+lrwork+ld;
465: PetscCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,Qr+off,&ld,isuppz,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
466: for (i=l;i<n;i++) wr[i] = ritz[i];
467: #else
468: PetscCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
469: #endif
470: SlepcCheckLapackInfo("stevr",info);
471: #if defined(PETSC_USE_COMPLEX)
472: for (i=l;i<n;i++)
473: for (j=l;j<n;j++)
474: Q[i+j*ld] = Qr[i+j*ld];
475: #endif
476: if (ds->state<DS_STATE_INTERMEDIATE) { /* accumulate previous Q */
477: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
478: MatDenseGetArray(ds->omat[DS_MAT_W],&W);
479: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
480: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
481: MatDenseRestoreArray(ds->omat[DS_MAT_W],&W);
482: MatDenseGetSubMatrix(ds->omat[DS_MAT_A],ds->l,ds->n,ds->l,ds->n,&At);
483: MatDenseGetSubMatrix(ds->omat[DS_MAT_Q],ds->l,ds->n,ds->l,ds->n,&Qt);
484: MatCopy(At,Qt,SAME_NONZERO_PATTERN);
485: MatDenseRestoreSubMatrix(ds->omat[DS_MAT_A],&At);
486: MatDenseRestoreSubMatrix(ds->omat[DS_MAT_Q],&Qt);
487: }
488: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
489: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
491: /* Create diagonal matrix as a result */
492: if (ds->compact) PetscArrayzero(e,n-1);
493: else {
494: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
495: for (i=l;i<n;i++) PetscArrayzero(A+l+i*ld,n-l);
496: for (i=l;i<n;i++) A[i+i*ld] = d[i];
497: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
498: }
499: DSRestoreArrayReal(ds,DS_MAT_T,&d);
501: /* Set zero wi */
502: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
503: return 0;
504: }
506: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
507: {
508: PetscInt i;
509: PetscBLASInt n1,info,l = 0,ld,off,lrwork,liwork;
510: PetscScalar *Q,*A;
511: PetscReal *d,*e;
512: #if defined(PETSC_USE_COMPLEX)
513: PetscBLASInt lwork;
514: PetscInt j;
515: #endif
518: PetscBLASIntCast(ds->l,&l);
519: PetscBLASIntCast(ds->ld,&ld);
520: PetscBLASIntCast(ds->n-ds->l,&n1);
521: off = l+l*ld;
522: DSGetArrayReal(ds,DS_MAT_T,&d);
523: e = d+ld;
525: /* Reduce to tridiagonal form */
526: DSIntermediate_HEP(ds);
528: /* Solve the tridiagonal eigenproblem */
529: for (i=0;i<l;i++) wr[i] = d[i];
531: lrwork = 5*n1*n1+3*n1+1;
532: liwork = 5*n1*n1+6*n1+6;
533: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
534: #if !defined(PETSC_USE_COMPLEX)
535: DSAllocateWork_Private(ds,0,lrwork,liwork);
536: PetscCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
537: #else
538: lwork = ld*ld;
539: DSAllocateWork_Private(ds,lwork,lrwork,liwork);
540: PetscCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
541: /* Fixing Lapack bug*/
542: for (j=ds->l;j<ds->n;j++)
543: for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
544: #endif
545: SlepcCheckLapackInfo("stedc",info);
546: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
547: for (i=l;i<ds->n;i++) wr[i] = d[i];
549: /* Create diagonal matrix as a result */
550: if (ds->compact) PetscArrayzero(e,ds->n-1);
551: else {
552: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
553: for (i=l;i<ds->n;i++) PetscArrayzero(A+l+i*ld,ds->n-l);
554: for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
555: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
556: }
557: DSRestoreArrayReal(ds,DS_MAT_T,&d);
559: /* Set zero wi */
560: if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
561: return 0;
562: }
564: #if !defined(PETSC_USE_COMPLEX)
565: PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
566: {
567: PetscBLASInt i,j,k,m,n = 0,info,nblks,bs = 0,ld = 0,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
568: PetscScalar *Q,*A;
569: PetscReal *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;
573: PetscBLASIntCast(ds->ld,&ld);
574: PetscBLASIntCast(ds->bs,&bs);
575: PetscBLASIntCast(ds->n,&n);
576: nblks = n/bs;
577: DSGetArrayReal(ds,DS_MAT_T,&d);
578: e = d+ld;
579: lrwork = 4*n*n+60*n+1;
580: liwork = 5*n+5*nblks-1;
581: lde = 2*bs+1;
582: DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork);
583: D = ds->work;
584: E = ds->work+bs*n;
585: rwork = ds->rwork;
586: ksizes = ds->iwork;
587: iwork = ds->iwork+nblks;
588: PetscArrayzero(iwork,liwork);
590: /* Copy matrix to block tridiagonal format */
591: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
592: j=0;
593: for (i=0;i<nblks;i++) {
594: ksizes[i]=bs;
595: for (k=0;k<bs;k++)
596: for (m=0;m<bs;m++)
597: D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
598: j = j + bs;
599: }
600: j=0;
601: for (i=0;i<nblks-1;i++) {
602: for (k=0;k<bs;k++)
603: for (m=0;m<bs;m++)
604: E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
605: j = j + bs;
606: }
607: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
609: /* Solve the block tridiagonal eigenproblem */
610: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
611: BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1);
612: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
613: for (i=0;i<ds->n;i++) wr[i] = d[i];
615: /* Create diagonal matrix as a result */
616: if (ds->compact) PetscArrayzero(e,ds->n-1);
617: else {
618: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
619: for (i=0;i<ds->n;i++) PetscArrayzero(A+i*ld,ds->n);
620: for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
621: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
622: }
623: DSRestoreArrayReal(ds,DS_MAT_T,&d);
625: /* Set zero wi */
626: if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
627: return 0;
628: }
629: #endif
631: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n,PetscBool trim)
632: {
633: PetscInt i,ld=ds->ld,l=ds->l;
634: PetscScalar *A;
636: if (!ds->compact && ds->extrarow) MatDenseGetArray(ds->omat[DS_MAT_A],&A);
637: if (trim) {
638: if (!ds->compact && ds->extrarow) { /* clean extra row */
639: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
640: }
641: ds->l = 0;
642: ds->k = 0;
643: ds->n = n;
644: ds->t = ds->n; /* truncated length equal to the new dimension */
645: } else {
646: if (!ds->compact && ds->extrarow && ds->k==ds->n) {
647: /* copy entries of extra row to the new position, then clean last row */
648: for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
649: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
650: }
651: ds->k = (ds->extrarow)? n: 0;
652: ds->t = ds->n; /* truncated length equal to previous dimension */
653: ds->n = n;
654: }
655: if (!ds->compact && ds->extrarow) MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
656: return 0;
657: }
659: #if !defined(PETSC_HAVE_MPIUNI)
660: PetscErrorCode DSSynchronize_HEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
661: {
662: PetscInt ld=ds->ld,l=ds->l,k=0,kr=0;
663: PetscMPIInt n,rank,off=0,size,ldn,ld3;
664: PetscScalar *A,*Q;
665: PetscReal *T;
667: if (ds->compact) kr = 3*ld;
668: else k = (ds->n-l)*ld;
669: if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
670: if (eigr) k += (ds->n-l);
671: DSAllocateWork_Private(ds,k+kr,0,0);
672: PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
673: PetscMPIIntCast(ds->n-l,&n);
674: PetscMPIIntCast(ld*(ds->n-l),&ldn);
675: PetscMPIIntCast(ld*3,&ld3);
676: if (ds->compact) DSGetArrayReal(ds,DS_MAT_T,&T);
677: else MatDenseGetArray(ds->omat[DS_MAT_A],&A);
678: if (ds->state>DS_STATE_RAW) MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
679: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
680: if (!rank) {
681: if (ds->compact) MPI_Pack(T,ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
682: else MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
683: if (ds->state>DS_STATE_RAW) MPI_Pack(Q+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
684: if (eigr) MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
685: }
686: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
687: if (rank) {
688: if (ds->compact) MPI_Unpack(ds->work,size,&off,T,ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
689: else MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
690: if (ds->state>DS_STATE_RAW) MPI_Unpack(ds->work,size,&off,Q+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
691: if (eigr) MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
692: }
693: if (ds->compact) DSRestoreArrayReal(ds,DS_MAT_T,&T);
694: else MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
695: if (ds->state>DS_STATE_RAW) MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
696: return 0;
697: }
698: #endif
700: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
701: {
702: PetscScalar *work;
703: PetscReal *rwork;
704: PetscBLASInt *ipiv;
705: PetscBLASInt lwork,info,n,ld;
706: PetscReal hn,hin;
707: PetscScalar *A;
709: PetscBLASIntCast(ds->n,&n);
710: PetscBLASIntCast(ds->ld,&ld);
711: lwork = 8*ld;
712: DSAllocateWork_Private(ds,lwork,ld,ld);
713: work = ds->work;
714: rwork = ds->rwork;
715: ipiv = ds->iwork;
716: DSSwitchFormat_HEP(ds);
718: /* use workspace matrix W to avoid overwriting A */
719: DSAllocateMat_Private(ds,DS_MAT_W);
720: MatCopy(ds->omat[DS_MAT_A],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN);
721: MatDenseGetArray(ds->omat[DS_MAT_W],&A);
723: /* norm of A */
724: hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
726: /* norm of inv(A) */
727: PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
728: SlepcCheckLapackInfo("getrf",info);
729: PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
730: SlepcCheckLapackInfo("getri",info);
731: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
732: MatDenseRestoreArray(ds->omat[DS_MAT_W],&A);
734: *cond = hn*hin;
735: return 0;
736: }
738: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
739: {
740: PetscInt i,j,k=ds->k;
741: PetscScalar *Q,*A,*R,*tau,*work;
742: PetscBLASInt ld,n1,n0,lwork,info;
744: PetscBLASIntCast(ds->ld,&ld);
745: DSAllocateWork_Private(ds,ld*ld,0,0);
746: tau = ds->work;
747: work = ds->work+ld;
748: PetscBLASIntCast(ld*(ld-1),&lwork);
749: DSAllocateMat_Private(ds,DS_MAT_W);
750: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
751: MatDenseGetArrayWrite(ds->omat[DS_MAT_Q],&Q);
752: MatDenseGetArrayWrite(ds->omat[DS_MAT_W],&R);
754: /* copy I+alpha*A */
755: PetscArrayzero(Q,ld*ld);
756: PetscArrayzero(R,ld*ld);
757: for (i=0;i<k;i++) {
758: Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
759: Q[k+i*ld] = alpha*A[k+i*ld];
760: }
762: /* compute qr */
763: PetscBLASIntCast(k+1,&n1);
764: PetscBLASIntCast(k,&n0);
765: PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
766: SlepcCheckLapackInfo("geqrf",info);
768: /* copy R from Q */
769: for (j=0;j<k;j++)
770: for (i=0;i<=j;i++)
771: R[i+j*ld] = Q[i+j*ld];
773: /* compute orthogonal matrix in Q */
774: PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
775: SlepcCheckLapackInfo("orgqr",info);
777: /* compute the updated matrix of projected problem */
778: for (j=0;j<k;j++)
779: for (i=0;i<k+1;i++)
780: A[j*ld+i] = Q[i*ld+j];
781: alpha = -1.0/alpha;
782: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
783: for (i=0;i<k;i++)
784: A[ld*i+i] -= alpha;
786: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
787: MatDenseRestoreArrayWrite(ds->omat[DS_MAT_Q],&Q);
788: MatDenseRestoreArrayWrite(ds->omat[DS_MAT_W],&R);
789: return 0;
790: }
792: PetscErrorCode DSHermitian_HEP(DS ds,DSMatType m,PetscBool *flg)
793: {
794: if (m==DS_MAT_A && !ds->extrarow) *flg = PETSC_TRUE;
795: else *flg = PETSC_FALSE;
796: return 0;
797: }
799: /*MC
800: DSHEP - Dense Hermitian Eigenvalue Problem.
802: Level: beginner
804: Notes:
805: The problem is expressed as A*X = X*Lambda, where A is real symmetric
806: (or complex Hermitian). Lambda is a diagonal matrix whose diagonal
807: elements are the arguments of DSSolve(). After solve, A is overwritten
808: with Lambda.
810: In the intermediate state A is reduced to tridiagonal form. In compact
811: storage format, the symmetric tridiagonal matrix is stored in T.
813: Used DS matrices:
814: + DS_MAT_A - problem matrix
815: . DS_MAT_T - symmetric tridiagonal matrix
816: - DS_MAT_Q - orthogonal/unitary transformation that reduces to tridiagonal form
817: (intermediate step) or matrix of orthogonal eigenvectors, which is equal to X
819: Implemented methods:
820: + 0 - Implicit QR (_steqr)
821: . 1 - Multiple Relatively Robust Representations (_stevr)
822: . 2 - Divide and Conquer (_stedc)
823: - 3 - Block Divide and Conquer (real scalars only)
825: .seealso: DSCreate(), DSSetType(), DSType
826: M*/
827: SLEPC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
828: {
829: ds->ops->allocate = DSAllocate_HEP;
830: ds->ops->view = DSView_HEP;
831: ds->ops->vectors = DSVectors_HEP;
832: ds->ops->solve[0] = DSSolve_HEP_QR;
833: ds->ops->solve[1] = DSSolve_HEP_MRRR;
834: ds->ops->solve[2] = DSSolve_HEP_DC;
835: #if !defined(PETSC_USE_COMPLEX)
836: ds->ops->solve[3] = DSSolve_HEP_BDC;
837: #endif
838: ds->ops->sort = DSSort_HEP;
839: #if !defined(PETSC_HAVE_MPIUNI)
840: ds->ops->synchronize = DSSynchronize_HEP;
841: #endif
842: ds->ops->truncate = DSTruncate_HEP;
843: ds->ops->update = DSUpdateExtraRow_HEP;
844: ds->ops->cond = DSCond_HEP;
845: ds->ops->transrks = DSTranslateRKS_HEP;
846: ds->ops->hermitian = DSHermitian_HEP;
847: return 0;
848: }