Actual source code: epskrylov.c
slepc-3.7.3 2016-09-29
1: /*
2: Common subroutines for all Krylov-type solvers.
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <slepc/private/epsimpl.h>
25: #include <slepc/private/slepcimpl.h>
26: #include <slepcblaslapack.h>
30: /*
31: EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
32: columns are assumed to be locked and therefore they are not modified. On
33: exit, the following relation is satisfied:
35: OP * V - V * H = beta*v_m * e_m^T
37: where the columns of V are the Arnoldi vectors (which are B-orthonormal),
38: H is an upper Hessenberg matrix, e_m is the m-th vector of the canonical basis.
39: On exit, beta contains the B-norm of V[m] before normalization.
40: */
41: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscBool trans,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
42: {
44: PetscInt j,m = *M;
45: Vec vj,vj1;
48: BVSetActiveColumns(eps->V,0,m);
49: for (j=k;j<m;j++) {
50: BVGetColumn(eps->V,j,&vj);
51: BVGetColumn(eps->V,j+1,&vj1);
52: if (trans) {
53: STApplyTranspose(eps->st,vj,vj1);
54: } else {
55: STApply(eps->st,vj,vj1);
56: }
57: BVRestoreColumn(eps->V,j,&vj);
58: BVRestoreColumn(eps->V,j+1,&vj1);
59: BVOrthogonalizeColumn(eps->V,j+1,H+ldh*j,beta,breakdown);
60: if (*breakdown) {
61: H[j+1+ldh*j] = 0.0;
62: *M = j+1;
63: break;
64: } else {
65: H[j+1+ldh*j] = *beta;
66: BVScaleColumn(eps->V,j+1,1.0/(*beta));
67: }
68: }
69: return(0);
70: }
74: /*
75: EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
76: performs the computation in a different way. The main idea is that
77: reorthogonalization is delayed to the next Arnoldi step. This version is
78: more scalable but in some cases convergence may stagnate.
79: */
80: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
81: {
83: PetscInt i,j,m=*M;
84: Vec u,t;
85: PetscScalar shh[100],*lhh,dot,dot2;
86: PetscReal norm1=0.0,norm2=1.0;
87: Vec vj,vj1,vj2;
90: if (m<=100) lhh = shh;
91: else {
92: PetscMalloc1(m,&lhh);
93: }
94: BVCreateVec(eps->V,&u);
95: BVCreateVec(eps->V,&t);
97: BVSetActiveColumns(eps->V,0,m);
98: for (j=k;j<m;j++) {
99: BVGetColumn(eps->V,j,&vj);
100: BVGetColumn(eps->V,j+1,&vj1);
101: STApply(eps->st,vj,vj1);
102: BVRestoreColumn(eps->V,j,&vj);
103: BVRestoreColumn(eps->V,j+1,&vj1);
105: BVDotColumnBegin(eps->V,j+1,H+ldh*j);
106: if (j>k) {
107: BVDotColumnBegin(eps->V,j,lhh);
108: BVGetColumn(eps->V,j,&vj);
109: VecDotBegin(vj,vj,&dot);
110: }
111: if (j>k+1) {
112: BVNormVecBegin(eps->V,u,NORM_2,&norm2);
113: BVGetColumn(eps->V,j-2,&vj2);
114: VecDotBegin(u,vj2,&dot2);
115: }
117: BVDotColumnEnd(eps->V,j+1,H+ldh*j);
118: if (j>k) {
119: BVDotColumnEnd(eps->V,j,lhh);
120: VecDotEnd(vj,vj,&dot);
121: BVRestoreColumn(eps->V,j,&vj);
122: }
123: if (j>k+1) {
124: BVNormVecEnd(eps->V,u,NORM_2,&norm2);
125: VecDotEnd(u,vj2,&dot2);
126: BVRestoreColumn(eps->V,j-2,&vj2);
127: }
129: if (j>k) {
130: norm1 = PetscSqrtReal(PetscRealPart(dot));
131: for (i=0;i<j;i++)
132: H[ldh*j+i] = H[ldh*j+i]/norm1;
133: H[ldh*j+j] = H[ldh*j+j]/dot;
135: BVCopyVec(eps->V,j,t);
136: BVScaleColumn(eps->V,j,1.0/norm1);
137: BVScaleColumn(eps->V,j+1,1.0/norm1);
138: }
140: BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
142: if (j>k) {
143: BVSetActiveColumns(eps->V,0,j);
144: BVMultVec(eps->V,-1.0,1.0,t,lhh);
145: BVSetActiveColumns(eps->V,0,m);
146: for (i=0;i<j;i++)
147: H[ldh*(j-1)+i] += lhh[i];
148: }
150: if (j>k+1) {
151: BVGetColumn(eps->V,j-1,&vj1);
152: VecCopy(u,vj1);
153: BVRestoreColumn(eps->V,j-1,&vj1);
154: BVScaleColumn(eps->V,j-1,1.0/norm2);
155: H[ldh*(j-2)+j-1] = norm2;
156: }
158: if (j<m-1) {
159: VecCopy(t,u);
160: }
161: }
163: BVNormVec(eps->V,t,NORM_2,&norm2);
164: VecScale(t,1.0/norm2);
165: BVGetColumn(eps->V,m-1,&vj1);
166: VecCopy(t,vj1);
167: BVRestoreColumn(eps->V,m-1,&vj1);
168: H[ldh*(m-2)+m-1] = norm2;
170: BVDotColumn(eps->V,m,lhh);
172: BVMultColumn(eps->V,-1.0,1.0,m,lhh);
173: for (i=0;i<m;i++)
174: H[ldh*(m-1)+i] += lhh[i];
176: BVNormColumn(eps->V,m,NORM_2,beta);
177: BVScaleColumn(eps->V,m,1.0 / *beta);
178: *breakdown = PETSC_FALSE;
180: if (m>100) { PetscFree(lhh); }
181: VecDestroy(&u);
182: VecDestroy(&t);
183: return(0);
184: }
188: /*
189: EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi,
190: but without reorthogonalization (only delayed normalization).
191: */
192: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
193: {
195: PetscInt i,j,m=*M;
196: PetscScalar dot;
197: PetscReal norm=0.0;
198: Vec vj,vj1;
201: BVSetActiveColumns(eps->V,0,m);
202: for (j=k;j<m;j++) {
203: BVGetColumn(eps->V,j,&vj);
204: BVGetColumn(eps->V,j+1,&vj1);
205: STApply(eps->st,vj,vj1);
206: BVRestoreColumn(eps->V,j+1,&vj1);
207: BVDotColumnBegin(eps->V,j+1,H+ldh*j);
208: if (j>k) {
209: VecDotBegin(vj,vj,&dot);
210: }
211: BVDotColumnEnd(eps->V,j+1,H+ldh*j);
212: if (j>k) {
213: VecDotEnd(vj,vj,&dot);
214: }
215: BVRestoreColumn(eps->V,j,&vj);
217: if (j>k) {
218: norm = PetscSqrtReal(PetscRealPart(dot));
219: BVScaleColumn(eps->V,j,1.0/norm);
220: H[ldh*(j-1)+j] = norm;
222: for (i=0;i<j;i++)
223: H[ldh*j+i] = H[ldh*j+i]/norm;
224: H[ldh*j+j] = H[ldh*j+j]/dot;
225: BVScaleColumn(eps->V,j+1,1.0/norm);
226: *beta = norm;
227: }
228: BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
229: }
231: *breakdown = PETSC_FALSE;
232: return(0);
233: }
237: /*
238: EPSKrylovConvergence - Implements the loop that checks for convergence
239: in Krylov methods.
241: Input Parameters:
242: eps - the eigensolver; some error estimates are updated in eps->errest
243: getall - whether all residuals must be computed
244: kini - initial value of k (the loop variable)
245: nits - number of iterations of the loop
246: V - set of basis vectors (used only if trueresidual is activated)
247: nv - number of vectors to process (dimension of Q, columns of V)
248: beta - norm of f (the residual vector of the Arnoldi/Lanczos factorization)
249: corrf - correction factor for residual estimates (only in harmonic KS)
251: Output Parameters:
252: kout - the first index where the convergence test failed
253: */
254: PetscErrorCode EPSKrylovConvergence(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal corrf,PetscInt *kout)
255: {
257: PetscInt k,newk,marker,ld,inside;
258: PetscScalar re,im,*Zr,*Zi,*X;
259: PetscReal resnorm;
260: PetscBool isshift,refined,istrivial;
261: Vec x,y,w[3];
264: RGIsTrivial(eps->rg,&istrivial);
265: if (eps->trueres) {
266: BVCreateVec(eps->V,&x);
267: BVCreateVec(eps->V,&y);
268: BVCreateVec(eps->V,&w[0]);
269: BVCreateVec(eps->V,&w[2]);
270: #if !defined(PETSC_USE_COMPLEX)
271: BVCreateVec(eps->V,&w[1]);
272: #else
273: w[1] = NULL;
274: #endif
275: }
276: DSGetLeadingDimension(eps->ds,&ld);
277: DSGetRefined(eps->ds,&refined);
278: PetscObjectTypeCompare((PetscObject)eps->st,STSHIFT,&isshift);
279: marker = -1;
280: if (eps->trackall) getall = PETSC_TRUE;
281: for (k=kini;k<kini+nits;k++) {
282: /* eigenvalue */
283: re = eps->eigr[k];
284: im = eps->eigi[k];
285: if (!istrivial || eps->trueres || isshift || eps->conv==EPS_CONV_NORM) {
286: STBackTransform(eps->st,1,&re,&im);
287: }
288: if (!istrivial) {
289: RGCheckInside(eps->rg,1,&re,&im,&inside);
290: if (marker==-1 && inside<0) marker = k;
291: if (!(eps->trueres || isshift || eps->conv==EPS_CONV_NORM)) { /* make sure eps->converged below uses the right value */
292: re = eps->eigr[k];
293: im = eps->eigi[k];
294: }
295: }
296: newk = k;
297: DSVectors(eps->ds,DS_MAT_X,&newk,&resnorm);
298: if (eps->trueres) {
299: DSGetArray(eps->ds,DS_MAT_X,&X);
300: Zr = X+k*ld;
301: if (newk==k+1) Zi = X+newk*ld;
302: else Zi = NULL;
303: EPSComputeRitzVector(eps,Zr,Zi,eps->V,x,y);
304: DSRestoreArray(eps->ds,DS_MAT_X,&X);
305: EPSComputeResidualNorm_Private(eps,re,im,x,y,w,&resnorm);
306: }
307: else if (!refined) resnorm *= beta*corrf;
308: /* error estimate */
309: (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
310: if (marker==-1 && eps->errest[k] >= eps->tol) marker = k;
311: if (newk==k+1) {
312: eps->errest[k+1] = eps->errest[k];
313: k++;
314: }
315: if (marker!=-1 && !getall) break;
316: }
317: if (marker!=-1) k = marker;
318: *kout = k;
319: if (eps->trueres) {
320: VecDestroy(&x);
321: VecDestroy(&y);
322: VecDestroy(&w[0]);
323: VecDestroy(&w[2]);
324: #if !defined(PETSC_USE_COMPLEX)
325: VecDestroy(&w[1]);
326: #endif
327: }
328: return(0);
329: }
333: /*
334: EPSFullLanczos - Computes an m-step Lanczos factorization with full
335: reorthogonalization. At each Lanczos step, the corresponding Lanczos
336: vector is orthogonalized with respect to all previous Lanczos vectors.
337: This is equivalent to computing an m-step Arnoldi factorization and
338: exploting symmetry of the operator.
340: The first k columns are assumed to be locked and therefore they are
341: not modified. On exit, the following relation is satisfied:
343: OP * V - V * T = beta_m*v_m * e_m^T
345: where the columns of V are the Lanczos vectors (which are B-orthonormal),
346: T is a real symmetric tridiagonal matrix, and e_m is the m-th vector of
347: the canonical basis. The tridiagonal is stored as two arrays: alpha
348: contains the diagonal elements, beta the off-diagonal. On exit, the last
349: element of beta contains the B-norm of V[m] before normalization.
350: */
351: PetscErrorCode EPSFullLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscInt k,PetscInt *M,PetscBool *breakdown)
352: {
354: PetscInt j,m = *M;
355: Vec vj,vj1;
356: PetscScalar *hwork,lhwork[100];
359: if (m > 100) {
360: PetscMalloc1(m,&hwork);
361: } else hwork = lhwork;
363: BVSetActiveColumns(eps->V,0,m);
364: for (j=k;j<m;j++) {
365: BVGetColumn(eps->V,j,&vj);
366: BVGetColumn(eps->V,j+1,&vj1);
367: STApply(eps->st,vj,vj1);
368: BVRestoreColumn(eps->V,j,&vj);
369: BVRestoreColumn(eps->V,j+1,&vj1);
370: BVOrthogonalizeColumn(eps->V,j+1,hwork,beta+j,breakdown);
371: alpha[j] = PetscRealPart(hwork[j]);
372: if (*breakdown) {
373: *M = j+1;
374: break;
375: } else {
376: BVScaleColumn(eps->V,j+1,1.0/beta[j]);
377: }
378: }
379: if (m > 100) {
380: PetscFree(hwork);
381: }
382: return(0);
383: }
387: PetscErrorCode EPSPseudoLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscReal *cos,Vec w)
388: {
390: PetscInt j,m = *M,i,ld,l;
391: Vec vj,vj1;
392: PetscScalar *hwork,lhwork[100];
393: PetscReal norm,norm1,norm2,t,*f,sym=0.0,fro=0.0;
394: PetscBLASInt j_,one=1;
397: DSGetLeadingDimension(eps->ds,&ld);
398: DSGetDimensions(eps->ds,NULL,NULL,&l,NULL,NULL);
399: if (cos) *cos = 1.0;
400: if (m > 100) {
401: PetscMalloc1(m,&hwork);
402: } else hwork = lhwork;
404: BVSetActiveColumns(eps->V,0,m);
405: for (j=k;j<m;j++) {
406: BVGetColumn(eps->V,j,&vj);
407: BVGetColumn(eps->V,j+1,&vj1);
408: STApply(eps->st,vj,vj1);
409: BVRestoreColumn(eps->V,j,&vj);
410: BVRestoreColumn(eps->V,j+1,&vj1);
411: BVOrthogonalizeColumn(eps->V,j+1,hwork,&norm,breakdown);
412: alpha[j] = PetscRealPart(hwork[j]);
413: beta[j] = PetscAbsReal(norm);
414: DSGetArrayReal(eps->ds,DS_MAT_T,&f);
415: if (j==k) {
416: for (i=l;i<j-1;i++) hwork[i]-= f[2*ld+i];
417: for (i=0;i<l;i++) hwork[i] = 0.0;
418: }
419: DSRestoreArrayReal(eps->ds,DS_MAT_T,&f);
420: hwork[j-1] -= beta[j-1];
421: PetscBLASIntCast(j,&j_);
422: sym = SlepcAbs(BLASnrm2_(&j_,hwork,&one),sym);
423: fro = SlepcAbs(fro,SlepcAbs(alpha[j],beta[j]));
424: if (j>0) fro = SlepcAbs(fro,beta[j-1]);
425: if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*eps->tol)) { *symmlost = PETSC_TRUE; *M=j+1; break; }
426: omega[j+1] = (norm<0.0)? -1.0: 1.0;
427: BVScaleColumn(eps->V,j+1,1.0/norm);
428: /* */
429: if (cos) {
430: BVGetColumn(eps->V,j+1,&vj1);
431: VecNorm(vj1,NORM_2,&norm1);
432: BVApplyMatrix(eps->V,vj1,w);
433: BVRestoreColumn(eps->V,j+1,&vj1);
434: VecNorm(w,NORM_2,&norm2);
435: t = 1.0/(norm1*norm2);
436: if (*cos>t) *cos = t;
437: }
438: }
439: if (m > 100) {
440: PetscFree(hwork);
441: }
442: return(0);
443: }