Actual source code: ks-slice.c
slepc-3.7.3 2016-09-29
1: /*
3: SLEPc eigensolver: "krylovschur"
5: Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems
7: References:
9: [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
10: solving sparse symmetric generalized eigenproblems", SIAM J.
11: Matrix Anal. Appl. 15(1):228-272, 1994.
13: [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
14: on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
15: 2012.
17: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
18: SLEPc - Scalable Library for Eigenvalue Problem Computations
19: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
21: This file is part of SLEPc.
23: SLEPc is free software: you can redistribute it and/or modify it under the
24: terms of version 3 of the GNU Lesser General Public License as published by
25: the Free Software Foundation.
27: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
28: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
29: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
30: more details.
32: You should have received a copy of the GNU Lesser General Public License
33: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
34: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
35: */
37: #include <slepc/private/epsimpl.h>
38: #include krylovschur.h
40: static PetscBool cited = PETSC_FALSE;
41: static const char citation[] =
42: "@Article{slepc-slice,\n"
43: " author = \"C. Campos and J. E. Roman\",\n"
44: " title = \"Strategies for spectrum slicing based on restarted {Lanczos} methods\",\n"
45: " journal = \"Numer. Algorithms\",\n"
46: " volume = \"60\",\n"
47: " number = \"2\",\n"
48: " pages = \"279--295\",\n"
49: " year = \"2012,\"\n"
50: " doi = \"http://dx.doi.org/10.1007/s11075-012-9564-z\"\n"
51: "}\n";
53: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON
57: static PetscErrorCode EPSSliceResetSR(EPS eps) {
58: PetscErrorCode ierr;
59: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
60: EPS_SR sr=ctx->sr;
61: EPS_shift s;
64: if (sr) {
65: if (ctx->npart>1) {
66: BVDestroy(&sr->V);
67: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
68: }
69: /* Reviewing list of shifts to free memory */
70: s = sr->s0;
71: if (s) {
72: while (s->neighb[1]) {
73: s = s->neighb[1];
74: PetscFree(s->neighb[0]);
75: }
76: PetscFree(s);
77: }
78: PetscFree(sr);
79: }
80: ctx->sr = NULL;
81: return(0);
82: }
86: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
87: {
88: PetscErrorCode ierr;
89: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
92: if (!ctx->global) return(0);
93: /* Destroy auxiliary EPS */
94: EPSSliceResetSR(ctx->eps);
95: EPSDestroy(&ctx->eps);
96: if (ctx->npart>1) {
97: PetscSubcommDestroy(&ctx->subc);
98: if (ctx->commset) {
99: MPI_Comm_free(&ctx->commrank);
100: ctx->commset = PETSC_FALSE;
101: }
102: }
103: PetscFree(ctx->subintervals);
104: PetscFree(ctx->nconv_loc);
105: EPSSliceResetSR(eps);
106: PetscFree(ctx->inertias);
107: PetscFree(ctx->shifts);
108: if (ctx->npart>1) {
109: ISDestroy(&ctx->isrow);
110: ISDestroy(&ctx->iscol);
111: MatDestroyMatrices(1,&ctx->submata);
112: MatDestroyMatrices(1,&ctx->submatb);
113: }
114: return(0);
115: }
119: /*
120: EPSSliceAllocateSolution - Allocate memory storage for common variables such
121: as eigenvalues and eigenvectors. The argument extra is used for methods
122: that require a working basis slightly larger than ncv.
123: */
124: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
125: {
126: PetscErrorCode ierr;
127: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
128: PetscReal eta;
129: PetscInt k;
130: PetscLogDouble cnt;
131: BVType type;
132: BVOrthogType orthog_type;
133: BVOrthogRefineType orthog_ref;
134: BVOrthogBlockType ob_type;
135: Mat matrix;
136: Vec t;
137: EPS_SR sr = ctx->sr;
140: /* allocate space for eigenvalues and friends */
141: k = PetscMax(1,sr->numEigs);
142: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
143: PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
144: cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
145: PetscLogObjectMemory((PetscObject)eps,cnt);
147: /* allocate sr->V and transfer options from eps->V */
148: BVDestroy(&sr->V);
149: BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
150: PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
151: if (!eps->V) { EPSGetBV(eps,&eps->V); }
152: if (!((PetscObject)(eps->V))->type_name) {
153: BVSetType(sr->V,BVSVEC);
154: } else {
155: BVGetType(eps->V,&type);
156: BVSetType(sr->V,type);
157: }
158: STMatCreateVecs(eps->st,&t,NULL);
159: BVSetSizesFromVec(sr->V,t,k);
160: VecDestroy(&t);
161: EPS_SetInnerProduct(eps);
162: BVGetMatrix(eps->V,&matrix,NULL);
163: BVSetMatrix(sr->V,matrix,PETSC_FALSE);
164: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
165: BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
166: return(0);
167: }
171: static PetscErrorCode EPSSliceGetEPS(EPS eps)
172: {
173: PetscErrorCode ierr;
174: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
175: BV V;
176: BVType type;
177: PetscReal eta;
178: BVOrthogType orthog_type;
179: BVOrthogRefineType orthog_ref;
180: BVOrthogBlockType ob_type;
181: Mat A,B=NULL,Ar,Br=NULL;
182: PetscInt i;
183: PetscReal h,a,b;
184: PetscMPIInt rank;
185: EPS_SR sr=ctx->sr;
186: PC pc;
187: PCType pctype;
188: KSP ksp;
189: KSPType ksptype;
190: STType sttype;
191: PetscObjectState Astate,Bstate=0;
192: PetscObjectId Aid,Bid=0;
193: const MatSolverPackage stype;
196: EPSGetOperators(eps,&A,&B);
197: if (ctx->npart==1) {
198: if (!ctx->eps) { EPSCreate(((PetscObject)eps)->comm,&ctx->eps); }
199: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
200: EPSSetOperators(ctx->eps,A,B);
201: a = eps->inta; b = eps->intb;
202: } else {
203: PetscObjectStateGet((PetscObject)A,&Astate);
204: PetscObjectGetId((PetscObject)A,&Aid);
205: if (B) {
206: PetscObjectStateGet((PetscObject)B,&Bstate);
207: PetscObjectGetId((PetscObject)B,&Bid);
208: }
209: if (!ctx->subc) {
210: /* Create context for subcommunicators */
211: PetscSubcommCreate(PetscObjectComm((PetscObject)eps),&ctx->subc);
212: PetscSubcommSetNumber(ctx->subc,ctx->npart);
213: PetscSubcommSetType(ctx->subc,PETSC_SUBCOMM_CONTIGUOUS);
214: PetscLogObjectMemory((PetscObject)eps,sizeof(PetscSubcomm));
216: /* Duplicate matrices */
217: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
218: ctx->Astate = Astate;
219: ctx->Aid = Aid;
220: if (B) {
221: MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br);
222: ctx->Bstate = Bstate;
223: ctx->Bid = Bid;
224: }
225: } else {
226: if (ctx->Astate != Astate || (B && ctx->Bstate != Bstate) || ctx->Aid != Aid || (B && ctx->Bid != Bid)) {
227: EPSGetOperators(ctx->eps,&Ar,&Br);
228: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
229: ctx->Astate = Astate;
230: ctx->Aid = Aid;
231: if (B) {
232: MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br);
233: ctx->Bstate = Bstate;
234: ctx->Bid = Bid;
235: }
236: EPSSetOperators(ctx->eps,Ar,Br);
237: MatDestroy(&Ar);
238: MatDestroy(&Br);
239: }
240: }
242: /* Determine subintervals */
243: if (!ctx->subintset) { /* uniform distribution if no set by user */
244: if (!sr->hasEnd) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Global interval must be bounded for splitting it in uniform subintervals");
245: h = (eps->intb-eps->inta)/ctx->npart;
246: a = eps->inta+ctx->subc->color*h;
247: b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
248: PetscFree(ctx->subintervals);
249: PetscMalloc1(ctx->npart+1,&ctx->subintervals);
250: for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
251: ctx->subintervals[ctx->npart] = eps->intb;
252: } else {
253: a = ctx->subintervals[ctx->subc->color];
254: b = ctx->subintervals[ctx->subc->color+1];
255: }
257: if (!ctx->eps) {
258: /* Create auxiliary EPS */
259: EPSCreate(PetscSubcommChild(ctx->subc),&ctx->eps);
260: EPSSetOperators(ctx->eps,Ar,Br);
261: MatDestroy(&Ar);
262: MatDestroy(&Br);
263: }
265: /* Create subcommunicator grouping processes with same rank */
266: if (ctx->commset) { MPI_Comm_free(&ctx->commrank); }
267: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
268: MPI_Comm_split(((PetscObject)eps)->comm,rank,ctx->subc->color,&ctx->commrank);
269: ctx->commset = PETSC_TRUE;
270: }
271: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
273: /* Transfer options for ST, KSP and PC */
274: STGetType(eps->st,&sttype);
275: STSetType(ctx->eps->st,sttype);
276: STGetKSP(eps->st,&ksp);
277: KSPGetType(ksp,&ksptype);
278: KSPGetPC(ksp,&pc);
279: PCGetType(pc,&pctype);
280: PCFactorGetMatSolverPackage(pc,&stype);
281: STGetKSP(ctx->eps->st,&ksp);
282: KSPSetType(ksp,ksptype);
283: KSPGetPC(ksp,&pc);
284: PCSetType(pc,pctype);
285: if (stype) { PCFactorSetMatSolverPackage(pc,stype); }
287: EPSSetConvergenceTest(ctx->eps,eps->conv);
288: EPSSetInterval(ctx->eps,a,b);
289: ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
290: ctx_local->npart = ctx->npart;
291: ctx_local->detect = ctx->detect;
292: ctx_local->global = PETSC_FALSE;
293: ctx_local->eps = eps;
294: ctx_local->subc = ctx->subc;
295: ctx_local->commrank = ctx->commrank;
297: EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
298: EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);
300: /* transfer options from eps->V */
301: EPSGetBV(ctx->eps,&V);
302: if (!eps->V) { EPSGetBV(eps,&eps->V); }
303: if (!((PetscObject)(eps->V))->type_name) {
304: BVSetType(V,BVSVEC);
305: } else {
306: BVGetType(eps->V,&type);
307: BVSetType(V,type);
308: }
309: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
310: BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
311: ctx->eps->which = eps->which;
312: ctx->eps->max_it = eps->max_it;
313: ctx->eps->tol = eps->tol;
314: ctx->eps->purify = eps->purify;
315: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
316: EPSSetProblemType(ctx->eps,eps->problem_type);
317: EPSSetUp(ctx->eps);
318: ctx->eps->nconv = 0;
319: ctx->eps->its = 0;
320: for (i=0;i<ctx->eps->ncv;i++) {
321: ctx->eps->eigr[i] = 0.0;
322: ctx->eps->eigi[i] = 0.0;
323: ctx->eps->errest[i] = 0.0;
324: }
325: return(0);
326: }
330: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
331: {
333: KSP ksp;
334: PC pc;
335: Mat F;
336: PetscReal nzshift;
339: if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
340: if (inertia) *inertia = eps->n;
341: } else if (shift <= PETSC_MIN_REAL) {
342: if (inertia) *inertia = 0;
343: if (zeros) *zeros = 0;
344: } else {
345: /* If the shift is zero, perturb it to a very small positive value.
346: The goal is that the nonzero pattern is the same in all cases and reuse
347: the symbolic factorizations */
348: nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
349: STSetShift(eps->st,nzshift);
350: STSetUp(eps->st);
351: STGetKSP(eps->st,&ksp);
352: KSPGetPC(ksp,&pc);
353: PCFactorGetMatrix(pc,&F);
354: MatGetInertia(F,inertia,zeros,NULL);
355: }
356: return(0);
357: }
361: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
362: {
363: PetscErrorCode ierr;
364: PetscBool issinv;
365: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
366: EPS_SR sr,sr_loc,sr_glob;
367: PetscInt nEigs,dssz=1,i,zeros=0,off=0;
368: PetscMPIInt nproc,rank,aux;
369: PetscReal r;
370: MPI_Request req;
371: Mat A,B=NULL;
374: if (ctx->global) {
375: if (eps->inta==0.0 && eps->intb==0.0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Must define a computational interval when using EPS_ALL");
376: if (eps->intb >= PETSC_MAX_REAL && eps->inta <= PETSC_MIN_REAL) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"The defined computational interval should have at least one of their sides bounded");
377: if (!eps->ishermitian) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Spectrum slicing only available for symmetric/Hermitian eigenproblems");
378: if (eps->arbitrary) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Arbitrary selection of eigenpairs cannot be used with spectrum slicing");
379: if (!((PetscObject)(eps->st))->type_name) { /* default to shift-and-invert */
380: STSetType(eps->st,STSINVERT);
381: }
382: PetscObjectTypeCompareAny((PetscObject)eps->st,&issinv,STSINVERT,STCAYLEY,"");
383: if (!issinv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Shift-and-invert or Cayley ST is needed for spectrum slicing");
384: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL*1e-2; /* use tighter tolerance */
385: if (!eps->max_it) eps->max_it = 100;
386: if (ctx->nev==1) ctx->nev = PetscMin(40,eps->n); /* nev not set, use default value */
387: if (eps->n>10 && ctx->nev<10) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"nev cannot be less than 10 in spectrum slicing runs");
388: }
389: eps->ops->backtransform = NULL;
391: /* create spectrum slicing context and initialize it */
392: EPSSliceResetSR(eps);
393: PetscNewLog(eps,&sr);
394: ctx->sr = sr;
395: sr->itsKs = 0;
396: sr->nleap = 0;
397: sr->nMAXCompl = eps->nev/4;
398: sr->iterCompl = eps->max_it/4;
399: sr->sPres = NULL;
400: sr->nS = 0;
402: if (ctx->npart==1 || ctx->global) {
403: /* check presence of ends and finding direction */
404: if ((eps->inta > PETSC_MIN_REAL && eps->inta != 0.0) || eps->intb >= PETSC_MAX_REAL) {
405: sr->int0 = eps->inta;
406: sr->int1 = eps->intb;
407: sr->dir = 1;
408: if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
409: sr->hasEnd = PETSC_FALSE;
410: } else sr->hasEnd = PETSC_TRUE;
411: } else {
412: sr->int0 = eps->intb;
413: sr->int1 = eps->inta;
414: sr->dir = -1;
415: sr->hasEnd = PetscNot(eps->inta <= PETSC_MIN_REAL);
416: }
417: }
418: if (ctx->global) {
419: /* prevent computation of factorization in global eps */
420: STSetTransform(eps->st,PETSC_FALSE);
421: EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
422: /* create subintervals and initialize auxiliary eps for slicing runs */
423: EPSSliceGetEPS(eps);
424: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
425: if (ctx->npart>1) {
426: if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
427: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
428: if (rank==0) {
429: MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
430: }
431: MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
432: PetscFree(ctx->nconv_loc);
433: PetscMalloc1(ctx->npart,&ctx->nconv_loc);
434: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
435: if (sr->dir<0) off = 1;
436: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
437: PetscMPIIntCast(sr_loc->numEigs,&aux);
438: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
439: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
440: } else {
441: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
442: if (!rank) {
443: PetscMPIIntCast(sr_loc->numEigs,&aux);
444: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
445: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
446: }
447: PetscMPIIntCast(ctx->npart,&aux);
448: MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
449: MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
450: }
451: nEigs = 0;
452: for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
453: } else {
454: nEigs = sr_loc->numEigs;
455: sr->inertia0 = sr_loc->inertia0;
456: }
457: sr->inertia1 = sr->inertia0+sr->dir*nEigs;
458: sr->numEigs = nEigs;
459: eps->nev = nEigs;
460: eps->ncv = nEigs;
461: eps->mpd = nEigs;
462: } else {
463: ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
464: sr_glob = ctx_glob->sr;
465: if (ctx->npart>1) {
466: sr->dir = sr_glob->dir;
467: sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
468: sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
469: if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
470: else sr->hasEnd = PETSC_TRUE;
471: }
473: /* compute inertia0 */
474: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
475: if (zeros) { /* error in factorization */
476: if (ctx->npart==1 || ctx_glob->subintset || ((sr->dir>0 && ctx->subc->color==0) || (sr->dir<0 && ctx->subc->color==ctx->npart-1))) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
477: else { /* perturb shift */
478: sr->int0 *= (1.0+SLICE_PTOL);
479: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
480: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",sr->int1);
481: }
482: }
483: if (ctx->npart>1) {
484: /* inertia1 is received from neighbour */
485: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
486: if (!rank) {
487: if ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1)) { /* send inertia0 to neighbour0 */
488: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
489: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
490: }
491: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
492: MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
493: MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
494: }
495: }
496: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
497: MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
498: MPI_Bcast(&sr->int1,1,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
499: } else sr_glob->inertia1 = sr->inertia1;
500: }
502: /* last process in eps comm computes inertia1 */
503: if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
504: EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
505: if (zeros) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
506: if (sr->hasEnd) {
507: sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
508: i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
509: }
510: }
512: /* number of eigenvalues in interval */
513: sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
514: if (ctx->npart>1) {
515: /* memory allocate for subinterval eigenpairs */
516: EPSSliceAllocateSolution(eps,1);
517: }
518: dssz = eps->ncv+1;
519: }
520: DSSetType(eps->ds,DSHEP);
521: DSSetCompact(eps->ds,PETSC_TRUE);
522: DSAllocate(eps->ds,dssz);
523: /* keep state of subcomm matrices to check that the user does not modify them */
524: EPSGetOperators(eps,&A,&B);
525: PetscObjectStateGet((PetscObject)A,&ctx->Astate);
526: PetscObjectGetId((PetscObject)A,&ctx->Aid);
527: if (B) {
528: PetscObjectStateGet((PetscObject)B,&ctx->Bstate);
529: PetscObjectGetId((PetscObject)B,&ctx->Bid);
530: } else {
531: ctx->Bstate=0;
532: ctx->Bid=0;
533: }
534: return(0);
535: }
539: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
540: {
541: PetscErrorCode ierr;
542: Vec v,vg,v_loc;
543: IS is1,is2;
544: VecScatter vec_sc;
545: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
546: PetscInt nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
547: PetscScalar *array;
548: EPS_SR sr_loc;
549: BV V_loc;
552: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
553: V_loc = sr_loc->V;
555: /* Gather parallel eigenvectors */
556: BVGetColumn(eps->V,0,&v);
557: VecGetOwnershipRange(v,&n0,&m0);
558: BVRestoreColumn(eps->V,0,&v);
559: BVGetColumn(ctx->eps->V,0,&v);
560: VecGetLocalSize(v,&nloc);
561: BVRestoreColumn(ctx->eps->V,0,&v);
562: PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
563: VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
564: idx = -1;
565: for (si=0;si<ctx->npart;si++) {
566: j = 0;
567: for (i=n0;i<m0;i++) {
568: idx1[j] = i;
569: idx2[j++] = i+eps->n*si;
570: }
571: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
572: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
573: BVGetColumn(eps->V,0,&v);
574: VecScatterCreate(v,is1,vg,is2,&vec_sc);
575: BVRestoreColumn(eps->V,0,&v);
576: ISDestroy(&is1);
577: ISDestroy(&is2);
578: for (i=0;i<ctx->nconv_loc[si];i++) {
579: BVGetColumn(eps->V,++idx,&v);
580: if (ctx->subc->color==si) {
581: BVGetColumn(V_loc,i,&v_loc);
582: VecGetArray(v_loc,&array);
583: VecPlaceArray(vg,array);
584: }
585: VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
586: VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
587: if (ctx->subc->color==si) {
588: VecResetArray(vg);
589: VecRestoreArray(v_loc,&array);
590: BVRestoreColumn(V_loc,i,&v_loc);
591: }
592: BVRestoreColumn(eps->V,idx,&v);
593: }
594: VecScatterDestroy(&vec_sc);
595: }
596: PetscFree2(idx1,idx2);
597: VecDestroy(&vg);
598: return(0);
599: }
603: /*
604: EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
605: */
606: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
607: {
608: PetscErrorCode ierr;
609: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
612: if (ctx->global && ctx->npart>1) {
613: EPSComputeVectors(ctx->eps);
614: EPSSliceGatherEigenVectors(eps);
615: }
616: return(0);
617: }
619: #define SWAP(a,b,t) {t=a;a=b;b=t;}
623: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
624: {
625: PetscErrorCode ierr;
626: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
627: PetscInt i=0,j,tmpi;
628: PetscReal v,tmpr;
629: EPS_shift s;
632: if (!eps->state) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Must call EPSSetUp() first");
633: if (!ctx->sr) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Only available in interval computations, see EPSSetInterval()");
634: if (!ctx->sr->s0) { /* EPSSolve not called yet */
635: *n = 2;
636: } else {
637: *n = 1;
638: s = ctx->sr->s0;
639: while (s) {
640: (*n)++;
641: s = s->neighb[1];
642: }
643: }
644: PetscMalloc1(*n,shifts);
645: PetscMalloc1(*n,inertias);
646: if (!ctx->sr->s0) { /* EPSSolve not called yet */
647: (*shifts)[0] = ctx->sr->int0;
648: (*shifts)[1] = ctx->sr->int1;
649: (*inertias)[0] = ctx->sr->inertia0;
650: (*inertias)[1] = ctx->sr->inertia1;
651: } else {
652: s = ctx->sr->s0;
653: while (s) {
654: (*shifts)[i] = s->value;
655: (*inertias)[i++] = s->inertia;
656: s = s->neighb[1];
657: }
658: (*shifts)[i] = ctx->sr->int1;
659: (*inertias)[i] = ctx->sr->inertia1;
660: }
661: /* remove possible duplicate in last position */
662: if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
663: /* sort result */
664: for (i=0;i<*n;i++) {
665: v = (*shifts)[i];
666: for (j=i+1;j<*n;j++) {
667: if (v > (*shifts)[j]) {
668: SWAP((*shifts)[i],(*shifts)[j],tmpr);
669: SWAP((*inertias)[i],(*inertias)[j],tmpi);
670: v = (*shifts)[i];
671: }
672: }
673: }
674: return(0);
675: }
679: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
680: {
681: PetscErrorCode ierr;
682: PetscMPIInt rank,nproc;
683: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
684: PetscInt i,idx,j;
685: PetscInt *perm_loc,off=0,*inertias_loc,ns;
686: PetscScalar *eigr_loc;
687: EPS_SR sr_loc;
688: PetscReal *shifts_loc;
689: PetscMPIInt *disp,*ns_loc,aux;
692: eps->nconv = 0;
693: for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
694: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
696: /* Gather the shifts used and the inertias computed */
697: EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
698: if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
699: if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
700: ns--;
701: for (i=0;i<ns;i++) {
702: inertias_loc[i] = inertias_loc[i+1];
703: shifts_loc[i] = shifts_loc[i+1];
704: }
705: }
706: PetscMalloc1(ctx->npart,&ns_loc);
707: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
708: PetscMPIIntCast(ns,&aux);
709: if (rank==0) { MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank); }
710: PetscMPIIntCast(ctx->npart,&aux);
711: MPI_Bcast(ns_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
712: ctx->nshifts = 0;
713: for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
714: PetscFree(ctx->inertias);
715: PetscFree(ctx->shifts);
716: PetscMalloc1(ctx->nshifts,&ctx->inertias);
717: PetscMalloc1(ctx->nshifts,&ctx->shifts);
719: /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
720: eigr_loc = sr_loc->eigr;
721: perm_loc = sr_loc->perm;
722: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
723: PetscMalloc1(ctx->npart,&disp);
724: disp[0] = 0;
725: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
726: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
727: PetscMPIIntCast(sr_loc->numEigs,&aux);
728: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
729: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
730: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
731: PetscMPIIntCast(ns,&aux);
732: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
733: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
734: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
735: } else { /* subcommunicators with different size */
736: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
737: if (rank==0) {
738: PetscMPIIntCast(sr_loc->numEigs,&aux);
739: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
740: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
741: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
742: PetscMPIIntCast(ns,&aux);
743: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
744: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
745: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
746: }
747: PetscMPIIntCast(eps->nconv,&aux);
748: MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,PetscSubcommChild(ctx->subc));
749: MPI_Bcast(eps->perm,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
750: MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
751: PetscMPIIntCast(ctx->nshifts,&aux);
752: MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
753: MPI_Bcast(&eps->its,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
754: }
755: /* Update global array eps->perm */
756: idx = ctx->nconv_loc[0];
757: for (i=1;i<ctx->npart;i++) {
758: off += ctx->nconv_loc[i-1];
759: for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
760: }
762: /* Gather parallel eigenvectors */
763: PetscFree(ns_loc);
764: PetscFree(disp);
765: PetscFree(shifts_loc);
766: PetscFree(inertias_loc);
767: return(0);
768: }
770: /*
771: Fills the fields of a shift structure
772: */
775: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
776: {
777: PetscErrorCode ierr;
778: EPS_shift s,*pending2;
779: PetscInt i;
780: EPS_SR sr;
781: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
784: sr = ctx->sr;
785: PetscNewLog(eps,&s);
786: s->value = val;
787: s->neighb[0] = neighb0;
788: if (neighb0) neighb0->neighb[1] = s;
789: s->neighb[1] = neighb1;
790: if (neighb1) neighb1->neighb[0] = s;
791: s->comp[0] = PETSC_FALSE;
792: s->comp[1] = PETSC_FALSE;
793: s->index = -1;
794: s->neigs = 0;
795: s->nconv[0] = s->nconv[1] = 0;
796: s->nsch[0] = s->nsch[1]=0;
797: /* Inserts in the stack of pending shifts */
798: /* If needed, the array is resized */
799: if (sr->nPend >= sr->maxPend) {
800: sr->maxPend *= 2;
801: PetscMalloc1(sr->maxPend,&pending2);
802: PetscLogObjectMemory((PetscObject)eps,sizeof(EPS_shift));
803: for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
804: PetscFree(sr->pending);
805: sr->pending = pending2;
806: }
807: sr->pending[sr->nPend++]=s;
808: return(0);
809: }
811: /* Prepare for Rational Krylov update */
814: static PetscErrorCode EPSPrepareRational(EPS eps)
815: {
816: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
817: PetscErrorCode ierr;
818: PetscInt dir,i,k,ld,nv;
819: PetscScalar *A;
820: EPS_SR sr = ctx->sr;
821: Vec v;
824: DSGetLeadingDimension(eps->ds,&ld);
825: dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
826: dir*=sr->dir;
827: k = 0;
828: for (i=0;i<sr->nS;i++) {
829: if (dir*PetscRealPart(sr->S[i])>0.0) {
830: sr->S[k] = sr->S[i];
831: sr->S[sr->nS+k] = sr->S[sr->nS+i];
832: BVGetColumn(sr->Vnext,k,&v);
833: BVCopyVec(eps->V,eps->nconv+i,v);
834: BVRestoreColumn(sr->Vnext,k,&v);
835: k++;
836: if (k>=sr->nS/2)break;
837: }
838: }
839: /* Copy to DS */
840: DSGetArray(eps->ds,DS_MAT_A,&A);
841: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
842: for (i=0;i<k;i++) {
843: A[i*(1+ld)] = sr->S[i];
844: A[k+i*ld] = sr->S[sr->nS+i];
845: }
846: sr->nS = k;
847: DSRestoreArray(eps->ds,DS_MAT_A,&A);
848: DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL,NULL);
849: DSSetDimensions(eps->ds,nv,0,0,k);
850: /* Append u to V */
851: BVGetColumn(sr->Vnext,sr->nS,&v);
852: BVCopyVec(eps->V,sr->nv,v);
853: BVRestoreColumn(sr->Vnext,sr->nS,&v);
854: return(0);
855: }
857: /* Provides next shift to be computed */
860: static PetscErrorCode EPSExtractShift(EPS eps)
861: {
862: PetscErrorCode ierr;
863: PetscInt iner,zeros=0;
864: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
865: EPS_SR sr;
866: PetscReal newShift;
867: EPS_shift sPres;
870: sr = ctx->sr;
871: if (sr->nPend > 0) {
872: sr->sPrev = sr->sPres;
873: sr->sPres = sr->pending[--sr->nPend];
874: sPres = sr->sPres;
875: EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
876: if (zeros) {
877: newShift = sPres->value*(1.0+SLICE_PTOL);
878: if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
879: else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
880: EPSSliceGetInertia(eps,newShift,&iner,&zeros);
881: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",newShift);
882: sPres->value = newShift;
883: }
884: sr->sPres->inertia = iner;
885: eps->target = sr->sPres->value;
886: eps->reason = EPS_CONVERGED_ITERATING;
887: eps->its = 0;
888: } else sr->sPres = NULL;
889: return(0);
890: }
892: /*
893: Symmetric KrylovSchur adapted to spectrum slicing:
894: Allows searching an specific amount of eigenvalues in the subintervals left and right.
895: Returns whether the search has succeeded
896: */
899: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
900: {
901: PetscErrorCode ierr;
902: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
903: PetscInt i,conv,k,l,ld,nv,*iwork,j,p;
904: Mat U;
905: PetscScalar *Q,*A,rtmp;
906: PetscReal *a,*b,beta;
907: PetscBool breakdown;
908: PetscInt count0,count1;
909: PetscReal lambda;
910: EPS_shift sPres;
911: PetscBool complIterating;
912: PetscBool sch0,sch1;
913: PetscInt iterCompl=0,n0,n1;
914: EPS_SR sr = ctx->sr;
917: /* Spectrum slicing data */
918: sPres = sr->sPres;
919: complIterating =PETSC_FALSE;
920: sch1 = sch0 = PETSC_TRUE;
921: DSGetLeadingDimension(eps->ds,&ld);
922: PetscMalloc1(2*ld,&iwork);
923: count0=0;count1=0; /* Found on both sides */
924: if (sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
925: /* Rational Krylov */
926: DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
927: DSGetDimensions(eps->ds,NULL,NULL,NULL,&l,NULL);
928: DSSetDimensions(eps->ds,l+1,0,0,0);
929: BVSetActiveColumns(eps->V,0,l+1);
930: DSGetMat(eps->ds,DS_MAT_Q,&U);
931: BVMultInPlace(eps->V,U,0,l+1);
932: MatDestroy(&U);
933: } else {
934: /* Get the starting Lanczos vector */
935: EPSGetStartVector(eps,0,NULL);
936: l = 0;
937: }
938: /* Restart loop */
939: while (eps->reason == EPS_CONVERGED_ITERATING) {
940: eps->its++; sr->itsKs++;
941: /* Compute an nv-step Lanczos factorization */
942: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
943: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
944: b = a + ld;
945: EPSFullLanczos(eps,a,b,eps->nconv+l,&nv,&breakdown);
946: sr->nv = nv;
947: beta = b[nv-1];
948: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
949: DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
950: if (l==0) {
951: DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
952: } else {
953: DSSetState(eps->ds,DS_STATE_RAW);
954: }
955: BVSetActiveColumns(eps->V,eps->nconv,nv);
957: /* Solve projected problem and compute residual norm estimates */
958: if (eps->its == 1 && l > 0) {/* After rational update */
959: DSGetArray(eps->ds,DS_MAT_A,&A);
960: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
961: b = a + ld;
962: k = eps->nconv+l;
963: A[k*ld+k-1] = A[(k-1)*ld+k];
964: A[k*ld+k] = a[k];
965: for (j=k+1; j< nv; j++) {
966: A[j*ld+j] = a[j];
967: A[j*ld+j-1] = b[j-1] ;
968: A[(j-1)*ld+j] = b[j-1];
969: }
970: DSRestoreArray(eps->ds,DS_MAT_A,&A);
971: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
972: DSSolve(eps->ds,eps->eigr,NULL);
973: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
974: DSSetCompact(eps->ds,PETSC_TRUE);
975: } else { /* Restart */
976: DSSolve(eps->ds,eps->eigr,NULL);
977: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
978: }
979: /* Residual */
980: EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,1.0,&k);
982: if (ctx->lock) {
983: /* Check convergence */
984: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
985: b = a + ld;
986: conv = 0;
987: j = k = eps->nconv;
988: for (i=eps->nconv;i<nv;i++) if (eps->errest[i] < eps->tol) conv++;
989: for (i=eps->nconv;i<nv;i++) {
990: if (eps->errest[i] < eps->tol) {
991: iwork[j++]=i;
992: } else iwork[conv+k++]=i;
993: }
994: for (i=eps->nconv;i<nv;i++) {
995: a[i]=PetscRealPart(eps->eigr[i]);
996: b[i]=eps->errest[i];
997: }
998: for (i=eps->nconv;i<nv;i++) {
999: eps->eigr[i] = a[iwork[i]];
1000: eps->errest[i] = b[iwork[i]];
1001: }
1002: for (i=eps->nconv;i<nv;i++) {
1003: a[i]=PetscRealPart(eps->eigr[i]);
1004: b[i]=eps->errest[i];
1005: }
1006: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
1007: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1008: for (i=eps->nconv;i<nv;i++) {
1009: p=iwork[i];
1010: if (p!=i) {
1011: j=i+1;
1012: while (iwork[j]!=i) j++;
1013: iwork[j]=p;iwork[i]=i;
1014: for (k=0;k<nv;k++) {
1015: rtmp=Q[k+p*ld];Q[k+p*ld]=Q[k+i*ld];Q[k+i*ld]=rtmp;
1016: }
1017: }
1018: }
1019: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1020: k=eps->nconv+conv;
1021: }
1023: /* Checking values obtained for completing */
1024: for (i=0;i<k;i++) {
1025: sr->back[i]=eps->eigr[i];
1026: }
1027: STBackTransform(eps->st,k,sr->back,eps->eigi);
1028: count0=count1=0;
1029: for (i=0;i<k;i++) {
1030: lambda = PetscRealPart(sr->back[i]);
1031: if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
1032: if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
1033: }
1034: if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
1035: else {
1036: /* Checks completion */
1037: if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
1038: eps->reason = EPS_CONVERGED_TOL;
1039: } else {
1040: if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
1041: if (complIterating) {
1042: if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
1043: } else if (k >= eps->nev) {
1044: n0 = sPres->nsch[0]-count0;
1045: n1 = sPres->nsch[1]-count1;
1046: if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
1047: /* Iterating for completion*/
1048: complIterating = PETSC_TRUE;
1049: if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
1050: if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
1051: iterCompl = sr->iterCompl;
1052: } else eps->reason = EPS_CONVERGED_TOL;
1053: }
1054: }
1055: }
1056: /* Update l */
1057: if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
1058: else l = 0;
1059: if (!ctx->lock && l>0) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
1060: if (breakdown) l=0;
1062: if (eps->reason == EPS_CONVERGED_ITERATING) {
1063: if (breakdown) {
1064: /* Start a new Lanczos factorization */
1065: PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
1066: EPSGetStartVector(eps,k,&breakdown);
1067: if (breakdown) {
1068: eps->reason = EPS_DIVERGED_BREAKDOWN;
1069: PetscInfo(eps,"Unable to generate more start vectors\n");
1070: }
1071: } else {
1072: /* Prepare the Rayleigh quotient for restart */
1073: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
1074: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1075: b = a + ld;
1076: for (i=k;i<k+l;i++) {
1077: a[i] = PetscRealPart(eps->eigr[i]);
1078: b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
1079: }
1080: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
1081: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1082: }
1083: }
1084: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
1085: DSGetMat(eps->ds,DS_MAT_Q,&U);
1086: BVMultInPlace(eps->V,U,eps->nconv,k+l);
1087: MatDestroy(&U);
1089: /* Normalize u and append it to V */
1090: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
1091: BVCopyColumn(eps->V,nv,k+l);
1092: }
1093: eps->nconv = k;
1094: if (eps->reason != EPS_CONVERGED_ITERATING) {
1095: /* Store approximated values for next shift */
1096: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1097: sr->nS = l;
1098: for (i=0;i<l;i++) {
1099: sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
1100: sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
1101: }
1102: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1103: }
1104: }
1105: /* Check for completion */
1106: for (i=0;i< eps->nconv; i++) {
1107: if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
1108: else sPres->nconv[0]++;
1109: }
1110: sPres->comp[0] = PetscNot(count0 < sPres->nsch[0]);
1111: sPres->comp[1] = PetscNot(count1 < sPres->nsch[1]);
1112: if (count0 > sPres->nsch[0] || count1 > sPres->nsch[1])SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1113: PetscFree(iwork);
1114: return(0);
1115: }
1117: /*
1118: Obtains value of subsequent shift
1119: */
1122: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
1123: {
1124: PetscReal lambda,d_prev;
1125: PetscInt i,idxP;
1126: EPS_SR sr;
1127: EPS_shift sPres,s;
1128: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1131: sr = ctx->sr;
1132: sPres = sr->sPres;
1133: if (sPres->neighb[side]) {
1134: /* Completing a previous interval */
1135: if (!sPres->neighb[side]->neighb[side] && sPres->neighb[side]->nconv[side]==0) { /* One of the ends might be too far from eigenvalues */
1136: if (side) *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[sr->indexEig-1]]))/2;
1137: else *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[0]]))/2;
1138: } else *newS=(sPres->value + sPres->neighb[side]->value)/2;
1139: } else { /* (Only for side=1). Creating a new interval. */
1140: if (sPres->neigs==0) {/* No value has been accepted*/
1141: if (sPres->neighb[0]) {
1142: /* Multiplying by 10 the previous distance */
1143: *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1144: sr->nleap++;
1145: /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1146: if (!sr->hasEnd && sr->nleap > 5) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unable to compute the wanted eigenvalues with open interval");
1147: } else { /* First shift */
1148: if (eps->nconv != 0) {
1149: /* Unaccepted values give information for next shift */
1150: idxP=0;/* Number of values left from shift */
1151: for (i=0;i<eps->nconv;i++) {
1152: lambda = PetscRealPart(sr->eigr[i]);
1153: if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1154: else break;
1155: }
1156: /* Avoiding subtraction of eigenvalues (might be the same).*/
1157: if (idxP>0) {
1158: d_prev = PetscAbsReal(sPres->value - PetscRealPart(sr->eigr[0]))/(idxP+0.3);
1159: } else {
1160: d_prev = PetscAbsReal(sPres->value - PetscRealPart(sr->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1161: }
1162: *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1163: } else { /* No values found, no information for next shift */
1164: SETERRQ(PetscObjectComm((PetscObject)eps),1,"First shift renders no information");
1165: }
1166: }
1167: } else { /* Accepted values found */
1168: sr->nleap = 0;
1169: /* Average distance of values in previous subinterval */
1170: s = sPres->neighb[0];
1171: while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1172: s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1173: }
1174: if (s) {
1175: d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1176: } else { /* First shift. Average distance obtained with values in this shift */
1177: /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1178: if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1179: d_prev = PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1180: } else {
1181: d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1182: }
1183: }
1184: /* Average distance is used for next shift by adding it to value on the right or to shift */
1185: if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1186: *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1187: } else { /* Last accepted value is on the left of shift. Adding to shift */
1188: *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1189: }
1190: }
1191: /* End of interval can not be surpassed */
1192: if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1193: }/* of neighb[side]==null */
1194: return(0);
1195: }
1197: /*
1198: Function for sorting an array of real values
1199: */
1202: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1203: {
1204: PetscReal re;
1205: PetscInt i,j,tmp;
1208: if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1209: /* Insertion sort */
1210: for (i=1;i<nr;i++) {
1211: re = PetscRealPart(r[perm[i]]);
1212: j = i-1;
1213: while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1214: tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1215: }
1216: }
1217: return(0);
1218: }
1220: /* Stores the pairs obtained since the last shift in the global arrays */
1223: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1224: {
1225: PetscErrorCode ierr;
1226: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1227: PetscReal lambda,err,norm;
1228: PetscInt i,count;
1229: PetscBool iscayley;
1230: EPS_SR sr = ctx->sr;
1231: EPS_shift sPres;
1232: Vec v,w;
1235: sPres = sr->sPres;
1236: sPres->index = sr->indexEig;
1237: count = sr->indexEig;
1238: /* Back-transform */
1239: STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1240: PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1241: /* Sort eigenvalues */
1242: sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1243: /* Values stored in global array */
1244: for (i=0;i<eps->nconv;i++) {
1245: lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1246: err = eps->errest[eps->perm[i]];
1248: if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1249: if (count>=sr->numEigs) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unexpected error in Spectrum Slicing");
1250: sr->eigr[count] = lambda;
1251: sr->errest[count] = err;
1252: /* Explicit purification */
1253: if (eps->purify) {
1254: BVGetColumn(sr->V,count,&v);
1255: BVGetColumn(eps->V,eps->perm[i],&w);
1256: STApply(eps->st,w,v);
1257: BVRestoreColumn(sr->V,count,&v);
1258: BVRestoreColumn(eps->V,eps->perm[i],&w);
1259: BVNormColumn(sr->V,count,NORM_2,&norm);
1260: BVScaleColumn(sr->V,count,1.0/norm);
1261: } else {
1262: BVGetColumn(eps->V,eps->perm[i],&w);
1263: BVInsertVec(sr->V,count,w);
1264: BVRestoreColumn(eps->V,eps->perm[i],&w);
1265: BVNormColumn(sr->V,count,NORM_2,&norm);
1266: BVScaleColumn(sr->V,count,1.0/norm);
1267: }
1268: count++;
1269: }
1270: }
1271: sPres->neigs = count - sr->indexEig;
1272: sr->indexEig = count;
1273: /* Global ordering array updating */
1274: sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1275: return(0);
1276: }
1280: static PetscErrorCode EPSLookForDeflation(EPS eps)
1281: {
1282: PetscErrorCode ierr;
1283: PetscReal val;
1284: PetscInt i,count0=0,count1=0;
1285: EPS_shift sPres;
1286: PetscInt ini,fin,k,idx0,idx1;
1287: EPS_SR sr;
1288: Vec v;
1289: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1292: sr = ctx->sr;
1293: sPres = sr->sPres;
1295: if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1296: else ini = 0;
1297: fin = sr->indexEig;
1298: /* Selection of ends for searching new values */
1299: if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1300: else sPres->ext[0] = sPres->neighb[0]->value;
1301: if (!sPres->neighb[1]) {
1302: if (sr->hasEnd) sPres->ext[1] = sr->int1;
1303: else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1304: } else sPres->ext[1] = sPres->neighb[1]->value;
1305: /* Selection of values between right and left ends */
1306: for (i=ini;i<fin;i++) {
1307: val=PetscRealPart(sr->eigr[sr->perm[i]]);
1308: /* Values to the right of left shift */
1309: if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1310: if ((sr->dir)*(val - sPres->value) < 0) count0++;
1311: else count1++;
1312: } else break;
1313: }
1314: /* The number of values on each side are found */
1315: if (sPres->neighb[0]) {
1316: sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1317: if (sPres->nsch[0]<0)SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1318: } else sPres->nsch[0] = 0;
1320: if (sPres->neighb[1]) {
1321: sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1322: if (sPres->nsch[1]<0)SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1323: } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);
1325: /* Completing vector of indexes for deflation */
1326: idx0 = ini;
1327: idx1 = ini+count0+count1;
1328: k=0;
1329: for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1330: BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1331: BVSetNumConstraints(sr->Vnext,k);
1332: for (i=0;i<k;i++) {
1333: BVGetColumn(sr->Vnext,-i-1,&v);
1334: BVCopyVec(sr->V,sr->idxDef[i],v);
1335: BVRestoreColumn(sr->Vnext,-i-1,&v);
1336: }
1338: /* For rational Krylov */
1339: if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) {
1340: EPSPrepareRational(eps);
1341: }
1342: eps->nconv = 0;
1343: /* Get rid of temporary Vnext */
1344: BVDestroy(&eps->V);
1345: eps->V = sr->Vnext;
1346: sr->Vnext = NULL;
1347: return(0);
1348: }
1352: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1353: {
1354: PetscErrorCode ierr;
1355: PetscInt i,lds;
1356: PetscReal newS;
1357: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1358: EPS_SR sr=ctx->sr;
1359: Mat A,B=NULL;
1360: PetscObjectState Astate,Bstate=0;
1361: PetscObjectId Aid,Bid=0;
1364: PetscCitationsRegister(citation,&cited);
1365: if (ctx->global) {
1366: EPSSolve_KrylovSchur_Slice(ctx->eps);
1367: ctx->eps->state = EPS_STATE_SOLVED;
1368: eps->reason = EPS_CONVERGED_TOL;
1369: if (ctx->npart>1) {
1370: /* Gather solution from subsolvers */
1371: EPSSliceGatherSolution(eps);
1372: } else {
1373: eps->nconv = sr->numEigs;
1374: eps->its = ctx->eps->its;
1375: PetscFree(ctx->inertias);
1376: PetscFree(ctx->shifts);
1377: EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1378: }
1379: } else {
1380: if (ctx->npart==1) {
1381: sr->eigr = ctx->eps->eigr;
1382: sr->eigi = ctx->eps->eigi;
1383: sr->perm = ctx->eps->perm;
1384: sr->errest = ctx->eps->errest;
1385: sr->V = ctx->eps->V;
1386: }
1387: /* Check that the user did not modify subcomm matrices */
1388: EPSGetOperators(eps,&A,&B);
1389: PetscObjectStateGet((PetscObject)A,&Astate);
1390: PetscObjectGetId((PetscObject)A,&Aid);
1391: if (B) {
1392: PetscObjectStateGet((PetscObject)B,&Bstate);
1393: PetscObjectGetId((PetscObject)B,&Bid);
1394: }
1395: if (Astate!=ctx->Astate || (B && Bstate!=ctx->Bstate) || Aid!=ctx->Aid || (B && Bid!=ctx->Bid)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcomm matrices have been modified by user");
1396: /* Only with eigenvalues present in the interval ...*/
1397: if (sr->numEigs==0) {
1398: eps->reason = EPS_CONVERGED_TOL;
1399: return(0);
1400: }
1401: /* Array of pending shifts */
1402: sr->maxPend = 100; /* Initial size */
1403: sr->nPend = 0;
1404: PetscMalloc1(sr->maxPend,&sr->pending);
1405: PetscLogObjectMemory((PetscObject)eps,(sr->maxPend)*sizeof(EPS_shift));
1406: EPSCreateShift(eps,sr->int0,NULL,NULL);
1407: /* extract first shift */
1408: sr->sPrev = NULL;
1409: sr->sPres = sr->pending[--sr->nPend];
1410: sr->sPres->inertia = sr->inertia0;
1411: eps->target = sr->sPres->value;
1412: sr->s0 = sr->sPres;
1413: sr->indexEig = 0;
1414: /* Memory reservation for auxiliary variables */
1415: lds = PetscMin(eps->mpd,eps->ncv);
1416: PetscCalloc1(lds*lds,&sr->S);
1417: PetscMalloc1(eps->ncv,&sr->back);
1418: PetscLogObjectMemory((PetscObject)eps,(sr->numEigs+2*eps->ncv)*sizeof(PetscScalar));
1419: for (i=0;i<sr->numEigs;i++) {
1420: sr->eigr[i] = 0.0;
1421: sr->eigi[i] = 0.0;
1422: sr->errest[i] = 0.0;
1423: sr->perm[i] = i;
1424: }
1425: /* Vectors for deflation */
1426: PetscMalloc1(sr->numEigs,&sr->idxDef);
1427: PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1428: sr->indexEig = 0;
1429: /* Main loop */
1430: while (sr->sPres) {
1431: /* Search for deflation */
1432: EPSLookForDeflation(eps);
1433: /* KrylovSchur */
1434: EPSKrylovSchur_Slice(eps);
1436: EPSStoreEigenpairs(eps);
1437: /* Select new shift */
1438: if (!sr->sPres->comp[1]) {
1439: EPSGetNewShiftValue(eps,1,&newS);
1440: EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1441: }
1442: if (!sr->sPres->comp[0]) {
1443: /* Completing earlier interval */
1444: EPSGetNewShiftValue(eps,0,&newS);
1445: EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1446: }
1447: /* Preparing for a new search of values */
1448: EPSExtractShift(eps);
1449: }
1451: /* Updating eps values prior to exit */
1452: PetscFree(sr->S);
1453: PetscFree(sr->idxDef);
1454: PetscFree(sr->pending);
1455: PetscFree(sr->back);
1456: BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1457: BVSetNumConstraints(sr->Vnext,0);
1458: BVDestroy(&eps->V);
1459: eps->V = sr->Vnext;
1460: eps->nconv = sr->indexEig;
1461: eps->reason = EPS_CONVERGED_TOL;
1462: eps->its = sr->itsKs;
1463: eps->nds = 0;
1464: }
1465: return(0);
1466: }