Actual source code: bcgsl.c
1: #define PETSCKSP_DLL
2: /*
3: * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
4: * "Enhanced implementation of BiCGStab(L) for solving linear systems
5: * of equations". This uses tricky delayed updating ideas to prevent
6: * round-off buildup.
7: *
8: * This has not been completely cleaned up into PETSc style.
9: *
10: * All the BLAS and LAPACK calls below should be removed and replaced with
11: * loops and the macros for block solvers converted from LINPACK; there is no way
12: * calls to BLAS/LAPACK make sense for size 2, 3, 4, etc.
13: */
14: #include private/kspimpl.h
15: #include ../src/ksp/ksp/impls/bcgsl/bcgslimpl.h
16: #include petscblaslapack.h
21: static PetscErrorCode KSPSolve_BCGSL(KSP ksp)
22: {
23: KSP_BCGSL *bcgsl = (KSP_BCGSL *) ksp->data;
24: PetscScalar alpha, beta, omega, sigma;
25: PetscScalar rho0, rho1;
26: PetscReal kappa0, kappaA, kappa1;
27: PetscReal ghat, epsilon, abstol;
28: PetscReal zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
29: PetscTruth bUpdateX;
30: PetscTruth bBombed = PETSC_FALSE;
32: PetscInt maxit;
33: PetscInt h, i, j, k, vi, ell;
34: PetscBLASInt ldMZ,bierr;
39: if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL");
40: if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL");
41: if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL");
43: /* set up temporary vectors */
44: vi = 0;
45: ell = bcgsl->ell;
46: bcgsl->vB = ksp->work[vi]; vi++;
47: bcgsl->vRt = ksp->work[vi]; vi++;
48: bcgsl->vTm = ksp->work[vi]; vi++;
49: bcgsl->vvR = ksp->work+vi; vi += ell+1;
50: bcgsl->vvU = ksp->work+vi; vi += ell+1;
51: bcgsl->vXr = ksp->work[vi]; vi++;
52: ldMZ = PetscBLASIntCast(ell+1);
54: /* Prime the iterative solver */
55: KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
56: VecNorm(VVR[0], NORM_2, &zeta0);
57: rnmax_computed = zeta0;
58: rnmax_true = zeta0;
60: (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
61: if (ksp->reason) {
62: PetscObjectTakeAccess(ksp);
63: ksp->its = 0;
64: ksp->rnorm = zeta0;
65: PetscObjectGrantAccess(ksp);
66: return(0);
67: }
69: VecSet(VVU[0],0.0);
70: alpha = 0.;
71: rho0 = omega = 1;
73: if (bcgsl->delta>0.0) {
74: VecCopy(VX, VXR);
75: VecSet(VX,0.0);
76: VecCopy(VVR[0], VB);
77: } else {
78: VecCopy(ksp->vec_rhs, VB);
79: }
81: /* Life goes on */
82: VecCopy(VVR[0], VRT);
83: zeta = zeta0;
85: KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);
87: for (k=0; k<maxit; k += bcgsl->ell) {
88: ksp->its = k;
89: ksp->rnorm = zeta;
91: KSPLogResidualHistory(ksp, zeta);
92: KSPMonitor(ksp, ksp->its, zeta);
94: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
95: if (ksp->reason) break;
97: /* BiCG part */
98: rho0 = -omega*rho0;
99: nrm0 = zeta;
100: for (j=0; j<bcgsl->ell; j++) {
101: /* rho1 <- r_j' * r_tilde */
102: VecDot(VVR[j], VRT, &rho1);
103: if (rho1 == 0.0) {
104: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
105: bBombed = PETSC_TRUE;
106: break;
107: }
108: beta = alpha*(rho1/rho0);
109: rho0 = rho1;
110: for (i=0; i<=j; i++) {
111: /* u_i <- r_i - beta*u_i */
112: VecAYPX(VVU[i], -beta, VVR[i]);
113: }
114: /* u_{j+1} <- inv(K)*A*u_j */
115: KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
117: VecDot(VVU[j+1], VRT, &sigma);
118: if (sigma == 0.0) {
119: ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
120: bBombed = PETSC_TRUE;
121: break;
122: }
123: alpha = rho1/sigma;
125: /* x <- x + alpha*u_0 */
126: VecAXPY(VX, alpha, VVU[0]);
128: for (i=0; i<=j; i++) {
129: /* r_i <- r_i - alpha*u_{i+1} */
130: VecAXPY(VVR[i], -alpha, VVU[i+1]);
131: }
133: /* r_{j+1} <- inv(K)*A*r_j */
134: KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
136: VecNorm(VVR[0], NORM_2, &nrm0);
137: if (bcgsl->delta>0.0) {
138: if (rnmax_computed<nrm0) rnmax_computed = nrm0;
139: if (rnmax_true<nrm0) rnmax_true = nrm0;
140: }
142: /* NEW: check for early exit */
143: (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
144: if (ksp->reason) {
145: PetscObjectTakeAccess(ksp);
146: ksp->its = k+j;
147: ksp->rnorm = nrm0;
148: PetscObjectGrantAccess(ksp);
149: break;
150: }
151: }
153: if (bBombed==PETSC_TRUE) break;
155: /* Polynomial part */
156: for(i = 0; i <= bcgsl->ell; ++i) {
157: VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
158: }
159: /* Symmetrize MZa */
160: for(i = 0; i <= bcgsl->ell; ++i) {
161: for(j = i+1; j <= bcgsl->ell; ++j) {
162: MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
163: }
164: }
165: /* Copy MZa to MZb */
166: PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));
168: if (!bcgsl->bConvex || bcgsl->ell==1) {
169: PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);
171: AY0c[0] = -1;
172: LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
173: if (ierr!=0) {
174: ksp->reason = KSP_DIVERGED_BREAKDOWN;
175: bBombed = PETSC_TRUE;
176: break;
177: }
178: PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));
179: LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
180: } else {
181: PetscBLASInt ione = 1;
182: PetscScalar aone = 1.0, azero = 0.0;
183: PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);
185: LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
186: if (ierr!=0) {
187: ksp->reason = KSP_DIVERGED_BREAKDOWN;
188: bBombed = PETSC_TRUE;
189: break;
190: }
191: PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));
192: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
193: AY0c[0] = -1;
194: AY0c[bcgsl->ell] = 0.;
196: PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));
197: LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);
199: AYlc[0] = 0.;
200: AYlc[bcgsl->ell] = -1;
202: BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);
204: kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);
206: /* round-off can cause negative kappa's */
207: if (kappa0<0) kappa0 = -kappa0;
208: kappa0 = sqrt(kappa0);
210: kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
212: BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);
214: kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);
216: if (kappa1<0) kappa1 = -kappa1;
217: kappa1 = sqrt(kappa1);
219: if (kappa0!=0.0 && kappa1!=0.0) {
220: if (kappaA<0.7*kappa0*kappa1) {
221: ghat = (kappaA<0.0) ? -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
222: } else {
223: ghat = kappaA/(kappa1*kappa1);
224: }
225: for (i=0; i<=bcgsl->ell; i++) {
226: AY0c[i] = AY0c[i] - ghat* AYlc[i];
227: }
228: }
229: }
231: omega = AY0c[bcgsl->ell];
232: for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
233: omega = AY0c[h];
234: }
235: if (omega==0.0) {
236: ksp->reason = KSP_DIVERGED_BREAKDOWN;
237: break;
238: }
241: VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);
242: for (i=1; i<=bcgsl->ell; i++) {
243: AY0c[i] *= -1.0;
244: }
245: VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);
246: VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);
247: for (i=1; i<=bcgsl->ell; i++) {
248: AY0c[i] *= -1.0;
249: }
250: VecNorm(VVR[0], NORM_2, &zeta);
252: /* Accurate Update */
253: if (bcgsl->delta>0.0) {
254: if (rnmax_computed<zeta) rnmax_computed = zeta;
255: if (rnmax_true<zeta) rnmax_true = zeta;
257: bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
258: if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
259: /* r0 <- b-inv(K)*A*X */
260: KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
261: VecAYPX(VVR[0], -1.0, VB);
262: rnmax_true = zeta;
264: if (bUpdateX) {
265: VecAXPY(VXR,1.0,VX);
266: VecSet(VX,0.0);
267: VecCopy(VVR[0], VB);
268: rnmax_computed = zeta;
269: }
270: }
271: }
272: }
273: if (bcgsl->delta>0.0) {
274: VecAXPY(VX,1.0,VXR);
275: }
277: (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
278: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
279: return(0);
280: }
284: /*@
285: KSPBCGSLSetXRes - Sets the parameter governing when
286: exact residuals will be used instead of computed residuals.
288: Collective on KSP
290: Input Parameters:
291: + ksp - iterative context obtained from KSPCreate
292: - delta - computed residuals are used alone when delta is not positive
294: Options Database Keys:
296: . -ksp_bcgsl_xres delta
298: Level: intermediate
300: .keywords: KSP, BiCGStab(L), set, exact residuals
302: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
303: @*/
304: PetscErrorCode KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
305: {
306: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
310: if (ksp->setupcalled) {
311: if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
312: KSPDefaultFreeWork(ksp);
313: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
314: ksp->setupcalled = 0;
315: }
316: }
317: bcgsl->delta = delta;
318: return(0);
319: }
323: /*@
324: KSPBCGSLSetPol - Sets the type of polynomial part will
325: be used in the BiCGSTab(L) solver.
327: Collective on KSP
329: Input Parameters:
330: + ksp - iterative context obtained from KSPCreate
331: - uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.
333: Options Database Keys:
335: + -ksp_bcgsl_cxpoly - use enhanced polynomial
336: . -ksp_bcgsl_mrpoly - use standard polynomial
338: Level: intermediate
340: .keywords: KSP, BiCGStab(L), set, polynomial
342: .seealso: @()
343: @*/
344: PetscErrorCode KSPBCGSLSetPol(KSP ksp, PetscTruth uMROR)
345: {
346: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
350: if (!ksp->setupcalled) {
351: bcgsl->bConvex = uMROR;
352: } else if (bcgsl->bConvex != uMROR) {
353: /* free the data structures,
354: then create them again
355: */
356: KSPDefaultFreeWork(ksp);
357: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
358: bcgsl->bConvex = uMROR;
359: ksp->setupcalled = 0;
360: }
361: return(0);
362: }
366: /*@
367: KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).
369: Collective on KSP
371: Input Parameters:
372: + ksp - iterative context obtained from KSPCreate
373: - ell - number of search directions
375: Options Database Keys:
377: . -ksp_bcgsl_ell ell
379: Level: intermediate
381: .keywords: KSP, BiCGStab(L), set, exact residuals,
383: .seealso: @()
384: @*/
385: PetscErrorCode KSPBCGSLSetEll(KSP ksp, int ell)
386: {
387: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
391: if (ell < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");
393: if (!ksp->setupcalled) {
394: bcgsl->ell = ell;
395: } else if (bcgsl->ell != ell) {
396: /* free the data structures, then create them again */
397: KSPDefaultFreeWork(ksp);
398: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
399: bcgsl->ell = ell;
400: ksp->setupcalled = 0;
401: }
402: return(0);
403: }
407: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
408: {
409: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
410: PetscErrorCode ierr;
411: PetscTruth isascii, isstring;
414: PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_ASCII, &isascii);
415: PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_STRING, &isstring);
417: if (isascii) {
418: PetscViewerASCIIPrintf(viewer, " BCGSL: Ell = %D\n", bcgsl->ell);
419: PetscViewerASCIIPrintf(viewer, " BCGSL: Delta = %lg\n", bcgsl->delta);
420: } else {
421: SETERRQ1(PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
422: }
423: return(0);
424: }
428: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
429: {
430: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
432: PetscInt this_ell;
433: PetscReal delta;
434: PetscTruth flga = PETSC_FALSE, flg;
437: /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
438: don't need to be called here.
439: */
440: PetscOptionsHead("KSP BiCGStab(L) Options");
442: /* Set number of search directions */
443: PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
444: if (flg) {
445: KSPBCGSLSetEll(ksp, this_ell);
446: }
448: /* Set polynomial type */
449: PetscOptionsTruth("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", flga,&flga,PETSC_NULL);
450: if (flga) {
451: KSPBCGSLSetPol(ksp, PETSC_TRUE);
452: } else {
453: flg = PETSC_FALSE;
454: PetscOptionsTruth("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", flg,&flg,PETSC_NULL);
455: KSPBCGSLSetPol(ksp, PETSC_FALSE);
456: }
458: /* Will computed residual be refreshed? */
459: PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
460: if (flg) {
461: KSPBCGSLSetXRes(ksp, delta);
462: }
463: PetscOptionsTail();
464: return(0);
465: }
469: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
470: {
471: KSP_BCGSL *bcgsl = (KSP_BCGSL *)ksp->data;
472: PetscInt ell = bcgsl->ell,ldMZ = ell+1;
476: if (ksp->pc_side == PC_SYMMETRIC) {
477: SETERRQ(PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
478: } else if (ksp->pc_side == PC_RIGHT) {
479: SETERRQ(PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
480: }
481: KSPDefaultGetWork(ksp, 6+2*ell);
482: PetscMalloc5(ldMZ,PetscScalar,&AY0c,ldMZ,PetscScalar,&AYlc,ldMZ,PetscScalar,&AYtc,ldMZ*ldMZ,PetscScalar,&MZa,ldMZ*ldMZ,PetscScalar,&MZb);
483: return(0);
484: }
488: PetscErrorCode KSPDestroy_BCGSL(KSP ksp)
489: {
490: KSP_BCGSL *bcgsl = (KSP_BCGSL*)ksp->data;
494: PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
495: KSPDefaultDestroy(ksp);
496: return(0);
497: }
499: /*MC
500: KSPBCGSL - Implements a slight variant of the Enhanced
501: BiCGStab(L) algorithm in (3) and (2). The variation
502: concerns cases when either kappa0**2 or kappa1**2 is
503: negative due to round-off. Kappa0 has also been pulled
504: out of the denominator in the formula for ghat.
506: References:
507: 1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
508: approaches for the stable computation of hybrid BiCG
509: methods", Applied Numerical Mathematics: Transactions
510: f IMACS, 19(3), pp 235-54, 1996.
511: 2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
512: "BiCGStab(L) and other hybrid Bi-CG methods",
513: Numerical Algorithms, 7, pp 75-109, 1994.
514: 3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
515: for solving linear systems of equations", preprint
516: from www.citeseer.com.
518: Contributed by: Joel M. Malard, email jm.malard@pnl.gov
520: Options Database Keys:
521: + -ksp_bcgsl_ell <ell> Number of Krylov search directions
522: - -ksp_bcgsl_cxpol Use a convex function of the MR and OR polynomials after the BiCG step
523: - -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals
525: Level: beginner
527: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS
529: M*/
533: PetscErrorCode KSPCreate_BCGSL(KSP ksp)
534: {
536: KSP_BCGSL *bcgsl;
539: /* allocate BiCGStab(L) context */
540: PetscNewLog(ksp, KSP_BCGSL, &bcgsl);
541: ksp->data = (void*)bcgsl;
543: ksp->pc_side = PC_LEFT;
544: ksp->ops->setup = KSPSetUp_BCGSL;
545: ksp->ops->solve = KSPSolve_BCGSL;
546: ksp->ops->destroy = KSPDestroy_BCGSL;
547: ksp->ops->buildsolution = KSPDefaultBuildSolution;
548: ksp->ops->buildresidual = KSPDefaultBuildResidual;
549: ksp->ops->setfromoptions = KSPSetFromOptions_BCGSL;
550: ksp->ops->view = KSPView_BCGSL;
552: /* Let the user redefine the number of directions vectors */
553: bcgsl->ell = 2;
555: /*Choose between a single MR step or an averaged MR/OR */
556: bcgsl->bConvex = PETSC_FALSE;
558: /* Set the threshold for when exact residuals will be used */
559: bcgsl->delta = 0.0;
560: return(0);
561: }