Actual source code: bcgsl.c

  1: #define PETSCKSP_DLL
  2: /*
  3:  * Implementation of BiCGstab(L) the paper by D.R. Fokkema,
  4:  * "Enhanced implementation of BiCGStab(L) for solving linear systems
  5:  * of equations". This uses tricky delayed updating ideas to prevent
  6:  * round-off buildup.
  7:  *
  8:  * This has not been completely cleaned up into PETSc style.
  9:  *
 10:  * All the BLAS and LAPACK calls below should be removed and replaced with 
 11:  * loops and the macros for block solvers converted from LINPACK; there is no way
 12:  * calls to BLAS/LAPACK make sense for size 2, 3, 4, etc.
 13:  */
 14:  #include private/kspimpl.h
 15:  #include ../src/ksp/ksp/impls/bcgsl/bcgslimpl.h
 16:  #include petscblaslapack.h


 21: static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
 22: {
 23:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *) ksp->data;
 24:   PetscScalar    alpha, beta, omega, sigma;
 25:   PetscScalar    rho0, rho1;
 26:   PetscReal      kappa0, kappaA, kappa1;
 27:   PetscReal      ghat, epsilon, abstol;
 28:   PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
 29:   PetscTruth     bUpdateX;
 30:   PetscTruth     bBombed = PETSC_FALSE;

 32:   PetscInt       maxit;
 33:   PetscInt       h, i, j, k, vi, ell;
 34:   PetscBLASInt   ldMZ,bierr;


 39:   if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL");
 40:   if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL");
 41:   if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL");

 43:   /* set up temporary vectors */
 44:   vi = 0;
 45:   ell = bcgsl->ell;
 46:   bcgsl->vB    = ksp->work[vi]; vi++;
 47:   bcgsl->vRt   = ksp->work[vi]; vi++;
 48:   bcgsl->vTm   = ksp->work[vi]; vi++;
 49:   bcgsl->vvR   = ksp->work+vi; vi += ell+1;
 50:   bcgsl->vvU   = ksp->work+vi; vi += ell+1;
 51:   bcgsl->vXr   = ksp->work[vi]; vi++;
 52:   ldMZ = PetscBLASIntCast(ell+1);

 54:   /* Prime the iterative solver */
 55:   KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
 56:   VecNorm(VVR[0], NORM_2, &zeta0);
 57:   rnmax_computed = zeta0;
 58:   rnmax_true = zeta0;

 60:   (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
 61:   if (ksp->reason) {
 62:     PetscObjectTakeAccess(ksp);
 63:     ksp->its   = 0;
 64:     ksp->rnorm = zeta0;
 65:     PetscObjectGrantAccess(ksp);
 66:     return(0);
 67:   }

 69:   VecSet(VVU[0],0.0);
 70:   alpha = 0.;
 71:   rho0 = omega = 1;

 73:   if (bcgsl->delta>0.0) {
 74:     VecCopy(VX, VXR);
 75:     VecSet(VX,0.0);
 76:     VecCopy(VVR[0], VB);
 77:   } else {
 78:     VecCopy(ksp->vec_rhs, VB);
 79:   }

 81:   /* Life goes on */
 82:   VecCopy(VVR[0], VRT);
 83:   zeta = zeta0;

 85:   KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);

 87:   for (k=0; k<maxit; k += bcgsl->ell) {
 88:     ksp->its   = k;
 89:     ksp->rnorm = zeta;

 91:     KSPLogResidualHistory(ksp, zeta);
 92:     KSPMonitor(ksp, ksp->its, zeta);

 94:     (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
 95:     if (ksp->reason) break;

 97:     /* BiCG part */
 98:     rho0 = -omega*rho0;
 99:     nrm0 = zeta;
100:     for (j=0; j<bcgsl->ell; j++) {
101:       /* rho1 <- r_j' * r_tilde */
102:       VecDot(VVR[j], VRT, &rho1);
103:       if (rho1 == 0.0) {
104:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
105:         bBombed = PETSC_TRUE;
106:         break;
107:       }
108:       beta = alpha*(rho1/rho0);
109:       rho0 = rho1;
110:       for (i=0; i<=j; i++) {
111:         /* u_i <- r_i - beta*u_i */
112:         VecAYPX(VVU[i], -beta, VVR[i]);
113:       }
114:       /* u_{j+1} <- inv(K)*A*u_j */
115:       KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);

117:       VecDot(VVU[j+1], VRT, &sigma);
118:       if (sigma == 0.0) {
119:         ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
120:         bBombed = PETSC_TRUE;
121:         break;
122:       }
123:       alpha = rho1/sigma;

125:       /* x <- x + alpha*u_0 */
126:       VecAXPY(VX, alpha, VVU[0]);

128:       for (i=0; i<=j; i++) {
129:         /* r_i <- r_i - alpha*u_{i+1} */
130:         VecAXPY(VVR[i], -alpha, VVU[i+1]);
131:       }

133:       /* r_{j+1} <- inv(K)*A*r_j */
134:       KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);

136:       VecNorm(VVR[0], NORM_2, &nrm0);
137:       if (bcgsl->delta>0.0) {
138:         if (rnmax_computed<nrm0) rnmax_computed = nrm0;
139:         if (rnmax_true<nrm0) rnmax_true = nrm0;
140:       }

142:       /* NEW: check for early exit */
143:       (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
144:       if (ksp->reason) {
145:         PetscObjectTakeAccess(ksp);
146:         ksp->its   = k+j;
147:         ksp->rnorm = nrm0;
148:         PetscObjectGrantAccess(ksp);
149:         break;
150:       }
151:     }

153:     if (bBombed==PETSC_TRUE) break;

155:     /* Polynomial part */
156:     for(i = 0; i <= bcgsl->ell; ++i) {
157:       VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
158:     }
159:     /* Symmetrize MZa */
160:     for(i = 0; i <= bcgsl->ell; ++i) {
161:       for(j = i+1; j <= bcgsl->ell; ++j) {
162:         MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
163:       }
164:     }
165:     /* Copy MZa to MZb */
166:     PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));

168:     if (!bcgsl->bConvex || bcgsl->ell==1) {
169:       PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);

171:       AY0c[0] = -1;
172:       LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
173:       if (ierr!=0) {
174:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
175:         bBombed = PETSC_TRUE;
176:         break;
177:       }
178:       PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));
179:       LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
180:     } else {
181:       PetscBLASInt ione = 1;
182:       PetscScalar aone = 1.0, azero = 0.0;
183:       PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);

185:       LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
186:       if (ierr!=0) {
187:         ksp->reason = KSP_DIVERGED_BREAKDOWN;
188:         bBombed = PETSC_TRUE;
189:         break;
190:       }
191:       PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));
192:       LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
193:       AY0c[0] = -1;
194:       AY0c[bcgsl->ell] = 0.;

196:       PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));
197:       LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

199:       AYlc[0] = 0.;
200:       AYlc[bcgsl->ell] = -1;

202:       BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

204:       kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

206:       /* round-off can cause negative kappa's */
207:       if (kappa0<0) kappa0 = -kappa0;
208:       kappa0 = sqrt(kappa0);

210:       kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

212:       BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

214:       kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

216:       if (kappa1<0) kappa1 = -kappa1;
217:       kappa1 = sqrt(kappa1);

219:       if (kappa0!=0.0 && kappa1!=0.0) {
220:         if (kappaA<0.7*kappa0*kappa1) {
221:           ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
222:         } else {
223:           ghat = kappaA/(kappa1*kappa1);
224:         }
225:         for (i=0; i<=bcgsl->ell; i++) {
226:           AY0c[i] = AY0c[i] - ghat* AYlc[i];
227:         }
228:       }
229:     }

231:     omega = AY0c[bcgsl->ell];
232:     for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
233:       omega = AY0c[h];
234:     }
235:     if (omega==0.0) {
236:       ksp->reason = KSP_DIVERGED_BREAKDOWN;
237:       break;
238:     }


241:     VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);
242:     for (i=1; i<=bcgsl->ell; i++) {
243:       AY0c[i] *= -1.0;
244:     }
245:     VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);
246:     VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);
247:     for (i=1; i<=bcgsl->ell; i++) {
248:       AY0c[i] *= -1.0;
249:     }
250:     VecNorm(VVR[0], NORM_2, &zeta);

252:     /* Accurate Update */
253:     if (bcgsl->delta>0.0) {
254:       if (rnmax_computed<zeta) rnmax_computed = zeta;
255:       if (rnmax_true<zeta) rnmax_true = zeta;

257:       bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
258:       if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
259:         /* r0 <- b-inv(K)*A*X */
260:         KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
261:         VecAYPX(VVR[0], -1.0, VB);
262:         rnmax_true = zeta;

264:         if (bUpdateX) {
265:           VecAXPY(VXR,1.0,VX);
266:           VecSet(VX,0.0);
267:           VecCopy(VVR[0], VB);
268:           rnmax_computed = zeta;
269:         }
270:       }
271:     }
272:   }
273:   if (bcgsl->delta>0.0) {
274:     VecAXPY(VX,1.0,VXR);
275:   }

277:   (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
278:   if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
279:   return(0);
280: }

284: /*@
285:    KSPBCGSLSetXRes - Sets the parameter governing when
286:    exact residuals will be used instead of computed residuals.

288:    Collective on KSP

290:    Input Parameters:
291: +  ksp - iterative context obtained from KSPCreate
292: -  delta - computed residuals are used alone when delta is not positive

294:    Options Database Keys:

296: .  -ksp_bcgsl_xres delta

298:    Level: intermediate

300: .keywords: KSP, BiCGStab(L), set, exact residuals

302: .seealso: KSPBCGSLSetEll(), KSPBCGSLSetPol()
303: @*/
304: PetscErrorCode  KSPBCGSLSetXRes(KSP ksp, PetscReal delta)
305: {
306:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;

310:   if (ksp->setupcalled) {
311:     if ((delta<=0 && bcgsl->delta>0) || (delta>0 && bcgsl->delta<=0)) {
312:       KSPDefaultFreeWork(ksp);
313:       PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
314:       ksp->setupcalled = 0;
315:     }
316:   }
317:   bcgsl->delta = delta;
318:   return(0);
319: }

323: /*@
324:    KSPBCGSLSetPol - Sets the type of polynomial part will
325:    be used in the BiCGSTab(L) solver.

327:    Collective on KSP

329:    Input Parameters:
330: +  ksp - iterative context obtained from KSPCreate
331: -  uMROR - set to PETSC_TRUE when the polynomial is a convex combination of an MR and an OR step.

333:    Options Database Keys:

335: +  -ksp_bcgsl_cxpoly - use enhanced polynomial
336: .  -ksp_bcgsl_mrpoly - use standard polynomial

338:    Level: intermediate

340: .keywords: KSP, BiCGStab(L), set, polynomial

342: .seealso: @()
343: @*/
344: PetscErrorCode  KSPBCGSLSetPol(KSP ksp, PetscTruth uMROR)
345: {
346:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;

350:   if (!ksp->setupcalled) {
351:     bcgsl->bConvex = uMROR;
352:   } else if (bcgsl->bConvex != uMROR) {
353:     /* free the data structures,
354:        then create them again
355:      */
356:     KSPDefaultFreeWork(ksp);
357:     PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
358:     bcgsl->bConvex = uMROR;
359:     ksp->setupcalled = 0;
360:   }
361:   return(0);
362: }

366: /*@
367:    KSPBCGSLSetEll - Sets the number of search directions in BiCGStab(L).

369:    Collective on KSP

371:    Input Parameters:
372: +  ksp - iterative context obtained from KSPCreate
373: -  ell - number of search directions

375:    Options Database Keys:

377: .  -ksp_bcgsl_ell ell

379:    Level: intermediate

381: .keywords: KSP, BiCGStab(L), set, exact residuals,

383: .seealso: @()
384: @*/
385: PetscErrorCode  KSPBCGSLSetEll(KSP ksp, int ell)
386: {
387:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;

391:   if (ell < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE, "KSPBCGSLSetEll: second argument must be positive");

393:   if (!ksp->setupcalled) {
394:     bcgsl->ell = ell;
395:   } else if (bcgsl->ell != ell) {
396:     /* free the data structures, then create them again */
397:     KSPDefaultFreeWork(ksp);
398:     PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
399:     bcgsl->ell = ell;
400:     ksp->setupcalled = 0;
401:   }
402:   return(0);
403: }

407: PetscErrorCode KSPView_BCGSL(KSP ksp, PetscViewer viewer)
408: {
409:   KSP_BCGSL       *bcgsl = (KSP_BCGSL *)ksp->data;
410:   PetscErrorCode  ierr;
411:   PetscTruth      isascii, isstring;

414:   PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_ASCII, &isascii);
415:   PetscTypeCompare((PetscObject)viewer, PETSC_VIEWER_STRING, &isstring);

417:   if (isascii) {
418:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Ell = %D\n", bcgsl->ell);
419:     PetscViewerASCIIPrintf(viewer, "  BCGSL: Delta = %lg\n", bcgsl->delta);
420:   } else {
421:     SETERRQ1(PETSC_ERR_SUP, "Viewer type %s not supported for KSP BCGSL", ((PetscObject)viewer)->type_name);
422:   }
423:   return(0);
424: }

428: PetscErrorCode KSPSetFromOptions_BCGSL(KSP ksp)
429: {
430:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;
432:   PetscInt       this_ell;
433:   PetscReal      delta;
434:   PetscTruth     flga = PETSC_FALSE, flg;

437:   /* PetscOptionsBegin/End are called in KSPSetFromOptions. They
438:      don't need to be called here.
439:   */
440:   PetscOptionsHead("KSP BiCGStab(L) Options");

442:   /* Set number of search directions */
443:   PetscOptionsInt("-ksp_bcgsl_ell","Number of Krylov search directions","KSPBCGSLSetEll",bcgsl->ell,&this_ell,&flg);
444:   if (flg) {
445:     KSPBCGSLSetEll(ksp, this_ell);
446:   }

448:   /* Set polynomial type */
449:   PetscOptionsTruth("-ksp_bcgsl_cxpoly", "Polynomial part of BiCGStabL is MinRes + OR", "KSPBCGSLSetPol", flga,&flga,PETSC_NULL);
450:   if (flga) {
451:     KSPBCGSLSetPol(ksp, PETSC_TRUE);
452:   } else {
453:     flg  = PETSC_FALSE;
454:     PetscOptionsTruth("-ksp_bcgsl_mrpoly", "Polynomial part of BiCGStabL is MinRes", "KSPBCGSLSetPol", flg,&flg,PETSC_NULL);
455:     KSPBCGSLSetPol(ksp, PETSC_FALSE);
456:   }

458:   /* Will computed residual be refreshed? */
459:   PetscOptionsReal("-ksp_bcgsl_xres", "Threshold used to decide when to refresh computed residuals", "KSPBCGSLSetXRes", bcgsl->delta, &delta, &flg);
460:   if (flg) {
461:     KSPBCGSLSetXRes(ksp, delta);
462:   }
463:   PetscOptionsTail();
464:   return(0);
465: }

469: PetscErrorCode KSPSetUp_BCGSL(KSP ksp)
470: {
471:   KSP_BCGSL      *bcgsl = (KSP_BCGSL *)ksp->data;
472:   PetscInt       ell = bcgsl->ell,ldMZ = ell+1;

476:   if (ksp->pc_side == PC_SYMMETRIC) {
477:     SETERRQ(PETSC_ERR_SUP, "no symmetric preconditioning for KSPBCGSL");
478:   } else if (ksp->pc_side == PC_RIGHT) {
479:     SETERRQ(PETSC_ERR_SUP, "no right preconditioning for KSPBCGSL");
480:   }
481:   KSPDefaultGetWork(ksp, 6+2*ell);
482:   PetscMalloc5(ldMZ,PetscScalar,&AY0c,ldMZ,PetscScalar,&AYlc,ldMZ,PetscScalar,&AYtc,ldMZ*ldMZ,PetscScalar,&MZa,ldMZ*ldMZ,PetscScalar,&MZb);
483:   return(0);
484: }

488: PetscErrorCode KSPDestroy_BCGSL(KSP ksp)
489: {
490:   KSP_BCGSL      *bcgsl = (KSP_BCGSL*)ksp->data;

494:   PetscFree5(AY0c,AYlc,AYtc,MZa,MZb);
495:   KSPDefaultDestroy(ksp);
496:   return(0);
497: }

499: /*MC
500:      KSPBCGSL - Implements a slight variant of the Enhanced
501:                 BiCGStab(L) algorithm in (3) and (2).  The variation
502:                 concerns cases when either kappa0**2 or kappa1**2 is
503:                 negative due to round-off. Kappa0 has also been pulled
504:                 out of the denominator in the formula for ghat.

506:     References:
507:       1. G.L.G. Sleijpen, H.A. van der Vorst, "An overview of
508:          approaches for the stable computation of hybrid BiCG
509:          methods", Applied Numerical Mathematics: Transactions
510:          f IMACS, 19(3), pp 235-54, 1996.
511:       2. G.L.G. Sleijpen, H.A. van der Vorst, D.R. Fokkema,
512:          "BiCGStab(L) and other hybrid Bi-CG methods",
513:           Numerical Algorithms, 7, pp 75-109, 1994.
514:       3. D.R. Fokkema, "Enhanced implementation of BiCGStab(L)
515:          for solving linear systems of equations", preprint
516:          from www.citeseer.com.

518:    Contributed by: Joel M. Malard, email jm.malard@pnl.gov

520:    Options Database Keys:
521: +  -ksp_bcgsl_ell <ell> Number of Krylov search directions
522: -  -ksp_bcgsl_cxpol Use a convex function of the MR and OR polynomials after the BiCG step
523: -  -ksp_bcgsl_xres <res> Threshold used to decide when to refresh computed residuals

525:    Level: beginner

527: .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPBCGS

529: M*/
533: PetscErrorCode  KSPCreate_BCGSL(KSP ksp)
534: {
536:   KSP_BCGSL      *bcgsl;

539:   /* allocate BiCGStab(L) context */
540:   PetscNewLog(ksp, KSP_BCGSL, &bcgsl);
541:   ksp->data = (void*)bcgsl;

543:   ksp->pc_side              = PC_LEFT;
544:   ksp->ops->setup           = KSPSetUp_BCGSL;
545:   ksp->ops->solve           = KSPSolve_BCGSL;
546:   ksp->ops->destroy         = KSPDestroy_BCGSL;
547:   ksp->ops->buildsolution   = KSPDefaultBuildSolution;
548:   ksp->ops->buildresidual   = KSPDefaultBuildResidual;
549:   ksp->ops->setfromoptions  = KSPSetFromOptions_BCGSL;
550:   ksp->ops->view            = KSPView_BCGSL;

552:   /* Let the user redefine the number of directions vectors */
553:   bcgsl->ell = 2;

555:   /*Choose between a single MR step or an averaged MR/OR */
556:   bcgsl->bConvex = PETSC_FALSE;

558:   /* Set the threshold for when exact residuals will be used */
559:   bcgsl->delta = 0.0;
560:   return(0);
561: }