Actual source code: asa.c
1: #define PETSCKSP_DLL
3: /* --------------------------------------------------------------------
5: Contributed by Arvid Bessen, Columbia University, June 2007
6:
7: This file implements a ASA preconditioner in PETSc as part of PC.
9: The adaptive smoothed aggregation algorithm is described in the paper
10: "Adaptive Smoothed Aggregation (ASA)", M. Brezina, R. Falgout, S. MacLachlan,
11: T. Manteuffel, S. McCormick, and J. Ruge, SIAM Journal on Scientific Computing,
12: SISC Volume 25 Issue 6, Pages 1896-1920.
14: For an example usage of this preconditioner, see, e.g.
15: $PETSC_DIR/src/ksp/ksp/examples/tutorials/ex38.c ex39.c
16: and other files in that directory.
18: This code is still somewhat experimental. A number of improvements would be
19: - keep vectors allocated on each level, instead of destroying them
20: (see mainly PCApplyVcycleOnLevel_ASA)
21: - in PCCreateTransferOp_ASA we get all of the submatrices at once, this could
22: be optimized by differentiating between local and global matrices
23: - the code does not handle it gracefully if there is just one level
24: - if relaxation is sufficient, exit of PCInitializationStage_ASA is not
25: completely clean
26: - default values could be more reasonable, especially for parallel solves,
27: where we need a parallel LU or similar
28: - the richardson scaling parameter is somewhat special, should be treated in a
29: good default way
30: - a number of parameters for smoother (sor_omega, etc.) that we store explicitly
31: could be kept in the respective smoothers themselves
32: - some parameters have to be set via command line options, there are no direct
33: function calls available
34: - numerous other stuff
36: Example runs in parallel would be with parameters like
37: mpiexec ./program -pc_asa_coarse_pc_factor_mat_solver_package mumps -pc_asa_direct_solver 200
38: -pc_asa_max_cand_vecs 4 -pc_asa_mu_initial 50 -pc_asa_richardson_scale 1.0
39: -pc_asa_rq_improve 0.9 -asa_smoother_pc_type asm -asa_smoother_sub_pc_type sor
41: -------------------------------------------------------------------- */
43: /*
44: This defines the data structures for the smoothed aggregation procedure
45: */
46: #include ../src/ksp/pc/impls/asa/asa.h
47: #include petscblaslapack.h
49: /* -------------------------------------------------------------------------- */
51: /* Event logging */
53: PetscLogEvent PC_InitializationStage_ASA, PC_GeneralSetupStage_ASA;
54: PetscLogEvent PC_CreateTransferOp_ASA, PC_CreateVcycle_ASA;
55: PetscTruth asa_events_registered = PETSC_FALSE;
60: /*@C
61: PCASASetDM - Sets the coarse grid information for the grids
63: Collective on PC
65: Input Parameter:
66: + pc - the context
67: - dm - the DA or ADDA or VecPack object
69: Level: advanced
71: @*/
72: PetscErrorCode PCASASetDM(PC pc,DM dm)
73: {
74: PetscErrorCode ierr,(*f)(PC,DM);
78: PetscObjectQueryFunction((PetscObject)pc,"PCASASetDM_C",(void (**)(void))&f);
79: if (f) {
80: (*f)(pc,dm);
81: }
82: return(0);
83: }
88: PetscErrorCode PCASASetDM_ASA(PC pc, DM dm)
89: {
91: PC_ASA *asa = (PC_ASA *) pc->data;
94: PetscObjectReference((PetscObject)dm);
95: asa->dm = dm;
96: return(0);
97: }
102: /*@C
103: PCASASetTolerances - Sets the convergence thresholds for ASA algorithm
105: Collective on PC
107: Input Parameter:
108: + pc - the context
109: . rtol - the relative convergence tolerance
110: (relative decrease in the residual norm)
111: . abstol - the absolute convergence tolerance
112: (absolute size of the residual norm)
113: . dtol - the divergence tolerance
114: (amount residual can increase before KSPDefaultConverged()
115: concludes that the method is diverging)
116: - maxits - maximum number of iterations to use
118: Notes:
119: Use PETSC_DEFAULT to retain the default value of any of the tolerances.
121: Level: advanced
122: @*/
123: PetscErrorCode PCASASetTolerances(PC pc, PetscReal rtol, PetscReal abstol,PetscReal dtol, PetscInt maxits)
124: {
125: PetscErrorCode ierr,(*f)(PC,PetscReal,PetscReal,PetscReal,PetscInt);
129: PetscObjectQueryFunction((PetscObject)pc,"PCASASetTolerances_C",(void (**)(void))&f);
130: if (f) {
131: (*f)(pc,rtol,abstol,dtol,maxits);
132: }
133: return(0);
134: }
139: PetscErrorCode PCASASetTolerances_ASA(PC pc, PetscReal rtol, PetscReal abstol,PetscReal dtol, PetscInt maxits)
140: {
141: PC_ASA *asa = (PC_ASA *) pc->data;
145: if (rtol != PETSC_DEFAULT) asa->rtol = rtol;
146: if (abstol != PETSC_DEFAULT) asa->abstol = abstol;
147: if (dtol != PETSC_DEFAULT) asa->divtol = dtol;
148: if (maxits != PETSC_DEFAULT) asa->max_it = maxits;
149: return(0);
150: }
155: /*
156: PCCreateLevel_ASA - Creates one level for the ASA algorithm
158: Input Parameters:
159: + level - current level
160: . comm - MPI communicator object
161: . next - pointer to next level
162: . prev - pointer to previous level
163: . ksptype - the KSP type for the smoothers on this level
164: - pctype - the PC type for the smoothers on this level
166: Output Parameters:
167: . new_asa_lev - the newly created level
169: .keywords: ASA, create, levels, multigrid
170: */
171: PetscErrorCode PCCreateLevel_ASA(PC_ASA_level **new_asa_lev, int level,MPI_Comm comm, PC_ASA_level *prev,
172: PC_ASA_level *next,KSPType ksptype, PCType pctype)
173: {
175: PC_ASA_level *asa_lev;
176:
178: PetscMalloc(sizeof(PC_ASA_level), &asa_lev);
180: asa_lev->level = level;
181: asa_lev->size = 0;
183: asa_lev->A = 0;
184: asa_lev->B = 0;
185: asa_lev->x = 0;
186: asa_lev->b = 0;
187: asa_lev->r = 0;
188:
189: asa_lev->dm = 0;
190: asa_lev->aggnum = 0;
191: asa_lev->agg = 0;
192: asa_lev->loc_agg_dofs = 0;
193: asa_lev->agg_corr = 0;
194: asa_lev->bridge_corr = 0;
195:
196: asa_lev->P = 0;
197: asa_lev->Pt = 0;
198: asa_lev->smP = 0;
199: asa_lev->smPt = 0;
201: asa_lev->comm = comm;
203: asa_lev->smoothd = 0;
204: asa_lev->smoothu = 0;
206: asa_lev->prev = prev;
207: asa_lev->next = next;
208:
209: *new_asa_lev = asa_lev;
210: return(0);
211: }
215: PetscErrorCode SafeMatDestroy(Mat *m)
216: {
217: PetscErrorCode 0;
220: if (m && *m) {MatDestroy(*m); *m=0;}
221: PetscFunctionReturn(ierr);
222: }
226: PetscErrorCode SafeVecDestroy(Vec *v)
227: {
228: PetscErrorCode 0;
231: if (v && *v) {VecDestroy(*v); *v=0;}
232: PetscFunctionReturn(ierr);
233: }
237: PetscErrorCode PrintResNorm(Mat A, Vec x, Vec b, Vec r)
238: {
240: PetscTruth destroyr = PETSC_FALSE;
241: PetscReal resnorm;
242: MPI_Comm Acomm;
245: if (!r) {
246: MatGetVecs(A, PETSC_NULL, &r);
247: destroyr = PETSC_TRUE;
248: }
249: MatMult(A, x, r);
250: VecAYPX(r, -1.0, b);
251: VecNorm(r, NORM_2, &resnorm);
252: PetscObjectGetComm((PetscObject) A, &Acomm);
253: PetscPrintf(Acomm, "Residual norm is %f.\n", resnorm);
255: if (destroyr) {
256: VecDestroy(r);
257: }
258:
259: return(0);
260: }
264: PetscErrorCode PrintEnergyNormOfDiff(Mat A, Vec x, Vec y)
265: {
267: Vec vecdiff, Avecdiff;
268: PetscScalar dotprod;
269: PetscReal dotabs;
270: MPI_Comm Acomm;
273: VecDuplicate(x, &vecdiff);
274: VecWAXPY(vecdiff, -1.0, x, y);
275: MatGetVecs(A, PETSC_NULL, &Avecdiff);
276: MatMult(A, vecdiff, Avecdiff);
277: VecDot(vecdiff, Avecdiff, &dotprod);
278: dotabs = PetscAbsScalar(dotprod);
279: PetscObjectGetComm((PetscObject) A, &Acomm);
280: PetscPrintf(Acomm, "Energy norm %f.\n", dotabs);
281: VecDestroy(vecdiff);
282: VecDestroy(Avecdiff);
283: return(0);
284: }
286: /* -------------------------------------------------------------------------- */
287: /*
288: PCDestroyLevel_ASA - Destroys one level of the ASA preconditioner
290: Input Parameter:
291: . asa_lev - pointer to level that should be destroyed
293: */
296: PetscErrorCode PCDestroyLevel_ASA(PC_ASA_level *asa_lev)
297: {
301: SafeMatDestroy(&(asa_lev->A));
302: SafeMatDestroy(&(asa_lev->B));
303: SafeVecDestroy(&(asa_lev->x));
304: SafeVecDestroy(&(asa_lev->b));
305: SafeVecDestroy(&(asa_lev->r));
307: if (asa_lev->dm) {DMDestroy(asa_lev->dm);}
309: SafeMatDestroy(&(asa_lev->agg));
310: PetscFree(asa_lev->loc_agg_dofs);
311: SafeMatDestroy(&(asa_lev->agg_corr));
312: SafeMatDestroy(&(asa_lev->bridge_corr));
314: SafeMatDestroy(&(asa_lev->P));
315: SafeMatDestroy(&(asa_lev->Pt));
316: SafeMatDestroy(&(asa_lev->smP));
317: SafeMatDestroy(&(asa_lev->smPt));
319: if (asa_lev->smoothd != asa_lev->smoothu) {
320: if (asa_lev->smoothd) {KSPDestroy(asa_lev->smoothd);}
321: }
322: if (asa_lev->smoothu) {KSPDestroy(asa_lev->smoothu);}
324: PetscFree(asa_lev);
325: return(0);
326: }
328: /* -------------------------------------------------------------------------- */
329: /*
330: PCComputeSpectralRadius_ASA - Computes the spectral radius of asa_lev->A
331: and stores it it asa_lev->spec_rad
333: Input Parameters:
334: . asa_lev - the level we are treating
336: Compute spectral radius with sqrt(||A||_1 ||A||_inf) >= ||A||_2 >= rho(A)
338: */
341: PetscErrorCode PCComputeSpectralRadius_ASA(PC_ASA_level *asa_lev)
342: {
344: PetscReal norm_1, norm_inf;
347: MatNorm(asa_lev->A, NORM_1, &norm_1);
348: MatNorm(asa_lev->A, NORM_INFINITY, &norm_inf);
349: asa_lev->spec_rad = sqrt(norm_1*norm_inf);
350: return(0);
351: }
355: PetscErrorCode PCSetRichardsonScale_ASA(KSP ksp, PetscReal spec_rad, PetscReal richardson_scale) {
357: PC pc;
358: PetscTruth flg;
359: PetscReal spec_rad_inv;
362: KSPSetInitialGuessNonzero(ksp, PETSC_TRUE);
363: if (richardson_scale != PETSC_DECIDE) {
364: KSPRichardsonSetScale(ksp, richardson_scale);
365: } else {
366: KSPGetPC(ksp, &pc);
367: PetscTypeCompare((PetscObject)(pc), PCNONE, &flg);
368: if (flg) {
369: /* WORK: this is just an educated guess. Any number between 0 and 2/rho(A)
370: should do. asa_lev->spec_rad has to be an upper bound on rho(A). */
371: spec_rad_inv = 1.0/spec_rad;
372: KSPRichardsonSetScale(ksp, spec_rad_inv);
373: } else {
374: SETERRQ(PETSC_ERR_SUP, "Unknown PC type for smoother. Please specify scaling factor with -pc_asa_richardson_scale\n");
375: }
376: }
377: return(0);
378: }
382: PetscErrorCode PCSetSORomega_ASA(PC pc, PetscReal sor_omega)
383: {
387: PCSORSetSymmetric(pc, SOR_SYMMETRIC_SWEEP);
388: if (sor_omega != PETSC_DECIDE) {
389: PCSORSetOmega(pc, sor_omega);
390: }
391: return(0);
392: }
395: /* -------------------------------------------------------------------------- */
396: /*
397: PCSetupSmoothersOnLevel_ASA - Creates the smoothers of the level.
398: We assume that asa_lev->A and asa_lev->spec_rad are correctly computed
400: Input Parameters:
401: + asa - the data structure for the ASA preconditioner
402: . asa_lev - the level we are treating
403: - maxits - maximum number of iterations to use
404: */
407: PetscErrorCode PCSetupSmoothersOnLevel_ASA(PC_ASA *asa, PC_ASA_level *asa_lev, PetscInt maxits)
408: {
409: PetscErrorCode ierr;
410: PetscTruth flg;
411: PC pc;
414: /* destroy old smoothers */
415: if (asa_lev->smoothu && asa_lev->smoothu != asa_lev->smoothd) {
416: KSPDestroy(asa_lev->smoothu);
417: }
418: asa_lev->smoothu = 0;
419: if (asa_lev->smoothd) {
420: KSPDestroy(asa_lev->smoothd);
421: }
422: asa_lev->smoothd = 0;
423: /* create smoothers */
424: KSPCreate(asa_lev->comm,&asa_lev->smoothd);
425: KSPSetType(asa_lev->smoothd, asa->ksptype_smooth);
426: KSPGetPC(asa_lev->smoothd,&pc);
427: PCSetType(pc,asa->pctype_smooth);
429: /* set up problems for smoothers */
430: KSPSetOperators(asa_lev->smoothd, asa_lev->A, asa_lev->A, DIFFERENT_NONZERO_PATTERN);
431: KSPSetTolerances(asa_lev->smoothd, asa->smoother_rtol, asa->smoother_abstol, asa->smoother_dtol, maxits);
432: PetscTypeCompare((PetscObject)(asa_lev->smoothd), KSPRICHARDSON, &flg);
433: if (flg) {
434: /* special parameters for certain smoothers */
435: KSPSetInitialGuessNonzero(asa_lev->smoothd, PETSC_TRUE);
436: KSPGetPC(asa_lev->smoothd, &pc);
437: PetscTypeCompare((PetscObject)pc, PCSOR, &flg);
438: if (flg) {
439: PCSetSORomega_ASA(pc, asa->sor_omega);
440: } else {
441: /* just set asa->richardson_scale to get some very basic smoother */
442: PCSetRichardsonScale_ASA(asa_lev->smoothd, asa_lev->spec_rad, asa->richardson_scale);
443: }
444: /* this would be the place to add support for other preconditioners */
445: }
446: KSPSetOptionsPrefix(asa_lev->smoothd, "asa_smoother_");
447: KSPSetFromOptions(asa_lev->smoothd);
448: /* set smoothu equal to smoothd, this could change later */
449: asa_lev->smoothu = asa_lev->smoothd;
450: return(0);
451: }
453: /* -------------------------------------------------------------------------- */
454: /*
455: PCSetupDirectSolversOnLevel_ASA - Creates the direct solvers on the coarsest level.
456: We assume that asa_lev->A and asa_lev->spec_rad are correctly computed
458: Input Parameters:
459: + asa - the data structure for the ASA preconditioner
460: . asa_lev - the level we are treating
461: - maxits - maximum number of iterations to use
462: */
465: PetscErrorCode PCSetupDirectSolversOnLevel_ASA(PC_ASA *asa, PC_ASA_level *asa_lev, PetscInt maxits)
466: {
467: PetscErrorCode ierr;
468: PetscTruth flg;
469: PetscMPIInt comm_size;
470: PC pc;
473: if (asa_lev->smoothu && asa_lev->smoothu != asa_lev->smoothd) {
474: KSPDestroy(asa_lev->smoothu);
475: }
476: asa_lev->smoothu = 0;
477: if (asa_lev->smoothd) {
478: KSPDestroy(asa_lev->smoothd);
479: asa_lev->smoothd = 0;
480: }
481: PetscStrcmp(asa->ksptype_direct, KSPPREONLY, &flg);
482: if (flg) {
483: PetscStrcmp(asa->pctype_direct, PCLU, &flg);
484: if (flg) {
485: MPI_Comm_size(asa_lev->comm, &comm_size);
486: if (comm_size > 1) {
487: /* the LU PC will call MatSolve, we may have to set the correct type for the matrix
488: to have support for this in parallel */
489: MatConvert(asa_lev->A, asa->coarse_mat_type, MAT_REUSE_MATRIX, &(asa_lev->A));
490: }
491: }
492: }
493: /* create new solvers */
494: KSPCreate(asa_lev->comm,&asa_lev->smoothd);
495: KSPSetType(asa_lev->smoothd, asa->ksptype_direct);
496: KSPGetPC(asa_lev->smoothd,&pc);
497: PCSetType(pc,asa->pctype_direct);
498: /* set up problems for direct solvers */
499: KSPSetOperators(asa_lev->smoothd, asa_lev->A, asa_lev->A, DIFFERENT_NONZERO_PATTERN);
500: KSPSetTolerances(asa_lev->smoothd, asa->direct_rtol, asa->direct_abstol, asa->direct_dtol, maxits);
501: /* user can set any option by using -pc_asa_direct_xxx */
502: KSPSetOptionsPrefix(asa_lev->smoothd, "asa_coarse_");
503: KSPSetFromOptions(asa_lev->smoothd);
504: /* set smoothu equal to 0, not used */
505: asa_lev->smoothu = 0;
506: return(0);
507: }
509: /* -------------------------------------------------------------------------- */
510: /*
511: PCCreateAggregates_ASA - Creates the aggregates
513: Input Parameters:
514: . asa_lev - the level for which we should create the projection matrix
516: */
519: PetscErrorCode PCCreateAggregates_ASA(PC_ASA_level *asa_lev)
520: {
521: PetscInt m,n, m_loc,n_loc;
522: PetscInt m_loc_s, m_loc_e;
523: const PetscScalar one = 1.0;
524: PetscErrorCode ierr;
527: /* Create nodal aggregates A_i^l */
528: /* we use the DM grid information for that */
529: if (asa_lev->dm) {
530: /* coarsen DM and get the restriction matrix */
531: DMCoarsen(asa_lev->dm, PETSC_NULL, &(asa_lev->next->dm));
532: DMGetAggregates(asa_lev->next->dm, asa_lev->dm, &(asa_lev->agg));
533: MatGetSize(asa_lev->agg, &m, &n);
534: MatGetLocalSize(asa_lev->agg, &m_loc, &n_loc);
535: if (n!=asa_lev->size) SETERRQ(PETSC_ERR_ARG_SIZ,"DM interpolation matrix has incorrect size!\n");
536: asa_lev->next->size = m;
537: asa_lev->aggnum = m;
538: /* create the correlators, right now just identity matrices */
539: MatCreateMPIAIJ(asa_lev->comm, n_loc, n_loc, n, n, 1, PETSC_NULL, 1, PETSC_NULL,&(asa_lev->agg_corr));
540: MatGetOwnershipRange(asa_lev->agg_corr, &m_loc_s, &m_loc_e);
541: for (m=m_loc_s; m<m_loc_e; m++) {
542: MatSetValues(asa_lev->agg_corr, 1, &m, 1, &m, &one, INSERT_VALUES);
543: }
544: MatAssemblyBegin(asa_lev->agg_corr, MAT_FINAL_ASSEMBLY);
545: MatAssemblyEnd(asa_lev->agg_corr, MAT_FINAL_ASSEMBLY);
546: /* MatShift(asa_lev->agg_corr, 1.0); */
547: } else {
548: /* somehow define the aggregates without knowing the geometry */
549: /* future WORK */
550: SETERRQ(PETSC_ERR_SUP, "Currently pure algebraic coarsening is not supported!");
551: }
552: return(0);
553: }
555: /* -------------------------------------------------------------------------- */
556: /*
557: PCCreateTransferOp_ASA - Creates the transfer operator P_{l+1}^l for current level
559: Input Parameters:
560: + asa_lev - the level for which should create the transfer operator
561: - construct_bridge - true, if we should construct a bridge operator, false for normal prolongator
563: If we add a second, third, ... candidate vector (i.e. more than one column in B), we
564: have to relate the additional dimensions to the original aggregates. This is done through
565: the "aggregate correlators" agg_corr and bridge_corr.
566: The aggregate that is used in the construction is then given by
567: asa_lev->agg * asa_lev->agg_corr
568: for the regular prolongator construction and
569: asa_lev->agg * asa_lev->bridge_corr
570: for the bridging prolongator constructions.
571: */
574: PetscErrorCode PCCreateTransferOp_ASA(PC_ASA_level *asa_lev, PetscTruth construct_bridge)
575: {
578: const PetscReal Ca = 1e-3;
579: PetscReal cutoff;
580: PetscInt nodes_on_lev;
582: Mat logical_agg;
583: PetscInt mat_agg_loc_start, mat_agg_loc_end, mat_agg_loc_size;
584: PetscInt a;
585: const PetscInt *agg = 0;
586: PetscInt **agg_arr = 0;
588: IS *idxm_is_B_arr = 0;
589: PetscInt *idxn_B = 0;
590: IS idxn_is_B, *idxn_is_B_arr = 0;
592: Mat *b_submat_arr = 0;
594: PetscScalar *b_submat = 0, *b_submat_tp = 0;
595: PetscInt *idxm = 0, *idxn = 0;
596: PetscInt cand_vecs_num;
597: PetscInt *cand_vec_length = 0;
598: PetscInt max_cand_vec_length = 0;
599: PetscScalar **b_orth_arr = 0;
601: PetscInt i,j;
603: PetscScalar *tau = 0, *work = 0;
604: PetscBLASInt info,b1,b2;
606: PetscInt max_cand_vecs_to_add;
607: PetscInt *new_loc_agg_dofs = 0;
609: PetscInt total_loc_cols = 0;
610: PetscReal norm;
612: PetscInt a_loc_m, a_loc_n;
613: PetscInt mat_loc_col_start, mat_loc_col_end, mat_loc_col_size;
614: PetscInt loc_agg_dofs_sum;
615: PetscInt row, col;
616: PetscScalar val;
617: PetscMPIInt comm_size, comm_rank;
618: PetscInt *loc_cols = 0;
621: PetscLogEventBegin(PC_CreateTransferOp_ASA,0,0,0,0);
623: MatGetSize(asa_lev->B, &nodes_on_lev, PETSC_NULL);
625: /* If we add another candidate vector, we want to be able to judge, how much the new candidate
626: improves our current projection operators and whether it is worth adding it.
627: This is the precomputation necessary for implementing Notes (4.1) to (4.7).
628: We require that all candidate vectors x stored in B are normalized such that
629: <A x, x> = 1 and we thus do not have to compute this.
630: For each aggregate A we can now test condition (4.5) and (4.6) by computing
631: || quantity to check ||_{A}^2 <= cutoff * card(A)/N_l */
632: cutoff = Ca/(asa_lev->spec_rad);
634: /* compute logical aggregates by using the correlators */
635: if (construct_bridge) {
636: /* construct bridging operator */
637: MatMatMult(asa_lev->agg, asa_lev->bridge_corr, MAT_INITIAL_MATRIX, 1.0, &logical_agg);
638: } else {
639: /* construct "regular" prolongator */
640: MatMatMult(asa_lev->agg, asa_lev->agg_corr, MAT_INITIAL_MATRIX, 1.0, &logical_agg);
641: }
643: /* destroy correlator matrices for next level, these will be rebuilt in this routine */
644: if (asa_lev->next) {
645: SafeMatDestroy(&(asa_lev->next->agg_corr));
646: SafeMatDestroy(&(asa_lev->next->bridge_corr));
647: }
649: /* find out the correct local row indices */
650: MatGetOwnershipRange(logical_agg, &mat_agg_loc_start, &mat_agg_loc_end);
651: mat_agg_loc_size = mat_agg_loc_end-mat_agg_loc_start;
652:
653: cand_vecs_num = asa_lev->cand_vecs;
655: /* construct column indices idxn_B for reading from B */
656: PetscMalloc(sizeof(PetscInt)*(cand_vecs_num), &idxn_B);
657: for (i=0; i<cand_vecs_num; i++) {
658: idxn_B[i] = i;
659: }
660: ISCreateGeneral(asa_lev->comm, asa_lev->cand_vecs, idxn_B, &idxn_is_B);
661: PetscFree(idxn_B);
662: PetscMalloc(sizeof(IS)*mat_agg_loc_size, &idxn_is_B_arr);
663: for (a=0; a<mat_agg_loc_size; a++) {
664: idxn_is_B_arr[a] = idxn_is_B;
665: }
666: /* allocate storage for row indices idxm_B */
667: PetscMalloc(sizeof(IS)*mat_agg_loc_size, &idxm_is_B_arr);
669: /* Storage for the orthogonalized submatrices of B and their sizes */
670: PetscMalloc(sizeof(PetscInt)*mat_agg_loc_size, &cand_vec_length);
671: PetscMalloc(sizeof(PetscScalar*)*mat_agg_loc_size, &b_orth_arr);
672: /* Storage for the information about each aggregate */
673: PetscMalloc(sizeof(PetscInt*)*mat_agg_loc_size, &agg_arr);
674: /* Storage for the number of candidate vectors that are orthonormal and used in each submatrix */
675: PetscMalloc(sizeof(PetscInt)*mat_agg_loc_size, &new_loc_agg_dofs);
677: /* loop over local aggregates */
678: for (a=0; a<mat_agg_loc_size; a++) {
679: /* get info about current aggregate, this gives the rows we have to get from B */
680: MatGetRow(logical_agg, a+mat_agg_loc_start, &cand_vec_length[a], &agg, 0);
681: /* copy aggregate information */
682: PetscMalloc(sizeof(PetscInt)*cand_vec_length[a], &(agg_arr[a]));
683: PetscMemcpy(agg_arr[a], agg, sizeof(PetscInt)*cand_vec_length[a]);
684: /* restore row */
685: MatRestoreRow(logical_agg, a+mat_agg_loc_start, &cand_vec_length[a], &agg, 0);
686:
687: /* create index sets */
688: ISCreateGeneral(PETSC_COMM_SELF, cand_vec_length[a], agg_arr[a], &(idxm_is_B_arr[a]));
689: /* maximum candidate vector length */
690: if (cand_vec_length[a] > max_cand_vec_length) { max_cand_vec_length = cand_vec_length[a]; }
691: }
692: /* destroy logical_agg, no longer needed */
693: SafeMatDestroy(&logical_agg);
695: /* get the entries for aggregate from B */
696: MatGetSubMatrices(asa_lev->B, mat_agg_loc_size, idxm_is_B_arr, idxn_is_B_arr, MAT_INITIAL_MATRIX, &b_submat_arr);
697:
698: /* clean up all the index sets */
699: for (a=0; a<mat_agg_loc_size; a++) { ISDestroy(idxm_is_B_arr[a]); }
700: PetscFree(idxm_is_B_arr);
701: ISDestroy(idxn_is_B);
702: PetscFree(idxn_is_B_arr);
703:
704: /* storage for the values from each submatrix */
705: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length*cand_vecs_num, &b_submat);
706: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length*cand_vecs_num, &b_submat_tp);
707: PetscMalloc(sizeof(PetscInt)*max_cand_vec_length, &idxm);
708: for (i=0; i<max_cand_vec_length; i++) { idxm[i] = i; }
709: PetscMalloc(sizeof(PetscInt)*cand_vecs_num, &idxn);
710: for (i=0; i<cand_vecs_num; i++) { idxn[i] = i; }
711: /* work storage for QR algorithm */
712: PetscMalloc(sizeof(PetscScalar)*max_cand_vec_length, &tau);
713: PetscMalloc(sizeof(PetscScalar)*cand_vecs_num, &work);
715: /* orthogonalize all submatrices and store them in b_orth_arr */
716: for (a=0; a<mat_agg_loc_size; a++) {
717: /* Get the entries for aggregate from B. This is row ordered (although internally
718: it is column ordered and we will waste some energy transposing it).
719: WORK: use something like MatGetArray(b_submat_arr[a], &b_submat) but be really
720: careful about all the different matrix types */
721: MatGetValues(b_submat_arr[a], cand_vec_length[a], idxm, cand_vecs_num, idxn, b_submat);
723: if (construct_bridge) {
724: /* if we are constructing a bridging restriction/interpolation operator, we have
725: to use the same number of dofs as in our previous construction */
726: max_cand_vecs_to_add = asa_lev->loc_agg_dofs[a];
727: } else {
728: /* for a normal restriction/interpolation operator, we should make sure that we
729: do not create linear dependence by accident */
730: max_cand_vecs_to_add = PetscMin(cand_vec_length[a], cand_vecs_num);
731: }
733: /* We use LAPACK to compute the QR decomposition of b_submat. For LAPACK we have to
734: transpose the matrix. We might throw out some column vectors during this process.
735: We are keeping count of the number of column vectors that we use (and therefore the
736: number of dofs on the lower level) in new_loc_agg_dofs[a]. */
737: new_loc_agg_dofs[a] = 0;
738: for (j=0; j<max_cand_vecs_to_add; j++) {
739: /* check for condition (4.5) */
740: norm = 0.0;
741: for (i=0; i<cand_vec_length[a]; i++) {
742: norm += PetscRealPart(b_submat[i*cand_vecs_num+j])*PetscRealPart(b_submat[i*cand_vecs_num+j])
743: + PetscImaginaryPart(b_submat[i*cand_vecs_num+j])*PetscImaginaryPart(b_submat[i*cand_vecs_num+j]);
744: }
745: /* only add candidate vector if bigger than cutoff or first candidate */
746: if ((!j) || (norm > cutoff*((PetscReal) cand_vec_length[a])/((PetscReal) nodes_on_lev))) {
747: /* passed criterion (4.5), we have not implemented criterion (4.6) yet */
748: for (i=0; i<cand_vec_length[a]; i++) {
749: b_submat_tp[new_loc_agg_dofs[a]*cand_vec_length[a]+i] = b_submat[i*cand_vecs_num+j];
750: }
751: new_loc_agg_dofs[a]++;
752: }
753: /* #ifdef PCASA_VERBOSE */
754: else {
755: PetscPrintf(asa_lev->comm, "Cutoff criteria invoked\n");
756: }
757: /* #endif */
758: }
760: CHKMEMQ;
761: /* orthogonalize b_submat_tp using the QR algorithm from LAPACK */
762: b1 = PetscBLASIntCast(*(cand_vec_length+a));
763: b2 = PetscBLASIntCast(*(new_loc_agg_dofs+a));
764: LAPACKgeqrf_(&b1, &b2, b_submat_tp, &b1, tau, work, &b2, &info);
765: if (info) SETERRQ(PETSC_ERR_LIB, "LAPACKgeqrf_ LAPACK routine failed");
766: #if !defined(PETSC_MISSING_LAPACK_ORGQR)
767: LAPACKungqr_(&b1, &b2, &b2, b_submat_tp, &b1, tau, work, &b2, &info);
768: #else
769: SETERRQ(PETSC_ERR_SUP,"ORGQR - Lapack routine is unavailable\nIf linking with ESSL you MUST also link with full LAPACK, for example\nuse config/configure.py with --with-blas-lib=libessl.a --with-lapack-lib=/usr/local/lib/liblapack.a'");
770: #endif
771: if (info) SETERRQ(PETSC_ERR_LIB, "LAPACKungqr_ LAPACK routine failed");
773: /* Transpose b_submat_tp and store it in b_orth_arr[a]. If we are constructing a
774: bridging restriction/interpolation operator, we could end up with less dofs than
775: we previously had. We fill those up with zeros. */
776: if (!construct_bridge) {
777: PetscMalloc(sizeof(PetscScalar)*cand_vec_length[a]*new_loc_agg_dofs[a], b_orth_arr+a);
778: for (j=0; j<new_loc_agg_dofs[a]; j++) {
779: for (i=0; i<cand_vec_length[a]; i++) {
780: b_orth_arr[a][i*new_loc_agg_dofs[a]+j] = b_submat_tp[j*cand_vec_length[a]+i];
781: }
782: }
783: } else {
784: /* bridge, might have to fill up */
785: PetscMalloc(sizeof(PetscScalar)*cand_vec_length[a]*max_cand_vecs_to_add, b_orth_arr+a);
786: for (j=0; j<new_loc_agg_dofs[a]; j++) {
787: for (i=0; i<cand_vec_length[a]; i++) {
788: b_orth_arr[a][i*max_cand_vecs_to_add+j] = b_submat_tp[j*cand_vec_length[a]+i];
789: }
790: }
791: for (j=new_loc_agg_dofs[a]; j<max_cand_vecs_to_add; j++) {
792: for (i=0; i<cand_vec_length[a]; i++) {
793: b_orth_arr[a][i*max_cand_vecs_to_add+j] = 0.0;
794: }
795: }
796: new_loc_agg_dofs[a] = max_cand_vecs_to_add;
797: }
798: /* the number of columns in asa_lev->P that are local to this process */
799: total_loc_cols += new_loc_agg_dofs[a];
800: } /* end of loop over local aggregates */
802: /* destroy the submatrices, also frees all allocated space */
803: MatDestroyMatrices(mat_agg_loc_size, &b_submat_arr);
804: /* destroy all other workspace */
805: PetscFree(b_submat);
806: PetscFree(b_submat_tp);
807: PetscFree(idxm);
808: PetscFree(idxn);
809: PetscFree(tau);
810: PetscFree(work);
812: /* destroy old matrix P, Pt */
813: SafeMatDestroy(&(asa_lev->P));
814: SafeMatDestroy(&(asa_lev->Pt));
816: MatGetLocalSize(asa_lev->A, &a_loc_m, &a_loc_n);
818: /* determine local range */
819: MPI_Comm_size(asa_lev->comm, &comm_size);
820: MPI_Comm_rank(asa_lev->comm, &comm_rank);
821: PetscMalloc(comm_size*sizeof(PetscInt), &loc_cols);
822: MPI_Allgather(&total_loc_cols, 1, MPIU_INT, loc_cols, 1, MPIU_INT, asa_lev->comm);
823: mat_loc_col_start = 0;
824: for (i=0;i<comm_rank;i++) {
825: mat_loc_col_start += loc_cols[i];
826: }
827: mat_loc_col_end = mat_loc_col_start + loc_cols[i];
828: mat_loc_col_size = mat_loc_col_end-mat_loc_col_start;
829: if (mat_loc_col_size != total_loc_cols) SETERRQ(PETSC_ERR_COR, "Local size does not match matrix size");
830: PetscFree(loc_cols);
832: /* we now have enough information to create asa_lev->P */
833: MatCreateMPIAIJ(asa_lev->comm, a_loc_n, total_loc_cols, asa_lev->size, PETSC_DETERMINE,
834: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->P));
835: /* create asa_lev->Pt */
836: MatCreateMPIAIJ(asa_lev->comm, total_loc_cols, a_loc_n, PETSC_DETERMINE, asa_lev->size,
837: max_cand_vec_length, PETSC_NULL, max_cand_vec_length, PETSC_NULL, &(asa_lev->Pt));
838: if (asa_lev->next) {
839: /* create correlator for aggregates of next level */
840: MatCreateMPIAIJ(asa_lev->comm, mat_agg_loc_size, total_loc_cols, PETSC_DETERMINE, PETSC_DETERMINE,
841: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->next->agg_corr));
842: /* create asa_lev->next->bridge_corr matrix */
843: MatCreateMPIAIJ(asa_lev->comm, mat_agg_loc_size, total_loc_cols, PETSC_DETERMINE, PETSC_DETERMINE,
844: cand_vecs_num, PETSC_NULL, cand_vecs_num, PETSC_NULL, &(asa_lev->next->bridge_corr));
845: }
847: /* this is my own hack, but it should give the columns that we should write to */
848: MatGetOwnershipRangeColumn(asa_lev->P, &mat_loc_col_start, &mat_loc_col_end);
849: mat_loc_col_size = mat_loc_col_end-mat_loc_col_start;
850: if (mat_loc_col_size != total_loc_cols) SETERRQ(PETSC_ERR_ARG_SIZ, "The number of local columns in asa_lev->P assigned to this processor does not match the local vector size");
852: loc_agg_dofs_sum = 0;
853: /* construct P, Pt, agg_corr, bridge_corr */
854: for (a=0; a<mat_agg_loc_size; a++) {
855: /* store b_orth_arr[a] in P */
856: for (i=0; i<cand_vec_length[a]; i++) {
857: row = agg_arr[a][i];
858: for (j=0; j<new_loc_agg_dofs[a]; j++) {
859: col = mat_loc_col_start + loc_agg_dofs_sum + j;
860: val = b_orth_arr[a][i*new_loc_agg_dofs[a] + j];
861: MatSetValues(asa_lev->P, 1, &row, 1, &col, &val, INSERT_VALUES);
862: val = PetscConj(val);
863: MatSetValues(asa_lev->Pt, 1, &col, 1, &row, &val, INSERT_VALUES);
864: }
865: }
867: /* compute aggregate correlation matrices */
868: if (asa_lev->next) {
869: row = a+mat_agg_loc_start;
870: for (i=0; i<new_loc_agg_dofs[a]; i++) {
871: col = mat_loc_col_start + loc_agg_dofs_sum + i;
872: val = 1.0;
873: MatSetValues(asa_lev->next->agg_corr, 1, &row, 1, &col, &val, INSERT_VALUES);
874: /* for the bridge operator we leave out the newest candidates, i.e.
875: we set bridge_corr to 1.0 for all columns up to asa_lev->loc_agg_dofs[a] and to
876: 0.0 between asa_lev->loc_agg_dofs[a] and new_loc_agg_dofs[a] */
877: if (!(asa_lev->loc_agg_dofs && (i >= asa_lev->loc_agg_dofs[a]))) {
878: MatSetValues(asa_lev->next->bridge_corr, 1, &row, 1, &col, &val, INSERT_VALUES);
879: }
880: }
881: }
883: /* move to next entry point col */
884: loc_agg_dofs_sum += new_loc_agg_dofs[a];
885: } /* end of loop over local aggregates */
887: MatAssemblyBegin(asa_lev->P,MAT_FINAL_ASSEMBLY);
888: MatAssemblyEnd(asa_lev->P,MAT_FINAL_ASSEMBLY);
889: MatAssemblyBegin(asa_lev->Pt,MAT_FINAL_ASSEMBLY);
890: MatAssemblyEnd(asa_lev->Pt,MAT_FINAL_ASSEMBLY);
891: if (asa_lev->next) {
892: MatAssemblyBegin(asa_lev->next->agg_corr,MAT_FINAL_ASSEMBLY);
893: MatAssemblyEnd(asa_lev->next->agg_corr,MAT_FINAL_ASSEMBLY);
894: MatAssemblyBegin(asa_lev->next->bridge_corr,MAT_FINAL_ASSEMBLY);
895: MatAssemblyEnd(asa_lev->next->bridge_corr,MAT_FINAL_ASSEMBLY);
896: }
898: /* if we are not constructing a bridging operator, switch asa_lev->loc_agg_dofs
899: and new_loc_agg_dofs */
900: if (construct_bridge) {
901: PetscFree(new_loc_agg_dofs);
902: } else {
903: if (asa_lev->loc_agg_dofs) {
904: PetscFree(asa_lev->loc_agg_dofs);
905: }
906: asa_lev->loc_agg_dofs = new_loc_agg_dofs;
907: }
909: /* clean up */
910: for (a=0; a<mat_agg_loc_size; a++) {
911: PetscFree(b_orth_arr[a]);
912: PetscFree(agg_arr[a]);
913: }
914: PetscFree(cand_vec_length);
915: PetscFree(b_orth_arr);
916: PetscFree(agg_arr);
918: PetscLogEventEnd(PC_CreateTransferOp_ASA, 0,0,0,0);
919: return(0);
920: }
922: /* -------------------------------------------------------------------------- */
923: /*
924: PCSmoothProlongator_ASA - Computes the smoothed prolongators I and It on the level
926: Input Parameters:
927: . asa_lev - the level for which the smoothed prolongator is constructed
928: */
931: PetscErrorCode PCSmoothProlongator_ASA(PC_ASA_level *asa_lev)
932: {
936: SafeMatDestroy(&(asa_lev->smP));
937: SafeMatDestroy(&(asa_lev->smPt));
938: /* compute prolongator I_{l+1}^l = S_l P_{l+1}^l */
939: /* step 1: compute I_{l+1}^l = A_l P_{l+1}^l */
940: MatMatMult(asa_lev->A, asa_lev->P, MAT_INITIAL_MATRIX, 1, &(asa_lev->smP));
941: MatMatMult(asa_lev->Pt, asa_lev->A, MAT_INITIAL_MATRIX, 1, &(asa_lev->smPt));
942: /* step 2: shift and scale to get I_{l+1}^l = P_{l+1}^l - 4/(3/rho) A_l P_{l+1}^l */
943: MatAYPX(asa_lev->smP, -4./(3.*(asa_lev->spec_rad)), asa_lev->P, SUBSET_NONZERO_PATTERN);
944: MatAYPX(asa_lev->smPt, -4./(3.*(asa_lev->spec_rad)), asa_lev->Pt, SUBSET_NONZERO_PATTERN);
946: return(0);
947: }
950: /* -------------------------------------------------------------------------- */
951: /*
952: PCCreateVcycle_ASA - Creates the V-cycle, when aggregates are already defined
954: Input Parameters:
955: . asa - the preconditioner context
956: */
959: PetscErrorCode PCCreateVcycle_ASA(PC_ASA *asa)
960: {
962: PC_ASA_level *asa_lev, *asa_next_lev;
963: Mat AI;
966: PetscLogEventBegin(PC_CreateVcycle_ASA, 0,0,0,0);
968: if (!asa) SETERRQ(PETSC_ERR_ARG_NULL, "asa pointer is NULL");
969: if (!(asa->levellist)) SETERRQ(PETSC_ERR_ARG_NULL, "no levels found");
970: asa_lev = asa->levellist;
971: PCComputeSpectralRadius_ASA(asa_lev);
972: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->nu);
974: while(asa_lev->next) {
975: asa_next_lev = asa_lev->next;
976: /* (a) aggregates are already constructed */
978: /* (b) construct B_{l+1} and P_{l+1}^l using (2.11) */
979: /* construct P_{l+1}^l */
980: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
982: /* construct B_{l+1} */
983: SafeMatDestroy(&(asa_next_lev->B));
984: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->B));
985: asa_next_lev->cand_vecs = asa_lev->cand_vecs;
987: /* (c) construct smoothed prolongator */
988: PCSmoothProlongator_ASA(asa_lev);
989:
990: /* (d) construct coarse matrix */
991: /* Define coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
992: SafeMatDestroy(&(asa_next_lev->A));
993: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
994: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
995: SafeMatDestroy(&AI);
996: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
997: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
998: PCComputeSpectralRadius_ASA(asa_next_lev);
999: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->nu);
1000: /* create corresponding vectors x_{l+1}, b_{l+1}, r_{l+1} */
1001: SafeVecDestroy(&(asa_next_lev->x));
1002: SafeVecDestroy(&(asa_next_lev->b));
1003: SafeVecDestroy(&(asa_next_lev->r));
1004: MatGetVecs(asa_next_lev->A, &(asa_next_lev->x), &(asa_next_lev->b));
1005: MatGetVecs(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->r));
1007: /* go to next level */
1008: asa_lev = asa_lev->next;
1009: } /* end of while loop over the levels */
1010: /* asa_lev now points to the coarsest level, set up direct solver there */
1011: PCComputeSpectralRadius_ASA(asa_lev);
1012: PCSetupDirectSolversOnLevel_ASA(asa, asa_lev, asa->nu);
1014: PetscLogEventEnd(PC_CreateVcycle_ASA, 0,0,0,0);
1015: return(0);
1016: }
1018: /* -------------------------------------------------------------------------- */
1019: /*
1020: PCAddCandidateToB_ASA - Inserts a candidate vector in B
1022: Input Parameters:
1023: + B - the matrix to insert into
1024: . col_idx - the column we should insert to
1025: . x - the vector to insert
1026: - A - system matrix
1028: Function will insert normalized x into B, such that <A x, x> = 1
1029: (x itself is not changed). If B is projected down then this property
1030: is kept. If <A_l x_l, x_l> = 1 and the next level is defined by
1031: x_{l+1} = Pt x_l and A_{l+1} = Pt A_l P then
1032: <A_{l+1} x_{l+1}, x_l> = <Pt A_l P Pt x_l, Pt x_l>
1033: = <A_l P Pt x_l, P Pt x_l> = <A_l x_l, x_l> = 1
1034: because of the definition of P in (2.11).
1035: */
1038: PetscErrorCode PCAddCandidateToB_ASA(Mat B, PetscInt col_idx, Vec x, Mat A)
1039: {
1041: Vec Ax;
1042: PetscScalar dotprod;
1043: PetscReal norm;
1044: PetscInt i, loc_start, loc_end;
1045: PetscScalar val, *vecarray;
1048: MatGetVecs(A, PETSC_NULL, &Ax);
1049: MatMult(A, x, Ax);
1050: VecDot(Ax, x, &dotprod);
1051: norm = sqrt(PetscAbsScalar(dotprod));
1052: VecGetOwnershipRange(x, &loc_start, &loc_end);
1053: VecGetArray(x, &vecarray);
1054: for (i=loc_start; i<loc_end; i++) {
1055: val = vecarray[i-loc_start]/norm;
1056: MatSetValues(B, 1, &i, 1, &col_idx, &val, INSERT_VALUES);
1057: }
1058: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1059: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1060: VecRestoreArray(x, &vecarray);
1061: VecDestroy(Ax);
1062: return(0);
1063: }
1065: /* -------------------------------------------------------------------------- */
1066: /*
1067: - x - a starting guess for a hard to approximate vector, if PETSC_NULL, will be generated
1068: */
1071: PetscErrorCode PCInitializationStage_ASA(PC_ASA *asa, Vec x)
1072: {
1074: PetscInt l;
1075: PC_ASA_level *asa_lev, *asa_next_lev;
1076: PetscRandom rctx; /* random number generator context */
1078: Vec ax;
1079: PetscScalar tmp;
1080: PetscReal prevnorm, norm;
1082: PetscTruth skip_steps_f_i = PETSC_FALSE;
1083: PetscTruth sufficiently_coarsened = PETSC_FALSE;
1085: PetscInt vec_size, vec_loc_size;
1086: PetscInt loc_vec_low, loc_vec_high;
1087: PetscInt i,j;
1089: /* Vec xhat = 0; */
1091: Mat AI;
1093: Vec cand_vec, cand_vec_new;
1094: PetscTruth isrichardson;
1095: PC coarse_pc;
1098: PetscLogEventBegin(PC_InitializationStage_ASA,0,0,0,0);
1099: l=1;
1100: /* create first level */
1101: PCCreateLevel_ASA(&(asa->levellist), l, asa->comm, 0, 0, asa->ksptype_smooth, asa->pctype_smooth);
1102: asa_lev = asa->levellist;
1104: /* Set matrix */
1105: asa_lev->A = asa->A;
1106: MatGetSize(asa_lev->A, &i, &j);
1107: asa_lev->size = i;
1108: PCComputeSpectralRadius_ASA(asa_lev);
1109: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->mu_initial);
1111: /* Set DM */
1112: asa_lev->dm = asa->dm;
1113: PetscObjectReference((PetscObject)asa->dm);
1115: PetscPrintf(asa_lev->comm, "Initialization stage\n");
1117: if (x) {
1118: /* use starting guess */
1119: SafeVecDestroy(&(asa_lev->x));
1120: VecDuplicate(x, &(asa_lev->x));
1121: VecCopy(x, asa_lev->x);
1122: } else {
1123: /* select random starting vector */
1124: SafeVecDestroy(&(asa_lev->x));
1125: MatGetVecs(asa_lev->A, &(asa_lev->x), 0);
1126: PetscRandomCreate(asa_lev->comm,&rctx);
1127: PetscRandomSetFromOptions(rctx);
1128: VecSetRandom(asa_lev->x, rctx);
1129: PetscRandomDestroy(rctx);
1130: }
1132: /* create right hand side */
1133: SafeVecDestroy(&(asa_lev->b));
1134: MatGetVecs(asa_lev->A, &(asa_lev->b), 0);
1135: VecSet(asa_lev->b, 0.0);
1137: /* relax and check whether that's enough already */
1138: /* compute old norm */
1139: MatGetVecs(asa_lev->A, 0, &ax);
1140: MatMult(asa_lev->A, asa_lev->x, ax);
1141: VecDot(asa_lev->x, ax, &tmp);
1142: prevnorm = PetscAbsScalar(tmp);
1143: PetscPrintf(asa_lev->comm, "Residual norm of starting guess: %f\n", prevnorm);
1145: /* apply mu_initial relaxations */
1146: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1147: /* compute new norm */
1148: MatMult(asa_lev->A, asa_lev->x, ax);
1149: VecDot(asa_lev->x, ax, &tmp);
1150: norm = PetscAbsScalar(tmp);
1151: SafeVecDestroy(&(ax));
1152: PetscPrintf(asa_lev->comm, "Residual norm of relaxation after %g %D relaxations: %g %g\n", asa->epsilon,asa->mu_initial, norm,prevnorm);
1154: /* Check if it already converges by itself */
1155: if (norm/prevnorm <= pow(asa->epsilon, asa->mu_initial)) {
1156: /* converges by relaxation alone */
1157: SETERRQ(PETSC_ERR_SUP, "Relaxation should be sufficient to treat this problem. "
1158: "Use relaxation or decrease epsilon with -pc_asa_epsilon");
1159: } else {
1160: /* set the number of relaxations to asa->mu from asa->mu_initial */
1161: PCSetupSmoothersOnLevel_ASA(asa, asa_lev, asa->mu);
1163: /* Let's do some multigrid ! */
1164: sufficiently_coarsened = PETSC_FALSE;
1166: /* do the whole initialization stage loop */
1167: while (!sufficiently_coarsened) {
1168: PetscPrintf(asa_lev->comm, "Initialization stage: creating level %D\n", asa_lev->level+1);
1170: /* (a) Set candidate matrix B_l = x_l */
1171: /* get the correct vector sizes and data */
1172: VecGetSize(asa_lev->x, &vec_size);
1173: VecGetOwnershipRange(asa_lev->x, &loc_vec_low, &loc_vec_high);
1174: vec_loc_size = loc_vec_high - loc_vec_low;
1176: /* create matrix for candidates */
1177: MatCreateMPIDense(asa_lev->comm, vec_loc_size, PETSC_DECIDE, vec_size, asa->max_cand_vecs, PETSC_NULL, &(asa_lev->B));
1178: /* set the first column */
1179: PCAddCandidateToB_ASA(asa_lev->B, 0, asa_lev->x, asa_lev->A);
1180: asa_lev->cand_vecs = 1;
1182: /* create next level */
1183: PCCreateLevel_ASA(&(asa_lev->next), asa_lev->level+1, asa_lev->comm, asa_lev, PETSC_NULL, asa->ksptype_smooth, asa->pctype_smooth);
1184: asa_next_lev = asa_lev->next;
1186: /* (b) Create nodal aggregates A_i^l */
1187: PCCreateAggregates_ASA(asa_lev);
1188:
1189: /* (c) Define tentatative prolongator P_{l+1}^l and candidate matrix B_{l+1}
1190: using P_{l+1}^l B_{l+1} = B_l and (P_{l+1}^l)^T P_{l+1}^l = I */
1191: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
1193: /* future WORK: set correct fill ratios for all the operations below */
1194: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->B));
1195: asa_next_lev->cand_vecs = asa_lev->cand_vecs;
1197: /* (d) Define prolongator I_{l+1}^l = S_l P_{l+1}^l */
1198: PCSmoothProlongator_ASA(asa_lev);
1200: /* (e) Define coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
1201: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
1202: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
1203: SafeMatDestroy(&AI);
1204: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
1205: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
1206: PCComputeSpectralRadius_ASA(asa_next_lev);
1207: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->mu);
1209: /* coarse enough for direct solver? */
1210: MatGetSize(asa_next_lev->A, &i, &j);
1211: if (PetscMax(i,j) <= asa->direct_solver) {
1212: PetscPrintf(asa_lev->comm, "Level %D can be treated directly.\n"
1213: "Algorithm will use %D levels.\n", asa_next_lev->level,
1214: asa_next_lev->level);
1215: break; /* go to step 5 */
1216: }
1218: if (!skip_steps_f_i) {
1219: /* (f) Set x_{l+1} = B_{l+1}, we just compute it again */
1220: SafeVecDestroy(&(asa_next_lev->x));
1221: MatGetVecs(asa_lev->P, &(asa_next_lev->x), 0);
1222: MatMult(asa_lev->Pt, asa_lev->x, asa_next_lev->x);
1224: /* /\* (g) Make copy \hat{x}_{l+1} = x_{l+1} *\/ */
1225: /* VecDuplicate(asa_next_lev->x, &xhat); */
1226: /* VecCopy(asa_next_lev->x, xhat); */
1227:
1228: /* Create b_{l+1} */
1229: SafeVecDestroy(&(asa_next_lev->b));
1230: MatGetVecs(asa_next_lev->A, &(asa_next_lev->b), 0);
1231: VecSet(asa_next_lev->b, 0.0);
1233: /* (h) Relax mu times on A_{l+1} x = 0 */
1234: /* compute old norm */
1235: MatGetVecs(asa_next_lev->A, 0, &ax);
1236: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1237: VecDot(asa_next_lev->x, ax, &tmp);
1238: prevnorm = PetscAbsScalar(tmp);
1239: PetscPrintf(asa_next_lev->comm, "Residual norm of starting guess on level %D: %f\n", asa_next_lev->level, prevnorm);
1240: /* apply mu relaxations: WORK, make sure that mu is set correctly */
1241: KSPSolve(asa_next_lev->smoothd, asa_next_lev->b, asa_next_lev->x);
1242: /* compute new norm */
1243: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1244: VecDot(asa_next_lev->x, ax, &tmp);
1245: norm = PetscAbsScalar(tmp);
1246: SafeVecDestroy(&(ax));
1247: PetscPrintf(asa_next_lev->comm, "Residual norm after Richardson iteration on level %D: %f\n", asa_next_lev->level, norm);
1248: /* (i) Check if it already converges by itself */
1249: if (norm/prevnorm <= pow(asa->epsilon, asa->mu)) {
1250: /* relaxation reduces error sufficiently */
1251: skip_steps_f_i = PETSC_TRUE;
1252: }
1253: }
1254: /* (j) go to next coarser level */
1255: l++;
1256: asa_lev = asa_next_lev;
1257: }
1258: /* Step 5. */
1259: asa->levels = asa_next_lev->level; /* WORK: correct? */
1261: /* Set up direct solvers on coarsest level */
1262: if (asa_next_lev->smoothd != asa_next_lev->smoothu) {
1263: if (asa_next_lev->smoothu) { KSPDestroy(asa_next_lev->smoothu); }
1264: }
1265: KSPSetType(asa_next_lev->smoothd, asa->ksptype_direct);
1266: PetscTypeCompare((PetscObject)(asa_next_lev->smoothd), KSPRICHARDSON, &isrichardson);
1267: if (isrichardson) {
1268: KSPSetInitialGuessNonzero(asa_next_lev->smoothd, PETSC_TRUE);
1269: } else {
1270: KSPSetInitialGuessNonzero(asa_next_lev->smoothd, PETSC_FALSE);
1271: }
1272: KSPGetPC(asa_next_lev->smoothd, &coarse_pc);
1273: PCSetType(coarse_pc, asa->pctype_direct);
1274: asa_next_lev->smoothu = asa_next_lev->smoothd;
1275: PCSetupDirectSolversOnLevel_ASA(asa, asa_next_lev, asa->nu);
1277: /* update finest-level candidate matrix B_1 = I_2^1 I_3^2 ... I_{L-1}^{L-2} x_{L-1} */
1278: if (!asa_lev->prev) {
1279: /* just one relaxation level */
1280: VecDuplicate(asa_lev->x, &cand_vec);
1281: VecCopy(asa_lev->x, cand_vec);
1282: } else {
1283: /* interpolate up the chain */
1284: cand_vec = asa_lev->x;
1285: asa_lev->x = 0;
1286: while(asa_lev->prev) {
1287: /* interpolate to higher level */
1288: MatGetVecs(asa_lev->prev->smP, 0, &cand_vec_new);
1289: MatMult(asa_lev->prev->smP, cand_vec, cand_vec_new);
1290: SafeVecDestroy(&(cand_vec));
1291: cand_vec = cand_vec_new;
1292:
1293: /* destroy all working vectors on the way */
1294: SafeVecDestroy(&(asa_lev->x));
1295: SafeVecDestroy(&(asa_lev->b));
1297: /* move to next higher level */
1298: asa_lev = asa_lev->prev;
1299: }
1300: }
1301: /* set the first column of B1 */
1302: PCAddCandidateToB_ASA(asa_lev->B, 0, cand_vec, asa_lev->A);
1303: SafeVecDestroy(&(cand_vec));
1305: /* Step 6. Create V-cycle */
1306: PCCreateVcycle_ASA(asa);
1307: }
1308: PetscLogEventEnd(PC_InitializationStage_ASA,0,0,0,0);
1309: return(0);
1310: }
1312: /* -------------------------------------------------------------------------- */
1313: /*
1314: PCApplyVcycleOnLevel_ASA - Applies current V-cycle
1316: Input Parameters:
1317: + asa_lev - the current level we should recurse on
1318: - gamma - the number of recursive cycles we should run
1320: */
1323: PetscErrorCode PCApplyVcycleOnLevel_ASA(PC_ASA_level *asa_lev, PetscInt gamma)
1324: {
1326: PC_ASA_level *asa_next_lev;
1327: PetscInt g;
1330: if (!asa_lev) SETERRQ(PETSC_ERR_ARG_NULL, "Level is empty in PCApplyVcycleOnLevel_ASA");
1331: asa_next_lev = asa_lev->next;
1333: if (asa_next_lev) {
1334: /* 1. Presmoothing */
1335: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1336: /* 2. Coarse grid corrections */
1337: /* MatGetVecs(asa_lev->A, 0, &tmp); */
1338: /* MatGetVecs(asa_lev->smP, &(asa_next_lev->b), 0); */
1339: /* MatGetVecs(asa_next_lev->A, &(asa_next_lev->x), 0); */
1340: for (g=0; g<gamma; g++) {
1341: /* (a) get coarsened b_{l+1} = (I_{l+1}^l)^T (b_l - A_l x_l) */
1342: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1343: VecAYPX(asa_lev->r, -1.0, asa_lev->b);
1344: MatMult(asa_lev->smPt, asa_lev->r, asa_next_lev->b);
1346: /* (b) Set x_{l+1} = 0 and recurse */
1347: VecSet(asa_next_lev->x, 0.0);
1348: PCApplyVcycleOnLevel_ASA(asa_next_lev, gamma);
1350: /* (c) correct solution x_l = x_l + I_{l+1}^l x_{l+1} */
1351: MatMultAdd(asa_lev->smP, asa_next_lev->x, asa_lev->x, asa_lev->x);
1352: }
1353: /* SafeVecDestroy(&(asa_lev->r)); */
1354: /* /\* discard x_{l+1}, b_{l+1} *\/ */
1355: /* SafeVecDestroy(&(asa_next_lev->x)); */
1356: /* SafeVecDestroy(&(asa_next_lev->b)); */
1357:
1358: /* 3. Postsmoothing */
1359: KSPSolve(asa_lev->smoothu, asa_lev->b, asa_lev->x);
1360: } else {
1361: /* Base case: solve directly */
1362: KSPSolve(asa_lev->smoothd, asa_lev->b, asa_lev->x);
1363: }
1364: return(0);
1365: }
1368: /* -------------------------------------------------------------------------- */
1369: /*
1370: PCGeneralSetupStage_ASA - Applies the ASA preconditioner to a vector. Algorithm
1371: 4 from the ASA paper
1373: Input Parameters:
1374: + asa - the data structure for the ASA algorithm
1375: - cand - a possible candidate vector, if PETSC_NULL, will be constructed randomly
1377: Output Parameters:
1378: . cand_added - PETSC_TRUE, if new candidate vector added, PETSC_FALSE otherwise
1379: */
1382: PetscErrorCode PCGeneralSetupStage_ASA(PC_ASA *asa, Vec cand, PetscTruth *cand_added)
1383: {
1385: PC_ASA_level *asa_lev, *asa_next_lev;
1387: PetscRandom rctx; /* random number generator context */
1388: PetscReal r;
1389: PetscScalar rs;
1390: PetscTruth nd_fast;
1392: Vec ax;
1393: PetscScalar tmp;
1394: PetscReal norm, prevnorm = 0.0;
1395: PetscInt c;
1397: PetscInt loc_vec_low, loc_vec_high;
1398: PetscInt i;
1400: PetscTruth skip_steps_d_j = PETSC_FALSE;
1402: PetscInt *idxm, *idxn;
1403: PetscScalar *v;
1405: Mat AI;
1407: Vec cand_vec, cand_vec_new;
1410: *cand_added = PETSC_FALSE;
1411:
1412: asa_lev = asa->levellist;
1413: if (asa_lev == 0) SETERRQ(PETSC_ERR_ARG_NULL, "No levels found in PCGeneralSetupStage_ASA");
1414: asa_next_lev = asa_lev->next;
1415: if (asa_next_lev == 0) SETERRQ(PETSC_ERR_ARG_NULL, "Just one level, not implemented yet");
1416:
1417: PetscPrintf(asa_lev->comm, "General setup stage\n");
1419: PetscLogEventBegin(PC_GeneralSetupStage_ASA,0,0,0,0);
1421: /* 1. If max. dof per node on level 2 equals K, stop */
1422: if (asa_next_lev->cand_vecs >= asa->max_dof_lev_2) {
1423: PetscPrintf(PETSC_COMM_WORLD,
1424: "Maximum dof on level 2 reached: %D\n"
1425: "Consider increasing this limit by setting it with -pc_asa_max_dof_lev_2\n",
1426: asa->max_dof_lev_2);
1427: return(0);
1428: }
1430: /* 2. Create copy of B_1 (skipped, we just replace the last column in step 8.) */
1431:
1432: if (!cand) {
1433: /* 3. Select a random x_1 */
1434: SafeVecDestroy(&(asa_lev->x));
1435: MatGetVecs(asa_lev->A, &(asa_lev->x), 0);
1436: PetscRandomCreate(asa_lev->comm,&rctx);
1437: PetscRandomSetFromOptions(rctx);
1438: VecGetOwnershipRange(asa_lev->x, &loc_vec_low, &loc_vec_high);
1439: for (i=loc_vec_low; i<loc_vec_high; i++) {
1440: PetscRandomGetValueReal(rctx, &r);
1441: rs = r;
1442: VecSetValues(asa_lev->x, 1, &i, &rs, INSERT_VALUES);
1443: }
1444: VecAssemblyBegin(asa_lev->x);
1445: VecAssemblyEnd(asa_lev->x);
1446: PetscRandomDestroy(rctx);
1447: } else {
1448: SafeVecDestroy(&(asa_lev->x));
1449: VecDuplicate(cand, &(asa_lev->x));
1450: VecCopy(cand, asa_lev->x);
1451: }
1453: /* create right hand side */
1454: SafeVecDestroy(&(asa_lev->b));
1455: MatGetVecs(asa_lev->A, &(asa_lev->b), 0);
1456: VecSet(asa_lev->b, 0.0);
1457:
1458: /* Apply mu iterations of current V-cycle */
1459: nd_fast = PETSC_FALSE;
1460: MatGetVecs(asa_lev->A, 0, &ax);
1461: for (c=0; c<asa->mu; c++) {
1462: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1463:
1464: MatMult(asa_lev->A, asa_lev->x, ax);
1465: VecDot(asa_lev->x, ax, &tmp);
1466: norm = PetscAbsScalar(tmp);
1467: if (c>0) {
1468: if (norm/prevnorm < asa->epsilon) {
1469: nd_fast = PETSC_TRUE;
1470: break;
1471: }
1472: }
1473: prevnorm = norm;
1474: }
1475: SafeVecDestroy(&(ax));
1477: /* 4. If energy norm decreases sufficiently fast, then stop */
1478: if (nd_fast) {
1479: PetscPrintf(asa_lev->comm, "nd_fast is true\n");
1480: return(0);
1481: }
1483: /* 5. Update B_1, by adding new column x_1 */
1484: if (asa_lev->cand_vecs >= asa->max_cand_vecs) {
1485: SETERRQ(PETSC_ERR_MEM, "Number of candidate vectors will exceed allocated storage space");
1486: } else {
1487: PetscPrintf(asa_lev->comm, "Adding candidate vector %D\n", asa_lev->cand_vecs+1);
1488: }
1489: PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs, asa_lev->x, asa_lev->A);
1490: *cand_added = PETSC_TRUE;
1491: asa_lev->cand_vecs++;
1493: /* 6. loop over levels */
1494: while(asa_next_lev && asa_next_lev->next) {
1495: PetscPrintf(asa_lev->comm, "General setup stage: processing level %D\n", asa_next_lev->level);
1496: /* (a) define B_{l+1} and P_{l+1}^L */
1497: /* construct P_{l+1}^l */
1498: PCCreateTransferOp_ASA(asa_lev, PETSC_FALSE);
1500: /* construct B_{l+1} */
1501: SafeMatDestroy(&(asa_next_lev->B));
1502: MatMatMult(asa_lev->Pt, asa_lev->B, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->B));
1503: /* do not increase asa_next_lev->cand_vecs until step (j) */
1504:
1505: /* (b) construct prolongator I_{l+1}^l = S_l P_{l+1}^l */
1506: PCSmoothProlongator_ASA(asa_lev);
1507:
1508: /* (c) construct coarse matrix A_{l+1} = (I_{l+1}^l)^T A_l I_{l+1}^l */
1509: SafeMatDestroy(&(asa_next_lev->A));
1510: MatMatMult(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1.0, &AI);
1511: MatMatMult(asa_lev->smPt, AI, MAT_INITIAL_MATRIX, 1.0, &(asa_next_lev->A));
1512: SafeMatDestroy(&AI);
1513: /* MatPtAP(asa_lev->A, asa_lev->smP, MAT_INITIAL_MATRIX, 1, &(asa_next_lev->A)); */
1514: MatGetSize(asa_next_lev->A, PETSC_NULL, &(asa_next_lev->size));
1515: PCComputeSpectralRadius_ASA(asa_next_lev);
1516: PCSetupSmoothersOnLevel_ASA(asa, asa_next_lev, asa->mu);
1518: if (! skip_steps_d_j) {
1519: /* (d) get vector x_{l+1} from last column in B_{l+1} */
1520: SafeVecDestroy(&(asa_next_lev->x));
1521: MatGetVecs(asa_next_lev->B, 0, &(asa_next_lev->x));
1523: VecGetOwnershipRange(asa_next_lev->x, &loc_vec_low, &loc_vec_high);
1524: PetscMalloc(sizeof(PetscInt)*(loc_vec_high-loc_vec_low), &idxm);
1525: for (i=loc_vec_low; i<loc_vec_high; i++)
1526: idxm[i-loc_vec_low] = i;
1527: PetscMalloc(sizeof(PetscInt)*1, &idxn);
1528: idxn[0] = asa_next_lev->cand_vecs;
1530: PetscMalloc(sizeof(PetscScalar)*(loc_vec_high-loc_vec_low), &v);
1531: MatGetValues(asa_next_lev->B, loc_vec_high-loc_vec_low, idxm, 1, idxn, v);
1533: VecSetValues(asa_next_lev->x, loc_vec_high-loc_vec_low, idxm, v, INSERT_VALUES);
1534: VecAssemblyBegin(asa_next_lev->x);
1535: VecAssemblyEnd(asa_next_lev->x);
1537: PetscFree(v);
1538: PetscFree(idxm);
1539: PetscFree(idxn);
1540:
1541: /* (e) create bridge transfer operator P_{l+2}^{l+1}, by using the previously
1542: computed candidates */
1543: PCCreateTransferOp_ASA(asa_next_lev, PETSC_TRUE);
1545: /* (f) construct bridging prolongator I_{l+2}^{l+1} = S_{l+1} P_{l+2}^{l+1} */
1546: PCSmoothProlongator_ASA(asa_next_lev);
1548: /* (g) compute <A_{l+1} x_{l+1}, x_{l+1}> and save it */
1549: MatGetVecs(asa_next_lev->A, 0, &ax);
1550: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1551: VecDot(asa_next_lev->x, ax, &tmp);
1552: prevnorm = PetscAbsScalar(tmp);
1553: SafeVecDestroy(&(ax));
1555: /* (h) apply mu iterations of current V-cycle */
1556: /* set asa_next_lev->b */
1557: SafeVecDestroy(&(asa_next_lev->b));
1558: SafeVecDestroy(&(asa_next_lev->r));
1559: MatGetVecs(asa_next_lev->A, &(asa_next_lev->b), &(asa_next_lev->r));
1560: VecSet(asa_next_lev->b, 0.0);
1561: /* apply V-cycle */
1562: for (c=0; c<asa->mu; c++) {
1563: PCApplyVcycleOnLevel_ASA(asa_next_lev, asa->gamma);
1564: }
1566: /* (i) check convergence */
1567: /* compute <A_{l+1} x_{l+1}, x_{l+1}> and save it */
1568: MatGetVecs(asa_next_lev->A, 0, &ax);
1569: MatMult(asa_next_lev->A, asa_next_lev->x, ax);
1570: VecDot(asa_next_lev->x, ax, &tmp);
1571: norm = PetscAbsScalar(tmp);
1572: SafeVecDestroy(&(ax));
1574: if (norm/prevnorm <= pow(asa->epsilon, asa->mu)) skip_steps_d_j = PETSC_TRUE;
1575:
1576: /* (j) update candidate B_{l+1} */
1577: PCAddCandidateToB_ASA(asa_next_lev->B, asa_next_lev->cand_vecs, asa_next_lev->x, asa_next_lev->A);
1578: asa_next_lev->cand_vecs++;
1579: }
1580: /* go to next level */
1581: asa_lev = asa_lev->next;
1582: asa_next_lev = asa_next_lev->next;
1583: }
1585: /* 7. update the fine-level candidate */
1586: if (! asa_lev->prev) {
1587: /* just one coarsening level */
1588: VecDuplicate(asa_lev->x, &cand_vec);
1589: VecCopy(asa_lev->x, cand_vec);
1590: } else {
1591: cand_vec = asa_lev->x;
1592: asa_lev->x = 0;
1593: while(asa_lev->prev) {
1594: /* interpolate to higher level */
1595: MatGetVecs(asa_lev->prev->smP, 0, &cand_vec_new);
1596: MatMult(asa_lev->prev->smP, cand_vec, cand_vec_new);
1597: SafeVecDestroy(&(cand_vec));
1598: cand_vec = cand_vec_new;
1600: /* destroy all working vectors on the way */
1601: SafeVecDestroy(&(asa_lev->x));
1602: SafeVecDestroy(&(asa_lev->b));
1604: /* move to next higher level */
1605: asa_lev = asa_lev->prev;
1606: }
1607: }
1608: /* 8. update B_1 by setting the last column of B_1 */
1609: PCAddCandidateToB_ASA(asa_lev->B, asa_lev->cand_vecs-1, cand_vec, asa_lev->A);
1610: SafeVecDestroy(&(cand_vec));
1612: /* 9. create V-cycle */
1613: PCCreateVcycle_ASA(asa);
1614:
1615: PetscLogEventEnd(PC_GeneralSetupStage_ASA,0,0,0,0);
1616: return(0);
1617: }
1619: /* -------------------------------------------------------------------------- */
1620: /*
1621: PCConstructMultigrid_ASA - creates the multigrid preconditionier, this is a fairly
1622: involved process, which runs extensive testing to compute good candidate vectors
1624: Input Parameters:
1625: . pc - the preconditioner context
1627: */
1630: PetscErrorCode PCConstructMultigrid_ASA(PC pc)
1631: {
1633: PC_ASA *asa = (PC_ASA*)pc->data;
1634: PC_ASA_level *asa_lev;
1635: PetscInt i, ls, le;
1636: PetscScalar *d;
1637: PetscTruth zeroflag = PETSC_FALSE;
1638: PetscReal rnorm, rnorm_start;
1639: PetscReal rq, rq_prev;
1640: PetscScalar rq_nom, rq_denom;
1641: PetscTruth cand_added;
1642: PetscRandom rctx;
1646: /* check if we should scale with diagonal */
1647: if (asa->scale_diag) {
1648: /* Get diagonal scaling factors */
1649: MatGetVecs(pc->pmat,&(asa->invsqrtdiag),0);
1650: MatGetDiagonal(pc->pmat,asa->invsqrtdiag);
1651: /* compute (inverse) sqrt of diagonal */
1652: VecGetOwnershipRange(asa->invsqrtdiag, &ls, &le);
1653: VecGetArray(asa->invsqrtdiag, &d);
1654: for (i=0; i<le-ls; i++) {
1655: if (d[i] == 0.0) {
1656: d[i] = 1.0;
1657: zeroflag = PETSC_TRUE;
1658: } else {
1659: d[i] = 1./sqrt(PetscAbsScalar(d[i]));
1660: }
1661: }
1662: VecRestoreArray(asa->invsqrtdiag,&d);
1663: VecAssemblyBegin(asa->invsqrtdiag);
1664: VecAssemblyEnd(asa->invsqrtdiag);
1665: if (zeroflag) {
1666: PetscInfo(pc,"Zero detected in diagonal of matrix, using 1 at those locations\n");
1667: }
1668:
1669: /* scale the matrix and store it: D^{-1/2} A D^{-1/2} */
1670: MatDuplicate(pc->pmat, MAT_COPY_VALUES, &(asa->A)); /* probably inefficient */
1671: MatDiagonalScale(asa->A, asa->invsqrtdiag, asa->invsqrtdiag);
1672: } else {
1673: /* don't scale */
1674: asa->A = pc->pmat;
1675: }
1676: /* Initialization stage */
1677: PCInitializationStage_ASA(asa, PETSC_NULL);
1678:
1679: /* get first level */
1680: asa_lev = asa->levellist;
1682: PetscRandomCreate(asa->comm,&rctx);
1683: PetscRandomSetFromOptions(rctx);
1684: VecSetRandom(asa_lev->x,rctx);
1686: /* compute starting residual */
1687: SafeVecDestroy(&(asa_lev->r));
1688: MatGetVecs(asa_lev->A, PETSC_NULL, &(asa_lev->r));
1689: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1690: /* starting residual norm */
1691: VecNorm(asa_lev->r, NORM_2, &rnorm_start);
1692: /* compute Rayleigh quotients */
1693: VecDot(asa_lev->x, asa_lev->r, &rq_nom);
1694: VecDot(asa_lev->x, asa_lev->x, &rq_denom);
1695: rq_prev = PetscAbsScalar(rq_nom / rq_denom);
1697: /* check if we have to add more candidates */
1698: for (i=0; i<asa->max_it; i++) {
1699: if (asa_lev->cand_vecs >= asa->max_cand_vecs) {
1700: /* reached limit for candidate vectors */
1701: break;
1702: }
1703: /* apply V-cycle */
1704: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1705: /* check convergence */
1706: MatMult(asa_lev->A, asa_lev->x, asa_lev->r);
1707: VecNorm(asa_lev->r, NORM_2, &rnorm);
1708: PetscPrintf(asa->comm, "After %D iterations residual norm is %f\n", i+1, rnorm);
1709: if (rnorm < rnorm_start*(asa->rtol) || rnorm < asa->abstol) {
1710: /* convergence */
1711: break;
1712: }
1713: /* compute new Rayleigh quotient */
1714: VecDot(asa_lev->x, asa_lev->r, &rq_nom);
1715: VecDot(asa_lev->x, asa_lev->x, &rq_denom);
1716: rq = PetscAbsScalar(rq_nom / rq_denom);
1717: PetscPrintf(asa->comm, "After %D iterations Rayleigh quotient of residual is %f\n", i+1, rq);
1718: /* test Rayleigh quotient decrease and add more candidate vectors if necessary */
1719: if (i && (rq > asa->rq_improve*rq_prev)) {
1720: /* improve interpolation by adding another candidate vector */
1721: PCGeneralSetupStage_ASA(asa, asa_lev->r, &cand_added);
1722: if (!cand_added) {
1723: /* either too many candidates for storage or cycle is already effective */
1724: PetscPrintf(asa->comm, "either too many candidates for storage or cycle is already effective\n");
1725: break;
1726: }
1727: VecSetRandom(asa_lev->x, rctx);
1728: rq_prev = rq*10000.; /* give the new V-cycle some grace period */
1729: } else {
1730: rq_prev = rq;
1731: }
1732: }
1734: SafeVecDestroy(&(asa_lev->x));
1735: SafeVecDestroy(&(asa_lev->b));
1736: PetscRandomDestroy(rctx);
1737: asa->multigrid_constructed = PETSC_TRUE;
1738: return(0);
1739: }
1741: /* -------------------------------------------------------------------------- */
1742: /*
1743: PCApply_ASA - Applies the ASA preconditioner to a vector.
1745: Input Parameters:
1746: . pc - the preconditioner context
1747: . x - input vector
1749: Output Parameter:
1750: . y - output vector
1752: Application Interface Routine: PCApply()
1753: */
1756: PetscErrorCode PCApply_ASA(PC pc,Vec x,Vec y)
1757: {
1758: PC_ASA *asa = (PC_ASA*)pc->data;
1759: PC_ASA_level *asa_lev;
1764: if (!asa->multigrid_constructed) {
1765: PCConstructMultigrid_ASA(pc);
1766: }
1768: /* get first level */
1769: asa_lev = asa->levellist;
1771: /* set the right hand side */
1772: VecDuplicate(x, &(asa->b));
1773: VecCopy(x, asa->b);
1774: /* set starting vector */
1775: SafeVecDestroy(&(asa->x));
1776: MatGetVecs(asa->A, &(asa->x), PETSC_NULL);
1777: VecSet(asa->x, 0.0);
1778:
1779: /* set vectors */
1780: asa_lev->x = asa->x;
1781: asa_lev->b = asa->b;
1783: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1784:
1785: /* Return solution */
1786: VecCopy(asa->x, y);
1788: /* delete working vectors */
1789: SafeVecDestroy(&(asa->x));
1790: SafeVecDestroy(&(asa->b));
1791: asa_lev->x = PETSC_NULL;
1792: asa_lev->b = PETSC_NULL;
1794: return(0);
1795: }
1797: /* -------------------------------------------------------------------------- */
1798: /*
1799: PCApplyRichardson_ASA - Applies the ASA iteration to solve a linear system
1801: Input Parameters:
1802: . pc - the preconditioner context
1803: . b - the right hand side
1805: Output Parameter:
1806: . x - output vector
1808: DOES NOT WORK!!!!!
1810: */
1813: PetscErrorCode PCApplyRichardson_ASA(PC pc,Vec b,Vec x,Vec w,PetscReal rtol,PetscReal abstol, PetscReal dtol,PetscInt its,PetscTruth guesszero,PetscInt *outits,PCRichardsonConvergedReason *reason)
1814: {
1815: PC_ASA *asa = (PC_ASA*)pc->data;
1816: PC_ASA_level *asa_lev;
1817: PetscInt i;
1818: PetscReal rnorm, rnorm_start;
1823: if (! asa->multigrid_constructed) {
1824: PCConstructMultigrid_ASA(pc);
1825: }
1827: /* get first level */
1828: asa_lev = asa->levellist;
1830: /* set the right hand side */
1831: VecDuplicate(b, &(asa->b));
1832: if (asa->scale_diag) {
1833: VecPointwiseMult(asa->b, asa->invsqrtdiag, b);
1834: } else {
1835: VecCopy(b, asa->b);
1836: }
1837: /* set starting vector */
1838: VecDuplicate(x, &(asa->x));
1839: VecCopy(x, asa->x);
1840:
1841: /* compute starting residual */
1842: SafeVecDestroy(&(asa->r));
1843: MatGetVecs(asa->A, &(asa->r), PETSC_NULL);
1844: MatMult(asa->A, asa->x, asa->r);
1845: VecAYPX(asa->r, -1.0, asa->b);
1846: /* starting residual norm */
1847: VecNorm(asa->r, NORM_2, &rnorm_start);
1849: /* set vectors */
1850: asa_lev->x = asa->x;
1851: asa_lev->b = asa->b;
1853: *reason = PCRICHARDSON_CONVERGED_ITS;
1854: /* **************** Full algorithm loop *********************************** */
1855: for (i=0; i<its; i++) {
1856: /* apply V-cycle */
1857: PCApplyVcycleOnLevel_ASA(asa_lev, asa->gamma);
1858: /* check convergence */
1859: MatMult(asa->A, asa->x, asa->r);
1860: VecAYPX(asa->r, -1.0, asa->b);
1861: VecNorm(asa->r, NORM_2, &rnorm);
1862: PetscPrintf(asa->comm, "After %D iterations residual norm is %f\n", i+1, rnorm);
1863: if (rnorm < rnorm_start*(rtol)) {
1864: *reason = PCRICHARDSON_CONVERGED_RTOL;
1865: break;
1866: } else if (rnorm < asa->abstol) {
1867: *reason = PCRICHARDSON_CONVERGED_ATOL;
1868: break;
1869: } else if (rnorm > rnorm_start*(dtol)) {
1870: *reason = PCRICHARDSON_DIVERGED_DTOL;
1871: break;
1872: }
1873: }
1874: *outits = i;
1875:
1876: /* Return solution */
1877: if (asa->scale_diag) {
1878: VecPointwiseMult(x, asa->x, asa->invsqrtdiag);
1879: } else {
1880: VecCopy(x, asa->x);
1881: }
1883: /* delete working vectors */
1884: SafeVecDestroy(&(asa->x));
1885: SafeVecDestroy(&(asa->b));
1886: SafeVecDestroy(&(asa->r));
1887: asa_lev->x = PETSC_NULL;
1888: asa_lev->b = PETSC_NULL;
1889: return(0);
1890: }
1892: /* -------------------------------------------------------------------------- */
1893: /*
1894: PCDestroy_ASA - Destroys the private context for the ASA preconditioner
1895: that was created with PCCreate_ASA().
1897: Input Parameter:
1898: . pc - the preconditioner context
1900: Application Interface Routine: PCDestroy()
1901: */
1904: static PetscErrorCode PCDestroy_ASA(PC pc)
1905: {
1906: PC_ASA *asa;
1907: PC_ASA_level *asa_lev;
1908: PC_ASA_level *asa_next_level;
1913: asa = (PC_ASA*)pc->data;
1914: asa_lev = asa->levellist;
1916: /* Delete top level data */
1917: PetscFree(asa->ksptype_smooth);
1918: PetscFree(asa->pctype_smooth);
1919: PetscFree(asa->ksptype_direct);
1920: PetscFree(asa->pctype_direct);
1921: PetscFree(asa->coarse_mat_type);
1923: /* this is destroyed by the levels below */
1924: /* SafeMatDestroy(&(asa->A)); */
1925: SafeVecDestroy(&(asa->invsqrtdiag));
1926: SafeVecDestroy(&(asa->b));
1927: SafeVecDestroy(&(asa->x));
1928: SafeVecDestroy(&(asa->r));
1930: if (asa->dm) {DMDestroy(asa->dm);}
1932: /* Destroy each of the levels */
1933: while(asa_lev) {
1934: asa_next_level = asa_lev->next;
1935: PCDestroyLevel_ASA(asa_lev);
1936: asa_lev = asa_next_level;
1937: }
1939: PetscFree(asa);
1940: return(0);
1941: }
1945: static PetscErrorCode PCSetFromOptions_ASA(PC pc)
1946: {
1947: PC_ASA *asa = (PC_ASA*)pc->data;
1948: PetscTruth flg;
1950: char type[20];
1955: PetscOptionsHead("ASA options");
1956: /* convergence parameters */
1957: PetscOptionsInt("-pc_asa_nu","Number of cycles to run smoother","No manual page yet",asa->nu,&(asa->nu),&flg);
1958: PetscOptionsInt("-pc_asa_gamma","Number of cycles to run coarse grid correction","No manual page yet",asa->gamma,&(asa->gamma),&flg);
1959: PetscOptionsReal("-pc_asa_epsilon","Tolerance for the relaxation method","No manual page yet",asa->epsilon,&(asa->epsilon),&flg);
1960: PetscOptionsInt("-pc_asa_mu","Number of cycles to relax in setup stages","No manual page yet",asa->mu,&(asa->mu),&flg);
1961: PetscOptionsInt("-pc_asa_mu_initial","Number of cycles to relax for generating first candidate vector","No manual page yet",asa->mu_initial,&(asa->mu_initial),&flg);
1962: PetscOptionsInt("-pc_asa_direct_solver","For which matrix size should we use the direct solver?","No manual page yet",asa->direct_solver,&(asa->direct_solver),&flg);
1963: PetscOptionsTruth("-pc_asa_scale_diag","Should we scale the matrix with the inverse of its diagonal?","No manual page yet",asa->scale_diag,&(asa->scale_diag),&flg);
1964: /* type of smoother used */
1965: PetscOptionsList("-pc_asa_smoother_ksp_type","The type of KSP to be used in the smoothers","No manual page yet",KSPList,asa->ksptype_smooth,type,20,&flg);
1966: if (flg) {
1967: PetscFree(asa->ksptype_smooth);
1968: PetscStrallocpy(type,&(asa->ksptype_smooth));
1969: }
1970: PetscOptionsList("-pc_asa_smoother_pc_type","The type of PC to be used in the smoothers","No manual page yet",PCList,asa->pctype_smooth,type,20,&flg);
1971: if (flg) {
1972: PetscFree(asa->pctype_smooth);
1973: PetscStrallocpy(type,&(asa->pctype_smooth));
1974: }
1975: PetscOptionsList("-pc_asa_direct_ksp_type","The type of KSP to be used in the direct solver","No manual page yet",KSPList,asa->ksptype_direct,type,20,&flg);
1976: if (flg) {
1977: PetscFree(asa->ksptype_direct);
1978: PetscStrallocpy(type,&(asa->ksptype_direct));
1979: }
1980: PetscOptionsList("-pc_asa_direct_pc_type","The type of PC to be used in the direct solver","No manual page yet",PCList,asa->pctype_direct,type,20,&flg);
1981: if (flg) {
1982: PetscFree(asa->pctype_direct);
1983: PetscStrallocpy(type,&(asa->pctype_direct));
1984: }
1985: /* options specific for certain smoothers */
1986: PetscOptionsReal("-pc_asa_richardson_scale","Scaling parameter for preconditioning in relaxation, if smoothing KSP is Richardson","No manual page yet",asa->richardson_scale,&(asa->richardson_scale),&flg);
1987: PetscOptionsReal("-pc_asa_sor_omega","Scaling parameter for preconditioning in relaxation, if smoothing KSP is Richardson","No manual page yet",asa->sor_omega,&(asa->sor_omega),&flg);
1988: /* options for direct solver */
1989: PetscOptionsString("-pc_asa_coarse_mat_type","The coarse level matrix type (e.g. SuperLU, MUMPS, ...)","No manual page yet",asa->coarse_mat_type, type,20,&flg);
1990: if (flg) {
1991: PetscFree(asa->coarse_mat_type);
1992: PetscStrallocpy(type,&(asa->coarse_mat_type));
1993: }
1994: /* storage allocation parameters */
1995: PetscOptionsInt("-pc_asa_max_cand_vecs","Maximum number of candidate vectors","No manual page yet",asa->max_cand_vecs,&(asa->max_cand_vecs),&flg);
1996: PetscOptionsInt("-pc_asa_max_dof_lev_2","The maximum number of degrees of freedom per node on level 2 (K in paper)","No manual page yet",asa->max_dof_lev_2,&(asa->max_dof_lev_2),&flg);
1997: /* construction parameters */
1998: PetscOptionsReal("-pc_asa_rq_improve","Threshold in RQ improvement for adding another candidate","No manual page yet",asa->rq_improve,&(asa->rq_improve),&flg);
1999: PetscOptionsTail();
2000: return(0);
2001: }
2005: static PetscErrorCode PCView_ASA(PC pc,PetscViewer viewer)
2006: {
2007: PC_ASA *asa = (PC_ASA*)pc->data;
2009: PetscTruth iascii;
2010: PC_ASA_level *asa_lev = asa->levellist;
2013: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
2014: if (iascii) {
2015: PetscViewerASCIIPrintf(viewer," ASA:\n");
2016: asa_lev = asa->levellist;
2017: while (asa_lev) {
2018: if (!asa_lev->next) {
2019: PetscViewerASCIIPrintf(viewer,"Coarse grid solver -- level %D -------------------------------\n",0);
2020: } else {
2021: PetscViewerASCIIPrintf(viewer,"Down solver (pre-smoother) on level ? -------------------------------\n");
2022: }
2023: PetscViewerASCIIPushTab(viewer);
2024: KSPView(asa_lev->smoothd,viewer);
2025: PetscViewerASCIIPopTab(viewer);
2026: if (asa_lev->next && asa_lev->smoothd == asa_lev->smoothu) {
2027: PetscViewerASCIIPrintf(viewer,"Up solver (post-smoother) same as down solver (pre-smoother)\n");
2028: } else if (asa_lev->next){
2029: PetscViewerASCIIPrintf(viewer,"Up solver (post-smoother) on level ? -------------------------------\n");
2030: PetscViewerASCIIPushTab(viewer);
2031: KSPView(asa_lev->smoothu,viewer);
2032: PetscViewerASCIIPopTab(viewer);
2033: }
2034: asa_lev = asa_lev->next;
2035: }
2036: } else {
2037: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for PCASA",((PetscObject)viewer)->type_name);
2038: }
2039: return(0);
2040: }
2042: /* -------------------------------------------------------------------------- */
2043: /*
2044: PCCreate_ASA - Creates a ASA preconditioner context, PC_ASA,
2045: and sets this as the private data within the generic preconditioning
2046: context, PC, that was created within PCCreate().
2048: Input Parameter:
2049: . pc - the preconditioner context
2051: Application Interface Routine: PCCreate()
2052: */
2056: PetscErrorCode PCCreate_ASA(PC pc)
2057: {
2059: PC_ASA *asa;
2064: /*
2065: Set the pointers for the functions that are provided above.
2066: Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
2067: are called, they will automatically call these functions. Note we
2068: choose not to provide a couple of these functions since they are
2069: not needed.
2070: */
2071: pc->ops->apply = PCApply_ASA;
2072: /* pc->ops->applytranspose = PCApply_ASA;*/
2073: pc->ops->applyrichardson = PCApplyRichardson_ASA;
2074: pc->ops->setup = 0;
2075: pc->ops->destroy = PCDestroy_ASA;
2076: pc->ops->setfromoptions = PCSetFromOptions_ASA;
2077: pc->ops->view = PCView_ASA;
2079: /* Set the data to pointer to 0 */
2080: pc->data = (void*)0;
2082: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASASetDM_C","PCASASetDM_ASA",PCASASetDM_ASA);
2083: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCASASetTolerances_C","PCASASetTolerances_ASA",PCASASetTolerances_ASA);
2085: /* register events */
2086: if (! asa_events_registered) {
2087: PetscLogEventRegister("PCInitializationStage_ASA", PC_COOKIE,&PC_InitializationStage_ASA);
2088: PetscLogEventRegister("PCGeneralSetupStage_ASA", PC_COOKIE,&PC_GeneralSetupStage_ASA);
2089: PetscLogEventRegister("PCCreateTransferOp_ASA", PC_COOKIE,&PC_CreateTransferOp_ASA);
2090: PetscLogEventRegister("PCCreateVcycle_ASA", PC_COOKIE,&PC_CreateVcycle_ASA);
2091: asa_events_registered = PETSC_TRUE;
2092: }
2094: /* Create new PC_ASA object */
2095: PetscNewLog(pc,PC_ASA,&asa);
2096: pc->data = (void*)asa;
2098: /* WORK: find some better initial values */
2099: asa->nu = 3;
2100: asa->gamma = 1;
2101: asa->epsilon = 1e-4;
2102: asa->mu = 3;
2103: asa->mu_initial = 20;
2104: asa->direct_solver = 100;
2105: asa->scale_diag = PETSC_TRUE;
2106: PetscStrallocpy(KSPRICHARDSON, (char **) &(asa->ksptype_smooth));
2107: PetscStrallocpy(PCSOR, (char **) &(asa->pctype_smooth));
2108: asa->smoother_rtol = 1e-10;
2109: asa->smoother_abstol = 1e-20;
2110: asa->smoother_dtol = PETSC_DEFAULT;
2111: PetscStrallocpy(KSPPREONLY, (char **) &(asa->ksptype_direct));
2112: PetscStrallocpy(PCREDUNDANT, (char **) &(asa->pctype_direct));
2113: asa->direct_rtol = 1e-10;
2114: asa->direct_abstol = 1e-20;
2115: asa->direct_dtol = PETSC_DEFAULT;
2116: asa->richardson_scale = PETSC_DECIDE;
2117: asa->sor_omega = PETSC_DECIDE;
2118: PetscStrallocpy(MATSAME, (char **) &(asa->coarse_mat_type));
2120: asa->max_cand_vecs = 4;
2121: asa->max_dof_lev_2 = 640; /* I don't think this parameter really matters, 640 should be enough for everyone! */
2123: asa->multigrid_constructed = PETSC_FALSE;
2125: asa->rtol = 1e-10;
2126: asa->abstol = 1e-15;
2127: asa->divtol = 1e5;
2128: asa->max_it = 10000;
2129: asa->rq_improve = 0.9;
2130:
2131: asa->A = 0;
2132: asa->invsqrtdiag = 0;
2133: asa->b = 0;
2134: asa->x = 0;
2135: asa->r = 0;
2137: asa->dm = 0;
2138:
2139: asa->levels = 0;
2140: asa->levellist = 0;
2142: asa->comm = ((PetscObject)pc)->comm;
2143: return(0);
2144: }