Actual source code: superlu_dist.c
1: #define PETSCMAT_DLL
3: /*
4: Provides an interface to the SuperLU_DIST_2.2 parallel sparse direct solver
5: */
7: #include "../src/mat/impls/aij/seq/aij.h"
8: #include "../src/mat/impls/aij/mpi/mpiaij.h"
9: #if defined(PETSC_HAVE_STDLIB_H) /* works around a SuperLU header problem on Cray systems */
10: #include "stdlib.h"
11: #endif
13: #if defined(PETSC_USE_64BIT_INDICES)
14: /* SuperLU_DIST macro that tells it to use long long int indices */
15: #define _LONGINT
16: #endif
19: #if defined(PETSC_USE_COMPLEX)
20: #include "superlu_zdefs.h"
21: #else
22: #include "superlu_ddefs.h"
23: #endif
26: typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
27: const char *SuperLU_MatInputModes[] = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};
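/* GLOBAL: every process supplies the entire matrix to SuperLU_DIST;
   DISTRIBUTED: each process supplies only its local rows (SuperLU_DIST's SLU_NR_loc format) */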
29: typedef struct {
30: int_t nprow,npcol,*row,*col;
31: gridinfo_t grid;
32: superlu_options_t options;
33: SuperMatrix A_sup;
34: ScalePermstruct_t ScalePermstruct;
35: LUstruct_t LUstruct;
36: int StatPrint;
37: SuperLU_MatInputMode MatInputMode;
38: SOLVEstruct_t SOLVEstruct;
39: fact_t FactPattern;
40: MPI_Comm comm_superlu;
41: #if defined(PETSC_USE_COMPLEX)
42: doublecomplex *val;
43: #else
44: double *val;
45: #endif
46: PetscTruth matsolve_iscalled,matmatsolve_iscalled;
47: PetscTruth CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
48: } Mat_SuperLU_DIST;
60: PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
61: {
62: PetscErrorCode ierr;
63: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
64: PetscTruth flg;
65:
67: if (lu->CleanUpSuperLU_Dist) {
68: /* Deallocate SuperLU_DIST storage */
69: if (lu->MatInputMode == GLOBAL) {
70: Destroy_CompCol_Matrix_dist(&lu->A_sup);
71: } else {
72: Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
73: if ( lu->options.SolveInitialized ) {
74: #if defined(PETSC_USE_COMPLEX)
75: zSolveFinalize(&lu->options, &lu->SOLVEstruct);
76: #else
77: dSolveFinalize(&lu->options, &lu->SOLVEstruct);
78: #endif
79: }
80: }
81: Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
82: ScalePermstructFree(&lu->ScalePermstruct);
83: LUstructFree(&lu->LUstruct);
84:
85: /* Release the SuperLU_DIST process grid. */
86: superlu_gridexit(&lu->grid);
87:
88: MPI_Comm_free(&(lu->comm_superlu));
89: }
91: PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
92: if (flg) {
93: MatDestroy_SeqAIJ(A);
94: } else {
95: MatDestroy_MPIAIJ(A);
96: }
97: return(0);
98: }
102: PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
103: {
104: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
105: PetscErrorCode ierr;
106: PetscMPIInt size;
107: PetscInt m=A->rmap->n,M=A->rmap->N,N=A->cmap->N;
108: SuperLUStat_t stat;
109: double berr[1];
110: PetscScalar *bptr;
111: PetscInt nrhs=1;
112: Vec x_seq;
113: IS iden;
114: VecScatter scat;
115: int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
118: MPI_Comm_size(((PetscObject)A)->comm,&size);
119: if (size > 1 && lu->MatInputMode == GLOBAL) {
120: /* global mat input, convert b to x_seq */
121: VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
122: ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
123: VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
124: ISDestroy(iden);
126: VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
127: VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
128: VecGetArray(x_seq,&bptr);
129: } else { /* size==1 || distributed mat input */
130: if (lu->options.SolveInitialized && !lu->matsolve_iscalled){
131: /* see comments in MatMatSolve() */
132: #if defined(PETSC_USE_COMPLEX)
133: zSolveFinalize(&lu->options, &lu->SOLVEstruct);
134: #else
135: dSolveFinalize(&lu->options, &lu->SOLVEstruct);
136: #endif
137: lu->options.SolveInitialized = NO;
138: }
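    /* SuperLU_DIST's p[dz]gssvx overwrites the right-hand side with the solution,
       so copy b into x and solve in place on x's array */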
139: VecCopy(b_mpi,x);
140: VecGetArray(x,&bptr);
141: }
142:
143: if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");
145: PStatInit(&stat); /* Initialize the statistics variables. */
146: if (lu->MatInputMode == GLOBAL) {
147: #if defined(PETSC_USE_COMPLEX)
148: pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs,
149: &lu->grid,&lu->LUstruct,berr,&stat,&info);
150: #else
151: pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr,M,nrhs,
152: &lu->grid,&lu->LUstruct,berr,&stat,&info);
153: #endif
154: } else { /* distributed mat input */
155: #if defined(PETSC_USE_COMPLEX)
156: pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
157: &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
158: #else
159: pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
160: &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
161: #endif
162: }
163: if (info) SETERRQ1(PETSC_ERR_LIB,"p[dz]gssvx fails, info: %d\n",info);
165: if (lu->options.PrintStat) {
166: PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
167: }
168: PStatFree(&stat);
169:
170: if (size > 1 && lu->MatInputMode == GLOBAL) {
171: /* convert seq x to mpi x */
172: VecRestoreArray(x_seq,&bptr);
173: VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
174: VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
175: VecScatterDestroy(scat);
176: VecDestroy(x_seq);
177: } else {
178: VecRestoreArray(x,&bptr);
179: lu->matsolve_iscalled = PETSC_TRUE;
180: lu->matmatsolve_iscalled = PETSC_FALSE;
181: }
182: return(0);
183: }
187: PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
188: {
189: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
190: PetscErrorCode ierr;
191: PetscMPIInt size;
192: PetscInt M=A->rmap->N,m=A->rmap->n,nrhs;
193: SuperLUStat_t stat;
194: double berr[1];
195: PetscScalar *bptr;
196: int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
199: if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");
200:
201: MPI_Comm_size(((PetscObject)A)->comm,&size);
202: if (size > 1 && lu->MatInputMode == GLOBAL) {
203: SETERRQ1(PETSC_ERR_SUP,"MatInputMode=GLOBAL for nproc %d>1 is not supported",size);
204: }
205: /* size==1 or distributed mat input */
206: if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled){
207: /* The communication pattern in SOLVEstruct was most likely created for a single-RHS
208:    MatSolve(), not for MatMatSolve(); destroy it and let a new SOLVEstruct be created.
209:    Reusing it may result in memory corruption or an incorrect solution.
210:    See src/mat/examples/tests/ex125.c */
211: #if defined(PETSC_USE_COMPLEX)
212: zSolveFinalize(&lu->options, &lu->SOLVEstruct);
213: #else
214: dSolveFinalize(&lu->options, &lu->SOLVEstruct);
215: #endif
216: lu->options.SolveInitialized = NO;
217: }
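  /* as in MatSolve(), SuperLU_DIST overwrites the right-hand side with the solution,
     so copy B into X and solve in place on X's array */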
218: MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);
220: MatGetSize(B_mpi,PETSC_NULL,&nrhs);
221:
222: PStatInit(&stat); /* Initialize the statistics variables. */
223: MatGetArray(X,&bptr);
224: if (lu->MatInputMode == GLOBAL) { /* size == 1 */
225: #if defined(PETSC_USE_COMPLEX)
226: pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, M, nrhs,
227: &lu->grid, &lu->LUstruct, berr, &stat, &info);
228: #else
229: pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, M, nrhs,
230: &lu->grid, &lu->LUstruct, berr, &stat, &info);
231: #endif
232: } else { /* distributed mat input */
233: #if defined(PETSC_USE_COMPLEX)
234: pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
235: &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
236: #else
237: pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
238: &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
239: #endif
240: }
241: if (info) SETERRQ1(PETSC_ERR_LIB,"p[dz]gssvx fails, info: %d\n",info);
242: MatRestoreArray(X,&bptr);
244: if (lu->options.PrintStat) {
245: PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
246: }
247: PStatFree(&stat);
248: lu->matsolve_iscalled = PETSC_FALSE;
249: lu->matmatsolve_iscalled = PETSC_TRUE;
250: return(0);
251: }
256: PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
257: {
258: Mat *tseq,A_seq = PETSC_NULL;
259: Mat_SeqAIJ *aa,*bb;
260: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
261: PetscErrorCode ierr;
262: PetscInt M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
263: m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
264: int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
265: PetscMPIInt size,rank;
266: SuperLUStat_t stat;
267: double *berr=0;
268: IS isrow;
269: PetscLogDouble time0,time,time_min,time_max;
270: Mat F_diag=PETSC_NULL;
271: #if defined(PETSC_USE_COMPLEX)
272: doublecomplex *av, *bv;
273: #else
274: double *av, *bv;
275: #endif
278: MPI_Comm_size(((PetscObject)A)->comm,&size);
279: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
280:
281: if (lu->options.PrintStat) { /* collect time for mat conversion */
282: MPI_Barrier(((PetscObject)A)->comm);
283: PetscGetTime(&time0);
284: }
286: if (lu->MatInputMode == GLOBAL) { /* global mat input */
287: if (size > 1) { /* convert mpi A to seq mat A */
288: ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
289: MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
290: ISDestroy(isrow);
291:
292: A_seq = *tseq;
293: PetscFree(tseq);
294: aa = (Mat_SeqAIJ*)A_seq->data;
295: } else {
296: PetscTruth flg;
297: PetscTypeCompare((PetscObject)A,MATMPIAIJ,&flg);
298: if (flg) {
299: Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
300: A = At->A;
301: }
302: aa = (Mat_SeqAIJ*)A->data;
303: }
305: /* Convert the PETSc compressed-row (NR) matrix to SuperLU_DIST compressed-column (NC) format.
306:    Note: the memory for lu->val, lu->col, and lu->row is allocated by CompRow_to_CompCol_dist()! */
307: if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
308: if (lu->FactPattern == SamePattern_SameRowPerm){
309: Destroy_CompCol_Matrix_dist(&lu->A_sup);
310: /* Destroy_LU(N, &lu->grid, &lu->LUstruct); calling it here crashes, and omitting it does not appear to leak memory. */
311: lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
312: } else {
313: Destroy_CompCol_Matrix_dist(&lu->A_sup);
314: Destroy_LU(N, &lu->grid, &lu->LUstruct);
315: lu->options.Fact = SamePattern;
316: }
317: }
318: #if defined(PETSC_USE_COMPLEX)
319: zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
320: #else
321: dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
322: #endif
324: /* Create compressed column matrix A_sup. */
325: #if defined(PETSC_USE_COMPLEX)
326: zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
327: #else
328: dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
329: #endif
330: } else { /* distributed mat input */
331: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
332: aa=(Mat_SeqAIJ*)(mat->A)->data;
333: bb=(Mat_SeqAIJ*)(mat->B)->data;
334: ai=aa->i; aj=aa->j;
335: bi=bb->i; bj=bb->j;
336: #if defined(PETSC_USE_COMPLEX)
337: av=(doublecomplex*)aa->a;
338: bv=(doublecomplex*)bb->a;
339: #else
340: av=aa->a;
341: bv=bb->a;
342: #endif
343: rstart = A->rmap->rstart;
344: nz = aa->nz + bb->nz;
345: garray = mat->garray;
346:
347: if (lu->options.Fact == DOFACT) {/* first numeric factorization */
348: #if defined(PETSC_USE_COMPLEX)
349: zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
350: #else
351: dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
352: #endif
353: } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
354: if (lu->FactPattern == SamePattern_SameRowPerm){
355: /* Destroy_LU(N, &lu->grid, &lu->LUstruct); calling it here crashes, and omitting it does not appear to leak memory. */
356: lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
357: } else {
358: Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
359: lu->options.Fact = SamePattern;
360: }
361: }
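    /* Merge the diagonal block (A, local columns) and the off-diagonal block (B, columns
       mapped to global indices through garray) of the MPIAIJ matrix into a single local
       compressed-row structure with ascending global column indices: B entries to the
       left of the diagonal block first, then the A entries, then the remaining B entries.
       Note this assumes each local row has at least one entry in the diagonal block,
       since ajj[0] is read unconditionally below. */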
362: nz = 0; irow = rstart;
363: for ( i=0; i<m; i++ ) {
364: lu->row[i] = nz;
365: countA = ai[i+1] - ai[i];
366: countB = bi[i+1] - bi[i];
367: ajj = aj + ai[i]; /* ptr to the beginning of this row */
368: bjj = bj + bi[i];
370: /* B part, smaller col index */
371: colA_start = rstart + ajj[0]; /* the smallest global col index of A */
372: jB = 0;
373: for (j=0; j<countB; j++){
374: jcol = garray[bjj[j]];
375: if (jcol > colA_start) {
376: jB = j;
377: break;
378: }
379: lu->col[nz] = jcol;
380: lu->val[nz++] = *bv++;
381: if (j==countB-1) jB = countB;
382: }
384: /* A part */
385: for (j=0; j<countA; j++){
386: lu->col[nz] = rstart + ajj[j];
387: lu->val[nz++] = *av++;
388: }
390: /* B part, larger col index */
391: for (j=jB; j<countB; j++){
392: lu->col[nz] = garray[bjj[j]];
393: lu->val[nz++] = *bv++;
394: }
395: }
396: lu->row[m] = nz;
397: #if defined(PETSC_USE_COMPLEX)
398: zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
399: lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
400: #else
401: dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
402: lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
403: #endif
404: }
405: if (lu->options.PrintStat) {
406: PetscGetTime(&time);
407: time0 = time - time0;
408: }
410: /* Factor the matrix. */
411: PStatInit(&stat); /* Initialize the statistics variables. */
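  /* a NULL right-hand side and nrhs=0 make p[dz]gssvx perform the factorization only;
     no triangular solves happen here */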
412: if (lu->MatInputMode == GLOBAL) { /* global mat input */
413: #if defined(PETSC_USE_COMPLEX)
414: pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
415: &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
416: #else
417: pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
418: &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
419: #endif
420: } else { /* distributed mat input */
421: #if defined(PETSC_USE_COMPLEX)
422: pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,
423: &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
424: if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
425: #else
426: pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,
427: &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
428: if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
429: #endif
430: }
432: if (lu->MatInputMode == GLOBAL && size > 1){
433: MatDestroy(A_seq);
434: }
436: if (lu->options.PrintStat) {
437: if (size > 1){
438: MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
439: MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
440: MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
441: time = time/size; /* average time */
442: if (!rank) {
443: PetscPrintf(PETSC_COMM_SELF, "  Mat conversion (PETSc->SuperLU_DIST) time (max/min/avg): \n %g / %g / %g\n",time_max,time_min,time);
444: }
445: } else {
446: PetscPrintf(PETSC_COMM_SELF, "  Mat conversion (PETSc->SuperLU_DIST) time: \n %g\n",time0);
447: }
448: PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
449: }
450: PStatFree(&stat);
451: if (size > 1){
452: F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
453: F_diag->assembled = PETSC_TRUE;
454: }
455: (F)->assembled = PETSC_TRUE;
456: (F)->preallocated = PETSC_TRUE;
457: lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
458: return(0);
459: }
461: /* Note that the PETSc r and c permutations are ignored */
464: PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
465: {
466: Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*) (F)->spptr;
467: PetscInt M=A->rmap->N,N=A->cmap->N;
470: /* Initialize the SuperLU process grid. */
471: superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
473: /* Initialize ScalePermstruct and LUstruct. */
474: ScalePermstructInit(M, N, &lu->ScalePermstruct);
475: LUstructInit(M, N, &lu->LUstruct);
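  /* the actual symbolic analysis is deferred: SuperLU_DIST's p[dz]gssvx performs both
     the symbolic and numeric factorization inside MatLUFactorNumeric_SuperLU_DIST() */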
476: (F)->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
477: (F)->ops->solve = MatSolve_SuperLU_DIST;
478: (F)->ops->matsolve = MatMatSolve_SuperLU_DIST;
479: return(0);
480: }
485: PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
486: {
488: *type = MAT_SOLVER_SUPERLU_DIST;
489: return(0);
490: }
495: PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
496: {
497: Mat B;
498: Mat_SuperLU_DIST *lu;
499: PetscErrorCode ierr;
500: PetscInt M=A->rmap->N,N=A->cmap->N,indx;
501: PetscMPIInt size;
502: superlu_options_t options;
503: PetscTruth flg;
504: const char *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
505: const char *prtype[] = {"LargeDiag","NATURAL"};
506: const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
509: /* Create the factorization matrix */
510: MatCreate(((PetscObject)A)->comm,&B);
511: MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);
512: MatSetType(B,((PetscObject)A)->type_name);
513: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
514: MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);
516: B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
517: B->ops->view = MatView_SuperLU_DIST;
518: B->ops->destroy = MatDestroy_SuperLU_DIST;
519: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);
520: B->factor = MAT_FACTOR_LU;
522: PetscNewLog(B,Mat_SuperLU_DIST,&lu);
523: B->spptr = lu;
525: /* Set the default input options:
526: options.Fact = DOFACT;
527: options.Equil = YES;
528: options.ParSymbFact = NO;
529: options.ColPerm = MMD_AT_PLUS_A;
530: options.RowPerm = LargeDiag;
531: options.ReplaceTinyPivot = YES;
532: options.IterRefine = DOUBLE;
533: options.Trans = NOTRANS;
534: options.SolveInitialized = NO; - holds the communication pattern used by MatSolve() and MatMatSolve()
535: options.RefineInitialized = NO;
536: options.PrintStat = YES;
537: */
538: set_default_options_dist(&options);
540: MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));
541: MPI_Comm_size(((PetscObject)A)->comm,&size);
542: /* Default num of process columns and rows */
543: lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
544: if (!lu->npcol) lu->npcol = 1;
545: while (lu->npcol > 0) {
546: lu->nprow = PetscMPIIntCast(size/lu->npcol);
547: if (size == lu->nprow * lu->npcol) break;
548: lu->npcol --;
549: }
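  /* e.g. size=6: the initial guess npcol=2 yields nprow=3, giving a 3x2 process grid */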
550:
551: PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
552: PetscOptionsInt("-mat_superlu_dist_r","Number of rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
553: PetscOptionsInt("-mat_superlu_dist_c","Number of columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
554: if (size != lu->nprow * lu->npcol)
555:   SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %d * npcol %d",size,lu->nprow,lu->npcol);
556:
557: lu->MatInputMode = DISTRIBUTED;
558: PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);
559: if (lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
561: PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
562: if (!flg) {
563: options.Equil = NO;
564: }
566: PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);
567: if (flg) {
568: switch (indx) {
569: case 0:
570: options.RowPerm = LargeDiag;
571: break;
572: case 1:
573: options.RowPerm = NOROWPERM;
574: break;
575: }
576: }
578: PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);
579: if (flg) {
580: switch (indx) {
581: case 0:
582: options.ColPerm = MMD_AT_PLUS_A;
583: break;
584: case 1:
585: options.ColPerm = NATURAL;
586: break;
587: case 2:
588: options.ColPerm = MMD_ATA;
589: break;
590: case 3:
591: options.ColPerm = PARMETIS;
592: break;
593: }
594: }
596: PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
597: if (!flg) {
598: options.ReplaceTinyPivot = NO;
599: }
601: options.ParSymbFact = NO;
602: PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);
603: if (flg){
604: #if defined(PETSC_HAVE_PARMETIS)
605: options.ParSymbFact = YES;
606: options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of the user's ordering setting */
607: #else
608: PetscPrintf(((PetscObject)A)->comm,"Warning: -mat_superlu_dist_parsymbfact requires PETSc to be configured with ParMETIS; the option is ignored\n");
609: #endif
610: }
612: lu->FactPattern = SamePattern;
613: PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);
614: if (flg) {
615: switch (indx) {
616: case 0:
617: lu->FactPattern = SamePattern;
618: break;
619: case 1:
620: lu->FactPattern = SamePattern_SameRowPerm;
621: break;
622: }
623: }
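  /* SamePattern reuses the sparsity pattern and column permutation across factorizations;
     SamePattern_SameRowPerm additionally reuses the row permutation and scaling factors,
     which is cheaper but assumes the new matrix has similar numerical values */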
624:
625: options.IterRefine = NOREFINE;
626: PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
627: if (flg) {
628: options.IterRefine = DOUBLE;
629: }
631: if (PetscLogPrintInfo) {
632: options.PrintStat = YES;
633: } else {
634: options.PrintStat = NO;
635: }
636: PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
637: (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);
638: PetscOptionsEnd();
640: lu->options = options;
641: lu->options.Fact = DOFACT;
642: lu->CleanUpSuperLU_Dist = PETSC_TRUE;
643: lu->matsolve_iscalled = PETSC_FALSE;
644: lu->matmatsolve_iscalled = PETSC_FALSE;
645: *F = B;
646: return(0);
647: }
652: PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
653: {
657: MatGetFactor_aij_superlu_dist(A,ftype,F);
658: return(0);
659: }
665: PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
666: {
670: MatGetFactor_aij_superlu_dist(A,ftype,F);
671: return(0);
672: }
677: PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
678: {
679: Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->spptr;
680: superlu_options_t options;
681: PetscErrorCode ierr;
684: /* check if matrix is superlu_dist type */
685: if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);
687: options = lu->options;
688: PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
689: PetscViewerASCIIPrintf(viewer," Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
690: PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);
691: PetscViewerASCIIPrintf(viewer," Matrix input mode %d \n",lu->MatInputMode);
692: PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);
693: PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);
694: PetscViewerASCIIPrintf(viewer," Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
695: PetscViewerASCIIPrintf(viewer," Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
696: if (options.ColPerm == NATURAL) {
697: PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");
698: } else if (options.ColPerm == MMD_AT_PLUS_A) {
699: PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");
700: } else if (options.ColPerm == MMD_ATA) {
701: PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");
702: } else if (options.ColPerm == PARMETIS) {
703: PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");
704: } else {
705: SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
706: }
708: PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);
709:
710: if (lu->FactPattern == SamePattern){
711: PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");
712: } else {
713: PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");
714: }
715: return(0);
716: }
720: PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
721: {
722: PetscErrorCode ierr;
723: PetscTruth iascii;
724: PetscViewerFormat format;
727: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
728: if (iascii) {
729: PetscViewerGetFormat(viewer,&format);
730: if (format == PETSC_VIEWER_ASCII_INFO) {
731: MatFactorInfo_SuperLU_DIST(A,viewer);
732: }
733: }
734: return(0);
735: }
738: /*MC
739: MAT_SOLVER_SUPERLU_DIST - Parallel direct solver package for LU factorization
741: Works with AIJ matrices
743: Options Database Keys:
744: + -mat_superlu_dist_r <n> - number of rows in processor partition
745: . -mat_superlu_dist_c <n> - number of columns in processor partition
746: . -mat_superlu_dist_matinput <GLOBAL,DISTRIBUTED> - matrix input mode
747: . -mat_superlu_dist_equil - equilibrate the matrix
748: . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
749: . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,NATURAL,MMD_ATA,PARMETIS> - column permutation
750: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
751: . -mat_superlu_dist_fact <SamePattern,SamePattern_SameRowPerm> - sparsity pattern reuse for repeated factorizations
752: . -mat_superlu_dist_iterrefine - use iterative refinement
753: - -mat_superlu_dist_statprint - print factorization information
755: Level: beginner
757: .seealso: PCLU, PCFactorSetMatSolverPackage(), MatSolverPackage
761: M*/
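
Example usage (a minimal sketch, not part of this file): the usual route to this solver is
through KSP, with runtime options such as

    -pc_type lu -pc_factor_mat_solver_package superlu_dist -mat_superlu_dist_colperm PARMETIS

To drive the factorization interface directly via the PETSc 3.0-era API used above, assuming
A, b, and x are an already assembled parallel AIJ matrix and conformant vectors:

    Mat            F;
    IS             rperm,cperm;   /* required by the call, but ignored by this interface */
    MatFactorInfo  info;
    PetscErrorCode ierr;

    ierr = MatGetFactor(A,MAT_SOLVER_SUPERLU_DIST,MAT_FACTOR_LU,&F);CHKERRQ(ierr);
    ierr = MatGetOrdering(A,MATORDERING_NATURAL,&rperm,&cperm);CHKERRQ(ierr);
    ierr = MatFactorInfoInitialize(&info);CHKERRQ(ierr);
    ierr = MatLUFactorSymbolic(F,A,rperm,cperm,&info);CHKERRQ(ierr);
    ierr = MatLUFactorNumeric(F,A,&info);CHKERRQ(ierr);
    ierr = MatSolve(F,b,x);CHKERRQ(ierr);
    ierr = ISDestroy(rperm);CHKERRQ(ierr);
    ierr = ISDestroy(cperm);CHKERRQ(ierr);
    ierr = MatDestroy(F);CHKERRQ(ierr);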