Actual source code: superlu_dist.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:         Provides an interface to the SuperLU_DIST_2.2 sparse solver
  5: */

  7: #include "../src/mat/impls/aij/seq/aij.h"
  8: #include "../src/mat/impls/aij/mpi/mpiaij.h"
  9: #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
 10: #include "stdlib.h"
 11: #endif

 13: #if defined(PETSC_USE_64BIT_INDICES)
 14: /* ugly SuperLU_Dist variable telling it to use long long int */
 15: #define _LONGINT
 16: #endif

 19: #if defined(PETSC_USE_COMPLEX)
 20: #include "superlu_zdefs.h"
 21: #else
 22: #include "superlu_ddefs.h"
 23: #endif

 26: typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
 27: const char *SuperLU_MatInputModes[] = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};

 29: typedef struct {
 30:   int_t                   nprow,npcol,*row,*col;
 31:   gridinfo_t              grid;
 32:   superlu_options_t       options;
 33:   SuperMatrix             A_sup;
 34:   ScalePermstruct_t       ScalePermstruct;
 35:   LUstruct_t              LUstruct;
 36:   int                     StatPrint;
 37:   SuperLU_MatInputMode    MatInputMode;
 38:   SOLVEstruct_t           SOLVEstruct;
 39:   fact_t                  FactPattern;
 40:   MPI_Comm                comm_superlu;
 41: #if defined(PETSC_USE_COMPLEX)
 42:   doublecomplex           *val;
 43: #else
 44:   double                  *val;
 45: #endif
 46:   PetscTruth              matsolve_iscalled,matmatsolve_iscalled;
 47:   PetscTruth CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
 48: } Mat_SuperLU_DIST;


 60: PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
 61: {
 62:   PetscErrorCode   ierr;
 63:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
 64:   PetscTruth       flg;
 65: 
 67:   if (lu->CleanUpSuperLU_Dist) {
 68:     /* Deallocate SuperLU_DIST storage */
 69:     if (lu->MatInputMode == GLOBAL) {
 70:       Destroy_CompCol_Matrix_dist(&lu->A_sup);
 71:     } else {
 72:       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
 73:       if ( lu->options.SolveInitialized ) {
 74: #if defined(PETSC_USE_COMPLEX)
 75:         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
 76: #else
 77:         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
 78: #endif
 79:       }
 80:     }
 81:     Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
 82:     ScalePermstructFree(&lu->ScalePermstruct);
 83:     LUstructFree(&lu->LUstruct);
 84: 
 85:     /* Release the SuperLU_DIST process grid. */
 86:     superlu_gridexit(&lu->grid);
 87: 
 88:     MPI_Comm_free(&(lu->comm_superlu));
 89:   }

 91:   PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
 92:   if (flg) {
 93:     MatDestroy_SeqAIJ(A);
 94:   } else {
 95:     MatDestroy_MPIAIJ(A);
 96:   }
 97:   return(0);
 98: }

102: PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
103: {
104:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
105:   PetscErrorCode   ierr;
106:   PetscMPIInt      size;
107:   PetscInt         m=A->rmap->n,M=A->rmap->N,N=A->cmap->N;
108:   SuperLUStat_t    stat;
109:   double           berr[1];
110:   PetscScalar      *bptr;
111:   PetscInt         nrhs=1;
112:   Vec              x_seq;
113:   IS               iden;
114:   VecScatter       scat;
115:   int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */

118:   MPI_Comm_size(((PetscObject)A)->comm,&size);
119:   if (size > 1 && lu->MatInputMode == GLOBAL) {
120:     /* global mat input, convert b to x_seq */
121:     VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
122:     ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
123:     VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
124:     ISDestroy(iden);

126:     VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
127:     VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
128:     VecGetArray(x_seq,&bptr);
129:   } else { /* size==1 || distributed mat input */
130:     if (lu->options.SolveInitialized && !lu->matsolve_iscalled){
131:       /* see comments in MatMatSolve() */
132: #if defined(PETSC_USE_COMPLEX)
133:       zSolveFinalize(&lu->options, &lu->SOLVEstruct);
134: #else
135:       dSolveFinalize(&lu->options, &lu->SOLVEstruct);
136: #endif
137:       lu->options.SolveInitialized = NO;
138:     }
139:     VecCopy(b_mpi,x);
140:     VecGetArray(x,&bptr);
141:   }
142: 
143:   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");

145:   PStatInit(&stat);        /* Initialize the statistics variables. */
146:   if (lu->MatInputMode == GLOBAL) {
147: #if defined(PETSC_USE_COMPLEX)
148:     pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs,
149:                    &lu->grid,&lu->LUstruct,berr,&stat,&info);
150: #else
151:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr,M,nrhs,
152:                    &lu->grid,&lu->LUstruct,berr,&stat,&info);
153: #endif 
154:   } else { /* distributed mat input */
155: #if defined(PETSC_USE_COMPLEX)
156:     pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
157:             &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
158: #else
159:     pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
160:             &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
161: #endif
162:   }
163:   if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);

165:   if (lu->options.PrintStat) {
166:      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
167:   }
168:   PStatFree(&stat);
169: 
170:   if (size > 1 && lu->MatInputMode == GLOBAL) {
171:     /* convert seq x to mpi x */
172:     VecRestoreArray(x_seq,&bptr);
173:     VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
174:     VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
175:     VecScatterDestroy(scat);
176:     VecDestroy(x_seq);
177:   } else {
178:     VecRestoreArray(x,&bptr);
179:     lu->matsolve_iscalled    = PETSC_TRUE;
180:     lu->matmatsolve_iscalled = PETSC_FALSE;
181:   }
182:   return(0);
183: }

187: PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
188: {
189:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
190:   PetscErrorCode   ierr;
191:   PetscMPIInt      size;
192:   PetscInt         M=A->rmap->N,m=A->rmap->n,nrhs;
193:   SuperLUStat_t    stat;
194:   double           berr[1];
195:   PetscScalar      *bptr;
196:   int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */

199:   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
200: 
201:   MPI_Comm_size(((PetscObject)A)->comm,&size);
202:   if (size > 1 && lu->MatInputMode == GLOBAL) {
203:     SETERRQ1(PETSC_ERR_SUP,"MatInputMode=GLOBAL for nproc %d>1 is not supported",size);
204:   }
205:   /* size==1 or distributed mat input */
206:   if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled){
207:     /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 
208:        thus destroy it and create a new SOLVEstruct.
209:        Otherwise it may result in memory corruption or incorrect solution 
210:        See src/mat/examples/tests/ex125.c */
211: #if defined(PETSC_USE_COMPLEX)
212:     zSolveFinalize(&lu->options, &lu->SOLVEstruct);
213: #else
214:     dSolveFinalize(&lu->options, &lu->SOLVEstruct);
215: #endif
216:     lu->options.SolveInitialized  = NO;
217:   }
218:   MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);

220:   MatGetSize(B_mpi,PETSC_NULL,&nrhs);
221: 
222:   PStatInit(&stat);        /* Initialize the statistics variables. */
223:   MatGetArray(X,&bptr);
224:   if (lu->MatInputMode == GLOBAL) { /* size == 1 */
225: #if defined(PETSC_USE_COMPLEX)
226:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, M, nrhs,
227:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
228: #else
229:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, M, nrhs,
230:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
231: #endif 
232:   } else { /* distributed mat input */
233: #if defined(PETSC_USE_COMPLEX)
234:     pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
235:             &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
236: #else
237:     pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
238:             &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
239: #endif
240:   }
241:   if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
242:   MatRestoreArray(X,&bptr);

244:   if (lu->options.PrintStat) {
245:      PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
246:   }
247:   PStatFree(&stat);
248:   lu->matsolve_iscalled    = PETSC_FALSE;
249:   lu->matmatsolve_iscalled = PETSC_TRUE;
250:   return(0);
251: }


256: PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
257: {
258:   Mat              *tseq,A_seq = PETSC_NULL;
259:   Mat_SeqAIJ       *aa,*bb;
260:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
261:   PetscErrorCode   ierr;
262:   PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
263:                    m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
264:   int              sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
265:   PetscMPIInt      size,rank;
266:   SuperLUStat_t    stat;
267:   double           *berr=0;
268:   IS               isrow;
269:   PetscLogDouble   time0,time,time_min,time_max;
270:   Mat              F_diag=PETSC_NULL;
271: #if defined(PETSC_USE_COMPLEX)
272:   doublecomplex    *av, *bv;
273: #else
274:   double           *av, *bv;
275: #endif

278:   MPI_Comm_size(((PetscObject)A)->comm,&size);
279:   MPI_Comm_rank(((PetscObject)A)->comm,&rank);
280: 
281:   if (lu->options.PrintStat) { /* collect time for mat conversion */
282:     MPI_Barrier(((PetscObject)A)->comm);
283:     PetscGetTime(&time0);
284:   }

286:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
287:     if (size > 1) { /* convert mpi A to seq mat A */
288:       ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
289:       MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
290:       ISDestroy(isrow);
291: 
292:       A_seq = *tseq;
293:       PetscFree(tseq);
294:       aa =  (Mat_SeqAIJ*)A_seq->data;
295:     } else {
296:       PetscTruth flg;
297:       PetscTypeCompare((PetscObject)A,MATMPIAIJ,&flg);
298:       if (flg) {
299:         Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
300:         A = At->A;
301:       }
302:       aa =  (Mat_SeqAIJ*)A->data;
303:     }

305:     /* Convert Petsc NR matrix to SuperLU_DIST NC. 
306:        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
307:     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
308:       if (lu->FactPattern == SamePattern_SameRowPerm){
309:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
310:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
311:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
312:       } else {
313:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
314:         Destroy_LU(N, &lu->grid, &lu->LUstruct);
315:         lu->options.Fact = SamePattern;
316:       }
317:     }
318: #if defined(PETSC_USE_COMPLEX)
319:     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
320: #else
321:     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
322: #endif

324:     /* Create compressed column matrix A_sup. */
325: #if defined(PETSC_USE_COMPLEX)
326:     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
327: #else
328:     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
329: #endif
330:   } else { /* distributed mat input */
331:     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
332:     aa=(Mat_SeqAIJ*)(mat->A)->data;
333:     bb=(Mat_SeqAIJ*)(mat->B)->data;
334:     ai=aa->i; aj=aa->j;
335:     bi=bb->i; bj=bb->j;
336: #if defined(PETSC_USE_COMPLEX)
337:     av=(doublecomplex*)aa->a;
338:     bv=(doublecomplex*)bb->a;
339: #else
340:     av=aa->a;
341:     bv=bb->a;
342: #endif
343:     rstart = A->rmap->rstart;
344:     nz     = aa->nz + bb->nz;
345:     garray = mat->garray;
346: 
347:     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
348: #if defined(PETSC_USE_COMPLEX)
349:       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
350: #else
351:       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
352: #endif
353:     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
354:       if (lu->FactPattern == SamePattern_SameRowPerm){
355:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
356:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
357:       } else {
358:         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
359:         lu->options.Fact = SamePattern;
360:       }
361:     }
362:     nz = 0; irow = rstart;
363:     for ( i=0; i<m; i++ ) {
364:       lu->row[i] = nz;
365:       countA = ai[i+1] - ai[i];
366:       countB = bi[i+1] - bi[i];
367:       ajj = aj + ai[i];  /* ptr to the beginning of this row */
368:       bjj = bj + bi[i];

370:       /* B part, smaller col index */
371:       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
372:       jB = 0;
373:       for (j=0; j<countB; j++){
374:         jcol = garray[bjj[j]];
375:         if (jcol > colA_start) {
376:           jB = j;
377:           break;
378:         }
379:         lu->col[nz] = jcol;
380:         lu->val[nz++] = *bv++;
381:         if (j==countB-1) jB = countB;
382:       }

384:       /* A part */
385:       for (j=0; j<countA; j++){
386:         lu->col[nz] = rstart + ajj[j];
387:         lu->val[nz++] = *av++;
388:       }

390:       /* B part, larger col index */
391:       for (j=jB; j<countB; j++){
392:         lu->col[nz] = garray[bjj[j]];
393:         lu->val[nz++] = *bv++;
394:       }
395:     }
396:     lu->row[m] = nz;
397: #if defined(PETSC_USE_COMPLEX)
398:     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
399:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
400: #else
401:     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
402:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
403: #endif
404:   }
405:   if (lu->options.PrintStat) {
406:     PetscGetTime(&time);
407:     time0 = time - time0;
408:   }

410:   /* Factor the matrix. */
411:   PStatInit(&stat);   /* Initialize the statistics variables. */
412:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
413: #if defined(PETSC_USE_COMPLEX)
414:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
415:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
416: #else
417:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
418:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
419: #endif 
420:   } else { /* distributed mat input */
421: #if defined(PETSC_USE_COMPLEX)
422:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,
423:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
424:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
425: #else
426:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,
427:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
428:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
429: #endif
430:   }

432:   if (lu->MatInputMode == GLOBAL && size > 1){
433:     MatDestroy(A_seq);
434:   }

436:   if (lu->options.PrintStat) {
437:     if (size > 1){
438:       MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
439:       MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
440:       MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
441:       time = time/size; /* average time */
442:       if (!rank) {
443:         PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);
444:       }
445:     } else {
446:       PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);
447:     }
448:     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
449:   }
450:   PStatFree(&stat);
451:   if (size > 1){
452:     F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
453:     F_diag->assembled = PETSC_TRUE;
454:   }
455:   (F)->assembled    = PETSC_TRUE;
456:   (F)->preallocated = PETSC_TRUE;
457:   lu->options.Fact  = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
458:   return(0);
459: }

461: /* Note the Petsc r and c permutations are ignored */
464: PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
465: {
466:   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (F)->spptr;
467:   PetscInt          M=A->rmap->N,N=A->cmap->N;

470:   /* Initialize the SuperLU process grid. */
471:   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

473:   /* Initialize ScalePermstruct and LUstruct. */
474:   ScalePermstructInit(M, N, &lu->ScalePermstruct);
475:   LUstructInit(M, N, &lu->LUstruct);
476:   (F)->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
477:   (F)->ops->solve           = MatSolve_SuperLU_DIST;
478:   (F)->ops->matsolve        = MatMatSolve_SuperLU_DIST;
479:   return(0);
480: }

485: PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
486: {
488:   *type = MAT_SOLVER_SUPERLU_DIST;
489:   return(0);
490: }

495: PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
496: {
497:   Mat               B;
498:   Mat_SuperLU_DIST  *lu;
499:   PetscErrorCode    ierr;
500:   PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
501:   PetscMPIInt       size;
502:   superlu_options_t options;
503:   PetscTruth        flg;
504:   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
505:   const char        *prtype[] = {"LargeDiag","NATURAL"};
506:   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

509:   /* Create the factorization matrix */
510:   MatCreate(((PetscObject)A)->comm,&B);
511:   MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);
512:   MatSetType(B,((PetscObject)A)->type_name);
513:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
514:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

516:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
517:   B->ops->view             = MatView_SuperLU_DIST;
518:   B->ops->destroy          = MatDestroy_SuperLU_DIST;
519:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);
520:   B->factor                = MAT_FACTOR_LU;

522:   PetscNewLog(B,Mat_SuperLU_DIST,&lu);
523:   B->spptr = lu;

525:   /* Set the default input options:
526:      options.Fact              = DOFACT;
527:      options.Equil             = YES;
528:      options.ParSymbFact       = NO;
529:      options.ColPerm           = MMD_AT_PLUS_A;
530:      options.RowPerm           = LargeDiag;
531:      options.ReplaceTinyPivot  = YES;
532:      options.IterRefine        = DOUBLE;
533:      options.Trans             = NOTRANS;
534:      options.SolveInitialized  = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
535:      options.RefineInitialized = NO;
536:      options.PrintStat         = YES;
537:   */
538:   set_default_options_dist(&options);

540:   MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));
541:   MPI_Comm_size(((PetscObject)A)->comm,&size);
542:   /* Default num of process columns and rows */
543:   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
544:   if (!lu->npcol) lu->npcol = 1;
545:   while (lu->npcol > 0) {
546:     lu->nprow = PetscMPIIntCast(size/lu->npcol);
547:     if (size == lu->nprow * lu->npcol) break;
548:     lu->npcol --;
549:   }
550: 
551:   PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");
552:     PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
553:     PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
554:     if (size != lu->nprow * lu->npcol)
555:       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
556: 
557:     lu->MatInputMode = DISTRIBUTED;
558:     PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);
559:     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

561:     PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
562:     if (!flg) {
563:       options.Equil = NO;
564:     }

566:     PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);
567:     if (flg) {
568:       switch (indx) {
569:       case 0:
570:         options.RowPerm = LargeDiag;
571:         break;
572:       case 1:
573:         options.RowPerm = NOROWPERM;
574:         break;
575:       }
576:     }

578:     PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);
579:     if (flg) {
580:       switch (indx) {
581:       case 0:
582:         options.ColPerm = MMD_AT_PLUS_A;
583:         break;
584:       case 1:
585:         options.ColPerm = NATURAL;
586:         break;
587:       case 2:
588:         options.ColPerm = MMD_ATA;
589:         break;
590:       case 3:
591:         options.ColPerm = PARMETIS;
592:         break;
593:       }
594:     }

596:     PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
597:     if (!flg) {
598:       options.ReplaceTinyPivot = NO;
599:     }

601:     options.ParSymbFact = NO;
602:     PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);
603:     if (flg){
604: #ifdef PETSC_HAVE_PARMETIS
605:       options.ParSymbFact = YES;
606:       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
607: #else
608:       printf("parsymbfact needs PARMETIS");
609: #endif
610:     }

612:     lu->FactPattern = SamePattern;
613:     PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);
614:     if (flg) {
615:       switch (indx) {
616:       case 0:
617:         lu->FactPattern = SamePattern;
618:         break;
619:       case 1:
620:         lu->FactPattern = SamePattern_SameRowPerm;
621:         break;
622:       }
623:     }
624: 
625:     options.IterRefine = NOREFINE;
626:     PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
627:     if (flg) {
628:       options.IterRefine = DOUBLE;
629:     }

631:     if (PetscLogPrintInfo) {
632:       options.PrintStat = YES;
633:     } else {
634:       options.PrintStat = NO;
635:     }
636:     PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
637:                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);
638:   PetscOptionsEnd();

640:   lu->options             = options;
641:   lu->options.Fact        = DOFACT;
642:   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
643:   lu->matsolve_iscalled    = PETSC_FALSE;
644:   lu->matmatsolve_iscalled = PETSC_FALSE;
645:   *F = B;
646:   return(0);
647: }

652: PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
653: {

657:   MatGetFactor_aij_superlu_dist(A,ftype,F);
658:   return(0);
659: }

665: PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
666: {

670:   MatGetFactor_aij_superlu_dist(A,ftype,F);
671:   return(0);
672: }

677: PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
678: {
679:   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
680:   superlu_options_t options;
681:   PetscErrorCode    ierr;

684:   /* check if matrix is superlu_dist type */
685:   if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);

687:   options = lu->options;
688:   PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
689:   PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
690:   PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);
691:   PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);
692:   PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);
693:   PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);
694:   PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
695:   PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
696:   if (options.ColPerm == NATURAL) {
697:     PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");
698:   } else if (options.ColPerm == MMD_AT_PLUS_A) {
699:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");
700:   } else if (options.ColPerm == MMD_ATA) {
701:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");
702:   } else if (options.ColPerm == PARMETIS) {
703:     PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");
704:   } else {
705:     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
706:   }

708:   PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);
709: 
710:   if (lu->FactPattern == SamePattern){
711:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");
712:   } else {
713:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");
714:   }
715:   return(0);
716: }

720: PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
721: {
722:   PetscErrorCode    ierr;
723:   PetscTruth        iascii;
724:   PetscViewerFormat format;

727:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
728:   if (iascii) {
729:     PetscViewerGetFormat(viewer,&format);
730:     if (format == PETSC_VIEWER_ASCII_INFO) {
731:       MatFactorInfo_SuperLU_DIST(A,viewer);
732:     }
733:   }
734:   return(0);
735: }


738: /*MC
739:   MAT_SOLVER_SUPERLU_DIST - Parallel direct solver package for LU factorization

741:    Works with AIJ matrices  

743:   Options Database Keys:
744: + -mat_superlu_dist_r <n> - number of rows in processor partition
745: . -mat_superlu_dist_c <n> - number of columns in processor partition
746: . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
747: . -mat_superlu_dist_equil - equilibrate the matrix
748: . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
749: . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
750: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
751: . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
752: . -mat_superlu_dist_iterrefine - use iterative refinement
753: - -mat_superlu_dist_statprint - print factorization information

755:    Level: beginner

757: .seealso: PCLU

759: .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage

761: M*/