Actual source code: ispai.c

  1: #define PETSCKSP_DLL

  3: /* 
  4:    3/99 Modified by Stephen Barnard to support SPAI version 3.0 
  5: */

  7: /*
  8:       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
  9:    Code written by Stephen Barnard.

 11:       Note: there are some BAD memory leaks below!

 13:       This code needs work

 15:    1) get rid of all memory leaks
 16:    2) fix the PETSc interface so that it queries the matrix itself for symmetry
 17:       rather than relying on the sp flag in PC_SPAI
 18:    3) set the block size from the matrix block size 

 20: */

 22:  #include private/pcimpl.h
 23:  #include petscspai.h

 25: /*
 26:     These are the SPAI include files
 27: */
 29: #define MPI /* required for setting SPAI_Comm correctly in basics.h */
 30: #include "spai.h"
 31: #include "matrix.h"

 34: EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
 35: EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
 36: EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
 37: EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *);

 39: typedef struct {

 41:   matrix   *B;              /* matrix in SPAI format */
 42:   matrix   *BT;             /* transpose of matrix in SPAI format */
 43:   matrix   *M;              /* the approximate inverse in SPAI format */

 45:   Mat      PM;              /* the approximate inverse PETSc format */

 47:   double   epsilon;         /* tolerance */
 48:   int      nbsteps;         /* max number of "improvement" steps per line */
 49:   int      max;             /* max dimensions of is_I, q, etc. */
 50:   int      maxnew;          /* max number of new entries per step */
 51:   int      block_size;      /* constant block size */
 52:   int      cache_size;      /* one of (1,2,3,4,5,6) indicting size of cache */
 53:   int      verbose;         /* SPAI prints timing and statistics */

 55:   int      sp;              /* symmetric nonzero pattern */
 56:   MPI_Comm comm_spai;     /* communicator to be used with spai */
 57: } PC_SPAI;

 59: /**********************************************************************/

 63: static PetscErrorCode PCSetUp_SPAI(PC pc)
 64: {
 65:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
 67:   Mat            AT;


 71:   init_SPAI();

 73:   if (ispai->sp) {
 74:     ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);
 75:   } else {
 76:     /* Use the transpose to get the column nonzero structure. */
 77:     MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);
 78:     ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);
 79:     MatDestroy(AT);
 80:   }

 82:   /* Destroy the transpose */
 83:   /* Don't know how to do it. PETSc developers? */
 84: 
 85:   /* construct SPAI preconditioner */
 86:   /* FILE *messages */     /* file for warning messages */
 87:   /* double epsilon */     /* tolerance */
 88:   /* int nbsteps */        /* max number of "improvement" steps per line */
 89:   /* int max */            /* max dimensions of is_I, q, etc. */
 90:   /* int maxnew */         /* max number of new entries per step */
 91:   /* int block_size */     /* block_size == 1 specifies scalar elments
 92:                               block_size == n specifies nxn constant-block elements
 93:                               block_size == 0 specifies variable-block elements */
 94:   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
 95:                            /* cache_size == 0 indicates no caching */
 96:   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
 97:                               verbose == 1 prints timing and matrix statistics */

 99:   bspai(ispai->B,&ispai->M,
100:                    stdout,
101:                    ispai->epsilon,
102:                    ispai->nbsteps,
103:                    ispai->max,
104:                    ispai->maxnew,
105:                    ispai->block_size,
106:                    ispai->cache_size,
107:                ispai->verbose);

109:   ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);

111:   /* free the SPAI matrices */
112:   sp_free_matrix(ispai->B);
113:   sp_free_matrix(ispai->M);

115:   return(0);
116: }

118: /**********************************************************************/

122: static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
123: {
124:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;

128:   /* Now using PETSc's multiply */
129:   MatMult(ispai->PM,xx,y);
130:   return(0);
131: }

133: /**********************************************************************/

137: static PetscErrorCode PCDestroy_SPAI(PC pc)
138: {
140:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;

143:   if (ispai->PM) {MatDestroy(ispai->PM);}
144:   MPI_Comm_free(&(ispai->comm_spai));
145:   PetscFree(ispai);
146:   return(0);
147: }

149: /**********************************************************************/

153: static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
154: {
155:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
157:   PetscTruth     iascii;

160:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
161:   if (iascii) {
162:     PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");
163:     PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);
164:     PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);
165:     PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);
166:     PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);
167:     PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);
168:     PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);
169:     PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);
170:     PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);
171:   }
172:   return(0);
173: }

178: PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
179: {
180:   PC_SPAI *ispai = (PC_SPAI*)pc->data;
182:   ispai->epsilon = epsilon1;
183:   return(0);
184: }
186: 
187: /**********************************************************************/

192: PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
193: {
194:   PC_SPAI *ispai = (PC_SPAI*)pc->data;
196:   ispai->nbsteps = nbsteps1;
197:   return(0);
198: }

201: /**********************************************************************/

203: /* added 1/7/99 g.h. */
207: PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
208: {
209:   PC_SPAI *ispai = (PC_SPAI*)pc->data;
211:   ispai->max = max1;
212:   return(0);
213: }

216: /**********************************************************************/

221: PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
222: {
223:   PC_SPAI *ispai = (PC_SPAI*)pc->data;
225:   ispai->maxnew = maxnew1;
226:   return(0);
227: }

230: /**********************************************************************/

235: PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
236: {
237:   PC_SPAI *ispai = (PC_SPAI*)pc->data;
239:   ispai->block_size = block_size1;
240:   return(0);
241: }

244: /**********************************************************************/

249: PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
250: {
251:   PC_SPAI *ispai = (PC_SPAI*)pc->data;
253:   ispai->cache_size = cache_size;
254:   return(0);
255: }

258: /**********************************************************************/

263: PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
264: {
265:   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
267:   ispai->verbose = verbose;
268:   return(0);
269: }

272: /**********************************************************************/

277: PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
278: {
279:   PC_SPAI *ispai = (PC_SPAI*)pc->data;
281:   ispai->sp = sp;
282:   return(0);
283: }

286: /* -------------------------------------------------------------------*/

290: /*@
291:   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

293:   Input Parameters:
294: + pc - the preconditioner
295: - eps - epsilon (default .4)

297:   Notes:  Espilon must be between 0 and 1. It controls the
298:                  quality of the approximation of M to the inverse of
299:                  A. Higher values of epsilon lead to more work, more
300:                  fill, and usually better preconditioners. In many
301:                  cases the best choice of epsilon is the one that
302:                  divides the total solution time equally between the
303:                  preconditioner and the solver.
304:   
305:   Level: intermediate

307: .seealso: PCSPAI, PCSetType()
308:   @*/
309: PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
310: {
311:   PetscErrorCode ierr,(*f)(PC,double);
313:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);
314:   if (f) {
315:     (*f)(pc,epsilon1);
316:   }
317:   return(0);
318: }
319: 
320: /**********************************************************************/

324: /*@
325:   PCSPAISetNBSteps - set maximum number of improvement steps per row in 
326:         the SPAI preconditioner

328:   Input Parameters:
329: + pc - the preconditioner
330: - n - number of steps (default 5)

332:   Notes:  SPAI constructs to approximation to every column of
333:                  the exact inverse of A in a series of improvement
334:                  steps. The quality of the approximation is determined
335:                  by epsilon. If an approximation achieving an accuracy
336:                  of epsilon is not obtained after ns steps, SPAI simply
337:                  uses the best approximation constructed so far.

339:   Level: intermediate

341: .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
342: @*/
343: PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
344: {
345:   PetscErrorCode ierr,(*f)(PC,int);
347:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);
348:   if (f) {
349:     (*f)(pc,nbsteps1);
350:   }
351:   return(0);
352: }

354: /**********************************************************************/

356: /* added 1/7/99 g.h. */
359: /*@
360:   PCSPAISetMax - set the size of various working buffers in 
361:         the SPAI preconditioner

363:   Input Parameters:
364: + pc - the preconditioner
365: - n - size (default is 5000)

367:   Level: intermediate

369: .seealso: PCSPAI, PCSetType()
370: @*/
371: PetscErrorCode  PCSPAISetMax(PC pc,int max1)
372: {
373:   PetscErrorCode ierr,(*f)(PC,int);
375:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);
376:   if (f) {
377:     (*f)(pc,max1);
378:   }
379:   return(0);
380: }

382: /**********************************************************************/

386: /*@
387:   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
388:    in SPAI preconditioner

390:   Input Parameters:
391: + pc - the preconditioner
392: - n - maximum number (default 5)

394:   Level: intermediate

396: .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
397: @*/
398: PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
399: {
400:   PetscErrorCode ierr,(*f)(PC,int);
402:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);
403:   if (f) {
404:     (*f)(pc,maxnew1);
405:   }
406:   return(0);
407: }

409: /**********************************************************************/

413: /*@
414:   PCSPAISetBlockSize - set the block size for the SPAI preconditioner

416:   Input Parameters:
417: + pc - the preconditioner
418: - n - block size (default 1)

420:   Notes: A block
421:                  size of 1 treats A as a matrix of scalar elements. A
422:                  block size of s > 1 treats A as a matrix of sxs
423:                  blocks. A block size of 0 treats A as a matrix with
424:                  variable sized blocks, which are determined by
425:                  searching for dense square diagonal blocks in A.
426:                  This can be very effective for finite-element
427:                  matrices.

429:                  SPAI will convert A to block form, use a block
430:                  version of the preconditioner algorithm, and then
431:                  convert the result back to scalar form.

433:                  In many cases the a block-size parameter other than 1
434:                  can lead to very significant improvement in
435:                  performance.


438:   Level: intermediate

440: .seealso: PCSPAI, PCSetType()
441: @*/
442: PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
443: {
444:   PetscErrorCode ierr,(*f)(PC,int);
446:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);
447:   if (f) {
448:     (*f)(pc,block_size1);
449:   }
450:   return(0);
451: }

453: /**********************************************************************/

457: /*@
458:   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

460:   Input Parameters:
461: + pc - the preconditioner
462: - n -  cache size {0,1,2,3,4,5} (default 5)

464:   Notes:    SPAI uses a hash table to cache messages and avoid
465:                  redundant communication. If suggest always using
466:                  5. This parameter is irrelevant in the serial
467:                  version.

469:   Level: intermediate

471: .seealso: PCSPAI, PCSetType()
472: @*/
473: PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
474: {
475:   PetscErrorCode ierr,(*f)(PC,int);
477:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);
478:   if (f) {
479:     (*f)(pc,cache_size);
480:   }
481:   return(0);
482: }

484: /**********************************************************************/

488: /*@
489:   PCSPAISetVerbose - verbosity level for the SPAI preconditioner

491:   Input Parameters:
492: + pc - the preconditioner
493: - n - level (default 1)

495:   Notes: print parameters, timings and matrix statistics

497:   Level: intermediate

499: .seealso: PCSPAI, PCSetType()
500: @*/
501: PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
502: {
503:   PetscErrorCode ierr,(*f)(PC,int);
505:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);
506:   if (f) {
507:     (*f)(pc,verbose);
508:   }
509:   return(0);
510: }

512: /**********************************************************************/

516: /*@
517:   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

519:   Input Parameters:
520: + pc - the preconditioner
521: - n - 0 or 1

523:   Notes: If A has a symmetric nonzero pattern use -sp 1 to
524:                  improve performance by eliminating some communication
525:                  in the parallel version. Even if A does not have a
526:                  symmetric nonzero pattern -sp 1 may well lead to good
527:                  results, but the code will not follow the published
528:                  SPAI algorithm exactly.


531:   Level: intermediate

533: .seealso: PCSPAI, PCSetType()
534: @*/
535: PetscErrorCode  PCSPAISetSp(PC pc,int sp)
536: {
537:   PetscErrorCode ierr,(*f)(PC,int);
539:   PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);
540:   if (f) {
541:     (*f)(pc,sp);
542:   }
543:   return(0);
544: }

546: /**********************************************************************/

548: /**********************************************************************/

552: static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
553: {
554:   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
556:   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
557:   double         epsilon1;
558:   PetscTruth     flg;

561:   PetscOptionsHead("SPAI options");
562:     PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);
563:     if (flg) {
564:       PCSPAISetEpsilon(pc,epsilon1);
565:     }
566:     PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);
567:     if (flg) {
568:       PCSPAISetNBSteps(pc,nbsteps1);
569:     }
570:     /* added 1/7/99 g.h. */
571:     PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);
572:     if (flg) {
573:       PCSPAISetMax(pc,max1);
574:     }
575:     PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);
576:     if (flg) {
577:       PCSPAISetMaxNew(pc,maxnew1);
578:     }
579:     PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);
580:     if (flg) {
581:       PCSPAISetBlockSize(pc,block_size1);
582:     }
583:     PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);
584:     if (flg) {
585:       PCSPAISetCacheSize(pc,cache_size);
586:     }
587:     PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);
588:     if (flg) {
589:       PCSPAISetVerbose(pc,verbose);
590:     }
591:     PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);
592:     if (flg) {
593:       PCSPAISetSp(pc,sp);
594:     }
595:   PetscOptionsTail();
596:   return(0);
597: }

599: /**********************************************************************/

601: /*MC
602:    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
603:      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)

605:    Options Database Keys:
606: +  -pc_spai_epsilon <eps> - set tolerance
607: .  -pc_spai_nbstep <n> - set nbsteps
608: .  -pc_spai_max <m> - set max
609: .  -pc_spai_max_new <m> - set maxnew
610: .  -pc_spai_block_size <n> - set block size
611: .  -pc_spai_cache_size <n> - set cache size
612: .  -pc_spai_sp <m> - set sp
613: -  -pc_spai_set_verbose <true,false> - verbose output

615:    Notes: This only works with AIJ matrices.

617:    Level: beginner

619:    Concepts: approximate inverse

621: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
622:     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
623:     PCSPAISetVerbose(), PCSPAISetSp()
624: M*/

629: PetscErrorCode  PCCreate_SPAI(PC pc)
630: {
631:   PC_SPAI        *ispai;

635:   PetscNewLog(pc,PC_SPAI,&ispai);
636:   pc->data           = ispai;

638:   pc->ops->destroy         = PCDestroy_SPAI;
639:   pc->ops->apply           = PCApply_SPAI;
640:   pc->ops->applyrichardson = 0;
641:   pc->ops->setup           = PCSetUp_SPAI;
642:   pc->ops->view            = PCView_SPAI;
643:   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

645:   ispai->epsilon    = .4;
646:   ispai->nbsteps    = 5;
647:   ispai->max        = 5000;
648:   ispai->maxnew     = 5;
649:   ispai->block_size = 1;
650:   ispai->cache_size = 5;
651:   ispai->verbose    = 0;

653:   ispai->sp         = 1;
654:   MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));

656:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
657:                     "PCSPAISetEpsilon_SPAI",
658:                      PCSPAISetEpsilon_SPAI);
659:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
660:                     "PCSPAISetNBSteps_SPAI",
661:                      PCSPAISetNBSteps_SPAI);
662:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
663:                     "PCSPAISetMax_SPAI",
664:                      PCSPAISetMax_SPAI);
665:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
666:                     "PCSPAISetMaxNew_SPAI",
667:                      PCSPAISetMaxNew_SPAI);
668:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
669:                     "PCSPAISetBlockSize_SPAI",
670:                      PCSPAISetBlockSize_SPAI);
671:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
672:                     "PCSPAISetCacheSize_SPAI",
673:                      PCSPAISetCacheSize_SPAI);
674:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
675:                     "PCSPAISetVerbose_SPAI",
676:                      PCSPAISetVerbose_SPAI);
677:   PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
678:                     "PCSPAISetSp_SPAI",
679:                      PCSPAISetSp_SPAI);

681:   return(0);
682: }

685: /**********************************************************************/

687: /*
688:    Converts from a PETSc matrix to an SPAI matrix 
689: */
692: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
693: {
694:   matrix                  *M;
695:   int                     i,j,col;
696:   int                     row_indx;
697:   int                     len,pe,local_indx,start_indx;
698:   int                     *mapping;
699:   PetscErrorCode          ierr;
700:   const int               *cols;
701:   const double            *vals;
702:   int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
703:   PetscMPIInt             size,rank;
704:   struct compressed_lines *rows;

707: 
708:   MPI_Comm_size(comm,&size);
709:   MPI_Comm_rank(comm,&rank);
710:   MatGetSize(A,&n,&n);
711:   MatGetLocalSize(A,&mnl,&nnl);

713:   /*
714:     not sure why a barrier is required. commenting out
715:   MPI_Barrier(comm);
716:   */

718:   M = new_matrix((SPAI_Comm)comm);
719: 
720:   M->n = n;
721:   M->bs = 1;
722:   M->max_block_size = 1;

724:   M->mnls          = (int*)malloc(sizeof(int)*size);
725:   M->start_indices = (int*)malloc(sizeof(int)*size);
726:   M->pe            = (int*)malloc(sizeof(int)*n);
727:   M->block_sizes   = (int*)malloc(sizeof(int)*n);
728:   for (i=0; i<n; i++) M->block_sizes[i] = 1;

730:   MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);

732:   M->start_indices[0] = 0;
733:   for (i=1; i<size; i++) {
734:     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
735:   }

737:   M->mnl = M->mnls[M->myid];
738:   M->my_start_index = M->start_indices[M->myid];

740:   for (i=0; i<size; i++) {
741:     start_indx = M->start_indices[i];
742:     for (j=0; j<M->mnls[i]; j++)
743:       M->pe[start_indx+j] = i;
744:   }

746:   if (AT) {
747:     M->lines = new_compressed_lines(M->mnls[rank],1);
748:   } else {
749:     M->lines = new_compressed_lines(M->mnls[rank],0);
750:   }

752:   rows = M->lines;

754:   /* Determine the mapping from global indices to pointers */
755:   PetscMalloc(M->n*sizeof(int),&mapping);
756:   pe         = 0;
757:   local_indx = 0;
758:   for (i=0; i<M->n; i++) {
759:     if (local_indx >= M->mnls[pe]) {
760:       pe++;
761:       local_indx = 0;
762:     }
763:     mapping[i] = local_indx + M->start_indices[pe];
764:     local_indx++;
765:   }


768:   PetscMalloc(mnl*sizeof(int),&num_ptr);

770:   /*********************************************************/
771:   /************** Set up the row structure *****************/
772:   /*********************************************************/

774:   /* count number of nonzeros in every row */
775:   MatGetOwnershipRange(A,&rstart,&rend);
776:   for (i=rstart; i<rend; i++) {
777:     MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
778:     MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
779:   }

781:   /* allocate buffers */
782:   len = 0;
783:   for (i=0; i<mnl; i++) {
784:     if (len < num_ptr[i]) len = num_ptr[i];
785:   }

787:   for (i=rstart; i<rend; i++) {
788:     row_indx             = i-rstart;
789:     len                  = num_ptr[row_indx];
790:     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
791:     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
792:   }

794:   /* copy the matrix */
795:   for (i=rstart; i<rend; i++) {
796:     row_indx = i - rstart;
797:     MatGetRow(A,i,&nz,&cols,&vals);
798:     for (j=0; j<nz; j++) {
799:       col = cols[j];
800:       len = rows->len[row_indx]++;
801:       rows->ptrs[row_indx][len] = mapping[col];
802:       rows->A[row_indx][len]    = vals[j];
803:     }
804:     rows->slen[row_indx] = rows->len[row_indx];
805:     MatRestoreRow(A,i,&nz,&cols,&vals);
806:   }


809:   /************************************************************/
810:   /************** Set up the column structure *****************/
811:   /*********************************************************/

813:   if (AT) {

815:     /* count number of nonzeros in every column */
816:     for (i=rstart; i<rend; i++) {
817:       MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
818:       MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
819:     }

821:     /* allocate buffers */
822:     len = 0;
823:     for (i=0; i<mnl; i++) {
824:       if (len < num_ptr[i]) len = num_ptr[i];
825:     }

827:     for (i=rstart; i<rend; i++) {
828:       row_indx = i-rstart;
829:       len      = num_ptr[row_indx];
830:       rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
831:     }

833:     /* copy the matrix (i.e., the structure) */
834:     for (i=rstart; i<rend; i++) {
835:       row_indx = i - rstart;
836:       MatGetRow(AT,i,&nz,&cols,&vals);
837:       for (j=0; j<nz; j++) {
838:         col = cols[j];
839:         len = rows->rlen[row_indx]++;
840:         rows->rptrs[row_indx][len] = mapping[col];
841:       }
842:       MatRestoreRow(AT,i,&nz,&cols,&vals);
843:     }
844:   }

846:   PetscFree(num_ptr);
847:   PetscFree(mapping);

849:   order_pointers(M);
850:   M->maxnz = calc_maxnz(M);

852:   *B = M;

854:   return(0);
855: }

857: /**********************************************************************/

859: /*
860:    Converts from an SPAI matrix B  to a PETSc matrix PB.
861:    This assumes that the the SPAI matrix B is stored in
862:    COMPRESSED-ROW format.
863: */
866: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
867: {
868:   PetscMPIInt    size,rank;
870:   int            m,n,M,N;
871:   int            d_nz,o_nz;
872:   int            *d_nnz,*o_nnz;
873:   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
874:   PetscScalar    val;

877:   MPI_Comm_size(comm,&size);
878:   MPI_Comm_rank(comm,&rank);
879: 
880:   m = n = B->mnls[rank];
881:   d_nz = o_nz = 0;

883:   /* Determine preallocation for MatCreateMPIAIJ */
884:   PetscMalloc(m*sizeof(PetscInt),&d_nnz);
885:   PetscMalloc(m*sizeof(PetscInt),&o_nnz);
886:   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
887:   first_diag_col = B->start_indices[rank];
888:   last_diag_col = first_diag_col + B->mnls[rank];
889:   for (i=0; i<B->mnls[rank]; i++) {
890:     for (k=0; k<B->lines->len[i]; k++) {
891:       global_col = B->lines->ptrs[i][k];
892:       if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
893:         d_nnz[i]++;
894:       else
895:         o_nnz[i]++;
896:     }
897:   }

899:   M = N = B->n;
900:   /* Here we only know how to create AIJ format */
901:   MatCreate(comm,PB);
902:   MatSetSizes(*PB,m,n,M,N);
903:   MatSetType(*PB,MATAIJ);
904:   MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);
905:   MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);

907:   for (i=0; i<B->mnls[rank]; i++) {
908:     global_row = B->start_indices[rank]+i;
909:     for (k=0; k<B->lines->len[i]; k++) {
910:       global_col = B->lines->ptrs[i][k];
911:       val = B->lines->A[i][k];
912:       MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);
913:     }
914:   }

916:   PetscFree(d_nnz);
917:   PetscFree(o_nnz);

919:   MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);
920:   MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);

922:   return(0);
923: }

925: /**********************************************************************/

927: /*
928:    Converts from an SPAI vector v  to a PETSc vec Pv.
929: */
932: PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
933: {
935:   PetscMPIInt    size,rank;
936:   int            m,M,i,*mnls,*start_indices,*global_indices;
937: 
939:   MPI_Comm_size(comm,&size);
940:   MPI_Comm_rank(comm,&rank);
941: 
942:   m = v->mnl;
943:   M = v->n;
944: 
945: 
946:   VecCreateMPI(comm,m,M,Pv);

948:   PetscMalloc(size*sizeof(int),&mnls);
949:   MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);
950: 
951:   PetscMalloc(size*sizeof(int),&start_indices);
952:   start_indices[0] = 0;
953:   for (i=1; i<size; i++)
954:     start_indices[i] = start_indices[i-1] +mnls[i-1];
955: 
956:   PetscMalloc(v->mnl*sizeof(int),&global_indices);
957:   for (i=0; i<v->mnl; i++)
958:     global_indices[i] = start_indices[rank] + i;

960:   PetscFree(mnls);
961:   PetscFree(start_indices);
962: 
963:   VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);
964:   VecAssemblyBegin(*Pv);
965:   VecAssemblyEnd(*Pv);

967:   PetscFree(global_indices);
968:   return(0);
969: }