Actual source code: ispai.c
1: #define PETSCKSP_DLL
3: /*
4: 3/99 Modified by Stephen Barnard to support SPAI version 3.0
5: */
7: /*
8: Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
9: Code written by Stephen Barnard.
11: Note: there is some BAD memory bleeding below!
13: This code needs work
15: 1) get rid of all memory bleeding
16: 2) fix the PETSc interface so that it determines whether the matrix is symmetric
17: from the matrix itself rather than relying on the sp flag in PC_SPAI
18: 3) fix to set the block size based on the matrix block size
20: */
22: #include private/pcimpl.h
23: #include petscspai.h
25: /*
26: These are the SPAI include files
27: */
29: #define MPI /* required for setting SPAI_Comm correctly in basics.h */
30: #include "spai.h"
31: #include "matrix.h"
34: EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
35: EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
36: EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
37: EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *);
39: typedef struct {
41: matrix *B; /* matrix in SPAI format */
42: matrix *BT; /* transpose of matrix in SPAI format */
43: matrix *M; /* the approximate inverse in SPAI format */
45: Mat PM; /* the approximate inverse PETSc format */
47: double epsilon; /* tolerance */
48: int nbsteps; /* max number of "improvement" steps per line */
49: int max; /* max dimensions of is_I, q, etc. */
50: int maxnew; /* max number of new entries per step */
51: int block_size; /* constant block size */
52: int cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */
53: int verbose; /* SPAI prints timing and statistics */
55: int sp; /* symmetric nonzero pattern */
56: MPI_Comm comm_spai; /* communicator to be used with spai */
57: } PC_SPAI;
59: /**********************************************************************/
63: static PetscErrorCode PCSetUp_SPAI(PC pc)
64: {
65: PC_SPAI *ispai = (PC_SPAI*)pc->data;
67: Mat AT;
71: init_SPAI();
73: if (ispai->sp) {
74: ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);
75: } else {
76: /* Use the transpose to get the column nonzero structure. */
77: MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);
78: ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);
79: MatDestroy(AT);
80: }
82: /* Destroy the transpose */
83: /* Don't know how to do it. PETSc developers? */
84:
85: /* construct SPAI preconditioner */
86: /* FILE *messages */ /* file for warning messages */
87: /* double epsilon */ /* tolerance */
88: /* int nbsteps */ /* max number of "improvement" steps per line */
89: /* int max */ /* max dimensions of is_I, q, etc. */
90: /* int maxnew */ /* max number of new entries per step */
91: /* int block_size */ /* block_size == 1 specifies scalar elments
92: block_size == n specifies nxn constant-block elements
93: block_size == 0 specifies variable-block elements */
94: /* int cache_size */ /* one of (1,2,3,4,5,6) indicting size of cache */
95: /* cache_size == 0 indicates no caching */
96: /* int verbose */ /* verbose == 0 specifies that SPAI is silent
97: verbose == 1 prints timing and matrix statistics */
99: bspai(ispai->B,&ispai->M,
100: stdout,
101: ispai->epsilon,
102: ispai->nbsteps,
103: ispai->max,
104: ispai->maxnew,
105: ispai->block_size,
106: ispai->cache_size,
107: ispai->verbose);
109: ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);
111: /* free the SPAI matrices */
112: sp_free_matrix(ispai->B);
113: sp_free_matrix(ispai->M);
115: return(0);
116: }
118: /**********************************************************************/
122: static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
123: {
124: PC_SPAI *ispai = (PC_SPAI*)pc->data;
128: /* Now using PETSc's multiply */
129: MatMult(ispai->PM,xx,y);
130: return(0);
131: }
133: /**********************************************************************/
137: static PetscErrorCode PCDestroy_SPAI(PC pc)
138: {
140: PC_SPAI *ispai = (PC_SPAI*)pc->data;
143: if (ispai->PM) {MatDestroy(ispai->PM);}
144: MPI_Comm_free(&(ispai->comm_spai));
145: PetscFree(ispai);
146: return(0);
147: }
149: /**********************************************************************/
153: static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
154: {
155: PC_SPAI *ispai = (PC_SPAI*)pc->data;
157: PetscTruth iascii;
160: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
161: if (iascii) {
162: PetscViewerASCIIPrintf(viewer," SPAI preconditioner\n");
163: PetscViewerASCIIPrintf(viewer," epsilon %G\n", ispai->epsilon);
164: PetscViewerASCIIPrintf(viewer," nbsteps %d\n", ispai->nbsteps);
165: PetscViewerASCIIPrintf(viewer," max %d\n", ispai->max);
166: PetscViewerASCIIPrintf(viewer," maxnew %d\n", ispai->maxnew);
167: PetscViewerASCIIPrintf(viewer," block_size %d\n",ispai->block_size);
168: PetscViewerASCIIPrintf(viewer," cache_size %d\n",ispai->cache_size);
169: PetscViewerASCIIPrintf(viewer," verbose %d\n", ispai->verbose);
170: PetscViewerASCIIPrintf(viewer," sp %d\n", ispai->sp);
171: }
172: return(0);
173: }
178: PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
179: {
180: PC_SPAI *ispai = (PC_SPAI*)pc->data;
182: ispai->epsilon = epsilon1;
183: return(0);
184: }
186:
187: /**********************************************************************/
192: PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
193: {
194: PC_SPAI *ispai = (PC_SPAI*)pc->data;
196: ispai->nbsteps = nbsteps1;
197: return(0);
198: }
201: /**********************************************************************/
203: /* added 1/7/99 g.h. */
207: PetscErrorCode PCSPAISetMax_SPAI(PC pc,int max1)
208: {
209: PC_SPAI *ispai = (PC_SPAI*)pc->data;
211: ispai->max = max1;
212: return(0);
213: }
216: /**********************************************************************/
221: PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
222: {
223: PC_SPAI *ispai = (PC_SPAI*)pc->data;
225: ispai->maxnew = maxnew1;
226: return(0);
227: }
230: /**********************************************************************/
235: PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
236: {
237: PC_SPAI *ispai = (PC_SPAI*)pc->data;
239: ispai->block_size = block_size1;
240: return(0);
241: }
244: /**********************************************************************/
249: PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
250: {
251: PC_SPAI *ispai = (PC_SPAI*)pc->data;
253: ispai->cache_size = cache_size;
254: return(0);
255: }
258: /**********************************************************************/
263: PetscErrorCode PCSPAISetVerbose_SPAI(PC pc,int verbose)
264: {
265: PC_SPAI *ispai = (PC_SPAI*)pc->data;
267: ispai->verbose = verbose;
268: return(0);
269: }
272: /**********************************************************************/
277: PetscErrorCode PCSPAISetSp_SPAI(PC pc,int sp)
278: {
279: PC_SPAI *ispai = (PC_SPAI*)pc->data;
281: ispai->sp = sp;
282: return(0);
283: }
286: /* -------------------------------------------------------------------*/
290: /*@
291: PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
293: Input Parameters:
294: + pc - the preconditioner
295: - eps - epsilon (default .4)
297: Notes: Espilon must be between 0 and 1. It controls the
298: quality of the approximation of M to the inverse of
299: A. Higher values of epsilon lead to more work, more
300: fill, and usually better preconditioners. In many
301: cases the best choice of epsilon is the one that
302: divides the total solution time equally between the
303: preconditioner and the solver.
304:
305: Level: intermediate
307: .seealso: PCSPAI, PCSetType()
308: @*/
309: PetscErrorCode PCSPAISetEpsilon(PC pc,double epsilon1)
310: {
311: PetscErrorCode ierr,(*f)(PC,double);
313: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);
314: if (f) {
315: (*f)(pc,epsilon1);
316: }
317: return(0);
318: }
319:
320: /**********************************************************************/
324: /*@
325: PCSPAISetNBSteps - set maximum number of improvement steps per row in
326: the SPAI preconditioner
328: Input Parameters:
329: + pc - the preconditioner
330: - n - number of steps (default 5)
332: Notes: SPAI constructs to approximation to every column of
333: the exact inverse of A in a series of improvement
334: steps. The quality of the approximation is determined
335: by epsilon. If an approximation achieving an accuracy
336: of epsilon is not obtained after ns steps, SPAI simply
337: uses the best approximation constructed so far.
339: Level: intermediate
341: .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
342: @*/
343: PetscErrorCode PCSPAISetNBSteps(PC pc,int nbsteps1)
344: {
345: PetscErrorCode ierr,(*f)(PC,int);
347: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);
348: if (f) {
349: (*f)(pc,nbsteps1);
350: }
351: return(0);
352: }
354: /**********************************************************************/
356: /* added 1/7/99 g.h. */
359: /*@
360: PCSPAISetMax - set the size of various working buffers in
361: the SPAI preconditioner
363: Input Parameters:
364: + pc - the preconditioner
365: - n - size (default is 5000)
367: Level: intermediate
369: .seealso: PCSPAI, PCSetType()
370: @*/
371: PetscErrorCode PCSPAISetMax(PC pc,int max1)
372: {
373: PetscErrorCode ierr,(*f)(PC,int);
375: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);
376: if (f) {
377: (*f)(pc,max1);
378: }
379: return(0);
380: }
382: /**********************************************************************/
386: /*@
387: PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
388: in SPAI preconditioner
390: Input Parameters:
391: + pc - the preconditioner
392: - n - maximum number (default 5)
394: Level: intermediate
396: .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
397: @*/
398: PetscErrorCode PCSPAISetMaxNew(PC pc,int maxnew1)
399: {
400: PetscErrorCode ierr,(*f)(PC,int);
402: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);
403: if (f) {
404: (*f)(pc,maxnew1);
405: }
406: return(0);
407: }
409: /**********************************************************************/
413: /*@
414: PCSPAISetBlockSize - set the block size for the SPAI preconditioner
416: Input Parameters:
417: + pc - the preconditioner
418: - n - block size (default 1)
420: Notes: A block
421: size of 1 treats A as a matrix of scalar elements. A
422: block size of s > 1 treats A as a matrix of sxs
423: blocks. A block size of 0 treats A as a matrix with
424: variable sized blocks, which are determined by
425: searching for dense square diagonal blocks in A.
426: This can be very effective for finite-element
427: matrices.
429: SPAI will convert A to block form, use a block
430: version of the preconditioner algorithm, and then
431: convert the result back to scalar form.
433: In many cases the a block-size parameter other than 1
434: can lead to very significant improvement in
435: performance.
438: Level: intermediate
440: .seealso: PCSPAI, PCSetType()
441: @*/
442: PetscErrorCode PCSPAISetBlockSize(PC pc,int block_size1)
443: {
444: PetscErrorCode ierr,(*f)(PC,int);
446: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);
447: if (f) {
448: (*f)(pc,block_size1);
449: }
450: return(0);
451: }
453: /**********************************************************************/
457: /*@
458: PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
460: Input Parameters:
461: + pc - the preconditioner
462: - n - cache size {0,1,2,3,4,5} (default 5)
464: Notes: SPAI uses a hash table to cache messages and avoid
465: redundant communication. If suggest always using
466: 5. This parameter is irrelevant in the serial
467: version.
469: Level: intermediate
471: .seealso: PCSPAI, PCSetType()
472: @*/
473: PetscErrorCode PCSPAISetCacheSize(PC pc,int cache_size)
474: {
475: PetscErrorCode ierr,(*f)(PC,int);
477: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);
478: if (f) {
479: (*f)(pc,cache_size);
480: }
481: return(0);
482: }
484: /**********************************************************************/
488: /*@
489: PCSPAISetVerbose - verbosity level for the SPAI preconditioner
491: Input Parameters:
492: + pc - the preconditioner
493: - n - level (default 1)
495: Notes: print parameters, timings and matrix statistics
497: Level: intermediate
499: .seealso: PCSPAI, PCSetType()
500: @*/
501: PetscErrorCode PCSPAISetVerbose(PC pc,int verbose)
502: {
503: PetscErrorCode ierr,(*f)(PC,int);
505: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);
506: if (f) {
507: (*f)(pc,verbose);
508: }
509: return(0);
510: }
512: /**********************************************************************/
516: /*@
517: PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
519: Input Parameters:
520: + pc - the preconditioner
521: - n - 0 or 1
523: Notes: If A has a symmetric nonzero pattern use -sp 1 to
524: improve performance by eliminating some communication
525: in the parallel version. Even if A does not have a
526: symmetric nonzero pattern -sp 1 may well lead to good
527: results, but the code will not follow the published
528: SPAI algorithm exactly.
531: Level: intermediate
533: .seealso: PCSPAI, PCSetType()
534: @*/
535: PetscErrorCode PCSPAISetSp(PC pc,int sp)
536: {
537: PetscErrorCode ierr,(*f)(PC,int);
539: PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);
540: if (f) {
541: (*f)(pc,sp);
542: }
543: return(0);
544: }
546: /**********************************************************************/
548: /**********************************************************************/
552: static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
553: {
554: PC_SPAI *ispai = (PC_SPAI*)pc->data;
556: int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
557: double epsilon1;
558: PetscTruth flg;
561: PetscOptionsHead("SPAI options");
562: PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);
563: if (flg) {
564: PCSPAISetEpsilon(pc,epsilon1);
565: }
566: PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);
567: if (flg) {
568: PCSPAISetNBSteps(pc,nbsteps1);
569: }
570: /* added 1/7/99 g.h. */
571: PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);
572: if (flg) {
573: PCSPAISetMax(pc,max1);
574: }
575: PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);
576: if (flg) {
577: PCSPAISetMaxNew(pc,maxnew1);
578: }
579: PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);
580: if (flg) {
581: PCSPAISetBlockSize(pc,block_size1);
582: }
583: PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);
584: if (flg) {
585: PCSPAISetCacheSize(pc,cache_size);
586: }
587: PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);
588: if (flg) {
589: PCSPAISetVerbose(pc,verbose);
590: }
591: PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);
592: if (flg) {
593: PCSPAISetSp(pc,sp);
594: }
595: PetscOptionsTail();
596: return(0);
597: }
599: /**********************************************************************/
601: /*MC
602: PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
603: as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
605: Options Database Keys:
606: + -pc_spai_epsilon <eps> - set tolerance
607: . -pc_spai_nbstep <n> - set nbsteps
608: . -pc_spai_max <m> - set max
609: . -pc_spai_max_new <m> - set maxnew
610: . -pc_spai_block_size <n> - set block size
611: . -pc_spai_cache_size <n> - set cache size
612: . -pc_spai_sp <m> - set sp
613: - -pc_spai_set_verbose <true,false> - verbose output
615: Notes: This only works with AIJ matrices.
617: Level: beginner
619: Concepts: approximate inverse
621: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
622: PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
623: PCSPAISetVerbose(), PCSPAISetSp()
624: M*/
629: PetscErrorCode PCCreate_SPAI(PC pc)
630: {
631: PC_SPAI *ispai;
635: PetscNewLog(pc,PC_SPAI,&ispai);
636: pc->data = ispai;
638: pc->ops->destroy = PCDestroy_SPAI;
639: pc->ops->apply = PCApply_SPAI;
640: pc->ops->applyrichardson = 0;
641: pc->ops->setup = PCSetUp_SPAI;
642: pc->ops->view = PCView_SPAI;
643: pc->ops->setfromoptions = PCSetFromOptions_SPAI;
645: ispai->epsilon = .4;
646: ispai->nbsteps = 5;
647: ispai->max = 5000;
648: ispai->maxnew = 5;
649: ispai->block_size = 1;
650: ispai->cache_size = 5;
651: ispai->verbose = 0;
653: ispai->sp = 1;
654: MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));
656: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
657: "PCSPAISetEpsilon_SPAI",
658: PCSPAISetEpsilon_SPAI);
659: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
660: "PCSPAISetNBSteps_SPAI",
661: PCSPAISetNBSteps_SPAI);
662: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
663: "PCSPAISetMax_SPAI",
664: PCSPAISetMax_SPAI);
665: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
666: "PCSPAISetMaxNew_SPAI",
667: PCSPAISetMaxNew_SPAI);
668: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
669: "PCSPAISetBlockSize_SPAI",
670: PCSPAISetBlockSize_SPAI);
671: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
672: "PCSPAISetCacheSize_SPAI",
673: PCSPAISetCacheSize_SPAI);
674: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
675: "PCSPAISetVerbose_SPAI",
676: PCSPAISetVerbose_SPAI);
677: PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
678: "PCSPAISetSp_SPAI",
679: PCSPAISetSp_SPAI);
681: return(0);
682: }
685: /**********************************************************************/
687: /*
688: Converts from a PETSc matrix to an SPAI matrix
689: */
692: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
693: {
694: matrix *M;
695: int i,j,col;
696: int row_indx;
697: int len,pe,local_indx,start_indx;
698: int *mapping;
699: PetscErrorCode ierr;
700: const int *cols;
701: const double *vals;
702: int *num_ptr,n,mnl,nnl,nz,rstart,rend;
703: PetscMPIInt size,rank;
704: struct compressed_lines *rows;
707:
708: MPI_Comm_size(comm,&size);
709: MPI_Comm_rank(comm,&rank);
710: MatGetSize(A,&n,&n);
711: MatGetLocalSize(A,&mnl,&nnl);
713: /*
714: not sure why a barrier is required. commenting out
715: MPI_Barrier(comm);
716: */
718: M = new_matrix((SPAI_Comm)comm);
719:
720: M->n = n;
721: M->bs = 1;
722: M->max_block_size = 1;
724: M->mnls = (int*)malloc(sizeof(int)*size);
725: M->start_indices = (int*)malloc(sizeof(int)*size);
726: M->pe = (int*)malloc(sizeof(int)*n);
727: M->block_sizes = (int*)malloc(sizeof(int)*n);
728: for (i=0; i<n; i++) M->block_sizes[i] = 1;
730: MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);
732: M->start_indices[0] = 0;
733: for (i=1; i<size; i++) {
734: M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
735: }
737: M->mnl = M->mnls[M->myid];
738: M->my_start_index = M->start_indices[M->myid];
740: for (i=0; i<size; i++) {
741: start_indx = M->start_indices[i];
742: for (j=0; j<M->mnls[i]; j++)
743: M->pe[start_indx+j] = i;
744: }
746: if (AT) {
747: M->lines = new_compressed_lines(M->mnls[rank],1);
748: } else {
749: M->lines = new_compressed_lines(M->mnls[rank],0);
750: }
752: rows = M->lines;
754: /* Determine the mapping from global indices to pointers */
755: PetscMalloc(M->n*sizeof(int),&mapping);
756: pe = 0;
757: local_indx = 0;
758: for (i=0; i<M->n; i++) {
759: if (local_indx >= M->mnls[pe]) {
760: pe++;
761: local_indx = 0;
762: }
763: mapping[i] = local_indx + M->start_indices[pe];
764: local_indx++;
765: }
768: PetscMalloc(mnl*sizeof(int),&num_ptr);
770: /*********************************************************/
771: /************** Set up the row structure *****************/
772: /*********************************************************/
774: /* count number of nonzeros in every row */
775: MatGetOwnershipRange(A,&rstart,&rend);
776: for (i=rstart; i<rend; i++) {
777: MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
778: MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
779: }
781: /* allocate buffers */
782: len = 0;
783: for (i=0; i<mnl; i++) {
784: if (len < num_ptr[i]) len = num_ptr[i];
785: }
787: for (i=rstart; i<rend; i++) {
788: row_indx = i-rstart;
789: len = num_ptr[row_indx];
790: rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
791: rows->A[row_indx] = (double*)malloc(len*sizeof(double));
792: }
794: /* copy the matrix */
795: for (i=rstart; i<rend; i++) {
796: row_indx = i - rstart;
797: MatGetRow(A,i,&nz,&cols,&vals);
798: for (j=0; j<nz; j++) {
799: col = cols[j];
800: len = rows->len[row_indx]++;
801: rows->ptrs[row_indx][len] = mapping[col];
802: rows->A[row_indx][len] = vals[j];
803: }
804: rows->slen[row_indx] = rows->len[row_indx];
805: MatRestoreRow(A,i,&nz,&cols,&vals);
806: }
809: /************************************************************/
810: /************** Set up the column structure *****************/
811: /*********************************************************/
813: if (AT) {
815: /* count number of nonzeros in every column */
816: for (i=rstart; i<rend; i++) {
817: MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
818: MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);
819: }
821: /* allocate buffers */
822: len = 0;
823: for (i=0; i<mnl; i++) {
824: if (len < num_ptr[i]) len = num_ptr[i];
825: }
827: for (i=rstart; i<rend; i++) {
828: row_indx = i-rstart;
829: len = num_ptr[row_indx];
830: rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
831: }
833: /* copy the matrix (i.e., the structure) */
834: for (i=rstart; i<rend; i++) {
835: row_indx = i - rstart;
836: MatGetRow(AT,i,&nz,&cols,&vals);
837: for (j=0; j<nz; j++) {
838: col = cols[j];
839: len = rows->rlen[row_indx]++;
840: rows->rptrs[row_indx][len] = mapping[col];
841: }
842: MatRestoreRow(AT,i,&nz,&cols,&vals);
843: }
844: }
846: PetscFree(num_ptr);
847: PetscFree(mapping);
849: order_pointers(M);
850: M->maxnz = calc_maxnz(M);
852: *B = M;
854: return(0);
855: }
857: /**********************************************************************/
859: /*
860: Converts from an SPAI matrix B to a PETSc matrix PB.
861: This assumes that the the SPAI matrix B is stored in
862: COMPRESSED-ROW format.
863: */
866: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
867: {
868: PetscMPIInt size,rank;
870: int m,n,M,N;
871: int d_nz,o_nz;
872: int *d_nnz,*o_nnz;
873: int i,k,global_row,global_col,first_diag_col,last_diag_col;
874: PetscScalar val;
877: MPI_Comm_size(comm,&size);
878: MPI_Comm_rank(comm,&rank);
879:
880: m = n = B->mnls[rank];
881: d_nz = o_nz = 0;
883: /* Determine preallocation for MatCreateMPIAIJ */
884: PetscMalloc(m*sizeof(PetscInt),&d_nnz);
885: PetscMalloc(m*sizeof(PetscInt),&o_nnz);
886: for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
887: first_diag_col = B->start_indices[rank];
888: last_diag_col = first_diag_col + B->mnls[rank];
889: for (i=0; i<B->mnls[rank]; i++) {
890: for (k=0; k<B->lines->len[i]; k++) {
891: global_col = B->lines->ptrs[i][k];
892: if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
893: d_nnz[i]++;
894: else
895: o_nnz[i]++;
896: }
897: }
899: M = N = B->n;
900: /* Here we only know how to create AIJ format */
901: MatCreate(comm,PB);
902: MatSetSizes(*PB,m,n,M,N);
903: MatSetType(*PB,MATAIJ);
904: MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);
905: MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);
907: for (i=0; i<B->mnls[rank]; i++) {
908: global_row = B->start_indices[rank]+i;
909: for (k=0; k<B->lines->len[i]; k++) {
910: global_col = B->lines->ptrs[i][k];
911: val = B->lines->A[i][k];
912: MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);
913: }
914: }
916: PetscFree(d_nnz);
917: PetscFree(o_nnz);
919: MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);
920: MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);
922: return(0);
923: }
925: /**********************************************************************/
927: /*
928: Converts from an SPAI vector v to a PETSc vec Pv.
929: */
932: PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
933: {
935: PetscMPIInt size,rank;
936: int m,M,i,*mnls,*start_indices,*global_indices;
937:
939: MPI_Comm_size(comm,&size);
940: MPI_Comm_rank(comm,&rank);
941:
942: m = v->mnl;
943: M = v->n;
944:
945:
946: VecCreateMPI(comm,m,M,Pv);
948: PetscMalloc(size*sizeof(int),&mnls);
949: MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);
950:
951: PetscMalloc(size*sizeof(int),&start_indices);
952: start_indices[0] = 0;
953: for (i=1; i<size; i++)
954: start_indices[i] = start_indices[i-1] +mnls[i-1];
955:
956: PetscMalloc(v->mnl*sizeof(int),&global_indices);
957: for (i=0; i<v->mnl; i++)
958: global_indices[i] = start_indices[rank] + i;
960: PetscFree(mnls);
961: PetscFree(start_indices);
962:
963: VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);
964: VecAssemblyBegin(*Pv);
965: VecAssemblyEnd(*Pv);
967: PetscFree(global_indices);
968: return(0);
969: }