Actual source code: mpibaij.c
1: #define PETSCMAT_DLL
3: #include "../src/mat/impls/baij/mpi/mpibaij.h"
4: #include "petscblaslapack.h"
6: EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7: EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
8: EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
9: EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
10: EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
11: EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
12: EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
13: EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14: EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
15: EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
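/*
   MatGetRowMaxAbs_MPIBAIJ - for each local row, finds the entry of largest
   absolute value. The diagonal block A is searched first (its column indices
   are shifted into the global numbering), then the off-diagonal block B is
   searched into a temporary sequential vector; the larger of the two wins,
   with B's block column indices mapped back to global columns through garray.
*/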
19: PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
20: {
21: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
23: PetscInt i,*idxb = 0;
24: PetscScalar *va,*vb;
25: Vec vtmp;
28: MatGetRowMaxAbs(a->A,v,idx);
29: VecGetArray(v,&va);
30: if (idx) {
31: for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
32: }
34: VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);
35: if (idx) {PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);}
36: MatGetRowMaxAbs(a->B,vtmp,idxb);
37: VecGetArray(vtmp,&vb);
39: for (i=0; i<A->rmap->n; i++){
40: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
41: }
43: VecRestoreArray(v,&va);
44: VecRestoreArray(vtmp,&vb);
45: if (idxb) {PetscFree(idxb);}
46: VecDestroy(vtmp);
47: return(0);
48: }
53: PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
54: {
55: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
59: MatStoreValues(aij->A);
60: MatStoreValues(aij->B);
61: return(0);
62: }
68: PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
69: {
70: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
74: MatRetrieveValues(aij->A);
75: MatRetrieveValues(aij->B);
76: return(0);
77: }
80: /*
81: Local utility routine that creates a mapping from the global column
82: number to the local number in the off-diagonal part of the local
83: storage of the matrix. This is done in a non-scalable way since the
84: length of colmap equals the number of global block columns.
85: */
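/*
   For example, with bs=2 and garray = {3,7} (the off-diagonal block columns in
   increasing global order), colmap[3] = 1 and colmap[7] = 3, i.e. the 1-based
   local column offset of the first entry of each block. A global column such
   as 15 is then located as colmap[15/2] - 1 + 15%2 = 3, its local column in B.
   A value of 0 (or a failed table lookup) means the block column is not
   present in B.
*/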
88: PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89: {
90: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
91: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
93: PetscInt nbs = B->nbs,i,bs=mat->rmap->bs;
96: #if defined (PETSC_USE_CTABLE)
97: PetscTableCreate(baij->nbs,&baij->colmap);
98: for (i=0; i<nbs; i++){
99: PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);
100: }
101: #else
102: PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);
103: PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));
104: PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));
105: for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106: #endif
107: return(0);
108: }
110: #define CHUNKSIZE 10
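/*
   The two macros below insert a single scalar at point location (row,col) of
   the local diagonal block A (first macro) or off-diagonal block B (second
   macro), both stored in SeqBAIJ format. They locate block row row/bs, find
   block column col/bs in the sorted index list with a short binary search
   followed by a linear scan, and update position bs*cidx+ridx of the bs*bs
   block (blocks are stored column-major). If the block is absent and new
   nonzeros are allowed, later entries in the row are shifted up and a new
   zeroed block is inserted, reallocating the row if it is full.
*/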
112: #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
113: { \
114: \
115: brow = row/bs; \
116: rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
117: rmax = aimax[brow]; nrow = ailen[brow]; \
118: bcol = col/bs; \
119: ridx = row % bs; cidx = col % bs; \
120: low = 0; high = nrow; \
121: while (high-low > 3) { \
122: t = (low+high)/2; \
123: if (rp[t] > bcol) high = t; \
124: else low = t; \
125: } \
126: for (_i=low; _i<high; _i++) { \
127: if (rp[_i] > bcol) break; \
128: if (rp[_i] == bcol) { \
129: bap = ap + bs2*_i + bs*cidx + ridx; \
130: if (addv == ADD_VALUES) *bap += value; \
131: else *bap = value; \
132: goto a_noinsert; \
133: } \
134: } \
135: if (a->nonew == 1) goto a_noinsert; \
136: if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
137: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
138: N = nrow++ - 1; \
139: /* shift up all the later entries in this row */ \
140: for (ii=N; ii>=_i; ii--) { \
141: rp[ii+1] = rp[ii]; \
142: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
143: } \
144: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); } \
145: rp[_i] = bcol; \
146: ap[bs2*_i + bs*cidx + ridx] = value; \
147: a_noinsert:; \
148: ailen[brow] = nrow; \
149: }
151: #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
152: { \
153: brow = row/bs; \
154: rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
155: rmax = bimax[brow]; nrow = bilen[brow]; \
156: bcol = col/bs; \
157: ridx = row % bs; cidx = col % bs; \
158: low = 0; high = nrow; \
159: while (high-low > 3) { \
160: t = (low+high)/2; \
161: if (rp[t] > bcol) high = t; \
162: else low = t; \
163: } \
164: for (_i=low; _i<high; _i++) { \
165: if (rp[_i] > bcol) break; \
166: if (rp[_i] == bcol) { \
167: bap = ap + bs2*_i + bs*cidx + ridx; \
168: if (addv == ADD_VALUES) *bap += value; \
169: else *bap = value; \
170: goto b_noinsert; \
171: } \
172: } \
173: if (b->nonew == 1) goto b_noinsert; \
174: if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
175: MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
176: CHKMEMQ;\
177: N = nrow++ - 1; \
178: /* shift up all the later entries in this row */ \
179: for (ii=N; ii>=_i; ii--) { \
180: rp[ii+1] = rp[ii]; \
181: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar)); \
182: } \
183: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));} \
184: rp[_i] = bcol; \
185: ap[bs2*_i + bs*cidx + ridx] = value; \
186: b_noinsert:; \
187: bilen[brow] = nrow; \
188: }
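/*
   MatSetValues_MPIBAIJ - inserts point entries one at a time. Rows owned by
   this process are routed either to the diagonal block A (columns inside the
   local column range) or to the off-diagonal block B (other columns, after
   translation through colmap); rows owned by other processes are placed in
   the stash and communicated during MatAssemblyBegin/End.
*/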
192: PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
193: {
194: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
195: MatScalar value;
196: PetscTruth roworiented = baij->roworiented;
198: PetscInt i,j,row,col;
199: PetscInt rstart_orig=mat->rmap->rstart;
200: PetscInt rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
201: PetscInt cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
203:   /* Some variables required in the macros */
204: Mat A = baij->A;
205: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
206: PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
207: MatScalar *aa=a->a;
209: Mat B = baij->B;
210: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
211: PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
212: MatScalar *ba=b->a;
214: PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol;
215: PetscInt low,high,t,ridx,cidx,bs2=a->bs2;
216: MatScalar *ap,*bap;
220: for (i=0; i<m; i++) {
221: if (im[i] < 0) continue;
222: #if defined(PETSC_USE_DEBUG)
223: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
224: #endif
225: if (im[i] >= rstart_orig && im[i] < rend_orig) {
226: row = im[i] - rstart_orig;
227: for (j=0; j<n; j++) {
228: if (in[j] >= cstart_orig && in[j] < cend_orig){
229: col = in[j] - cstart_orig;
230: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
231: MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
232: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
233: } else if (in[j] < 0) continue;
234: #if defined(PETSC_USE_DEBUG)
235:         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
236: #endif
237: else {
238: if (mat->was_assembled) {
239: if (!baij->colmap) {
240: CreateColmap_MPIBAIJ_Private(mat);
241: }
242: #if defined (PETSC_USE_CTABLE)
243: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
244: col = col - 1;
245: #else
246: col = baij->colmap[in[j]/bs] - 1;
247: #endif
248: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
249: DisAssemble_MPIBAIJ(mat);
250: col = in[j];
251: /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
252: B = baij->B;
253: b = (Mat_SeqBAIJ*)(B)->data;
254: bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
255: ba=b->a;
256: } else col += in[j]%bs;
257: } else col = in[j];
258: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259: MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
260: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
261: }
262: }
263: } else {
264: if (!baij->donotstash) {
265: if (roworiented) {
266: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
267: } else {
268: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
269: }
270: }
271: }
272: }
273: return(0);
274: }
278: PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279: {
280: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
281: const PetscScalar *value;
282: MatScalar *barray=baij->barray;
283: PetscTruth roworiented = baij->roworiented;
284: PetscErrorCode ierr;
285: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
286: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288:
290: if(!barray) {
291: PetscMalloc(bs2*sizeof(MatScalar),&barray);
292: baij->barray = barray;
293: }
295: if (roworiented) {
296: stepval = (n-1)*bs;
297: } else {
298: stepval = (m-1)*bs;
299: }
300: for (i=0; i<m; i++) {
301: if (im[i] < 0) continue;
302: #if defined(PETSC_USE_DEBUG)
303: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
304: #endif
305: if (im[i] >= rstart && im[i] < rend) {
306: row = im[i] - rstart;
307: for (j=0; j<n; j++) {
308: /* If NumCol = 1 then a copy is not required */
309: if ((roworiented) && (n == 1)) {
310: barray = (MatScalar*)v + i*bs2;
311: } else if((!roworiented) && (m == 1)) {
312: barray = (MatScalar*)v + j*bs2;
313: } else { /* Here a copy is required */
314: if (roworiented) {
315: value = v + i*(stepval+bs)*bs + j*bs;
316: } else {
317: value = v + j*(stepval+bs)*bs + i*bs;
318: }
319: for (ii=0; ii<bs; ii++,value+=stepval) {
320: for (jj=0; jj<bs; jj++) {
321: *barray++ = *value++;
322: }
323: }
324: barray -=bs2;
325: }
326:
327: if (in[j] >= cstart && in[j] < cend){
328: col = in[j] - cstart;
329: MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);
330: }
331: else if (in[j] < 0) continue;
332: #if defined(PETSC_USE_DEBUG)
333: else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
334: #endif
335: else {
336: if (mat->was_assembled) {
337: if (!baij->colmap) {
338: CreateColmap_MPIBAIJ_Private(mat);
339: }
341: #if defined(PETSC_USE_DEBUG)
342: #if defined (PETSC_USE_CTABLE)
343: { PetscInt data;
344: PetscTableFind(baij->colmap,in[j]+1,&data);
345: if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
346: }
347: #else
348: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
349: #endif
350: #endif
351: #if defined (PETSC_USE_CTABLE)
352: PetscTableFind(baij->colmap,in[j]+1,&col);
353: col = (col - 1)/bs;
354: #else
355: col = (baij->colmap[in[j]] - 1)/bs;
356: #endif
357: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358: DisAssemble_MPIBAIJ(mat);
359: col = in[j];
360: }
361: }
362: else col = in[j];
363: MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);
364: }
365: }
366: } else {
367: if (!baij->donotstash) {
368: if (roworiented) {
369: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
370: } else {
371: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
372: }
373: }
374: }
375: }
376: return(0);
377: }
379: #define HASH_KEY 0.6180339887
380: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
381: /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
382: /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
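/*
   HASH() implements multiplicative hashing: HASH_KEY is the fractional part
   of the golden ratio, so the fractional part of key*HASH_KEY is spread
   fairly uniformly over [0,1) and scaling by the table size gives a slot in
   [0,size). For example, with size=11 and key=7, 7*0.6180339887 = 4.326...,
   whose fractional part 0.326... maps to slot 3.
*/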
385: PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
386: {
387: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
388: PetscTruth roworiented = baij->roworiented;
390: PetscInt i,j,row,col;
391: PetscInt rstart_orig=mat->rmap->rstart;
392: PetscInt rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
393: PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
394: PetscReal tmp;
395: MatScalar **HD = baij->hd,value;
396: #if defined(PETSC_USE_DEBUG)
397: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
398: #endif
402: for (i=0; i<m; i++) {
403: #if defined(PETSC_USE_DEBUG)
404: if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
405: if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
406: #endif
407: row = im[i];
408: if (row >= rstart_orig && row < rend_orig) {
409: for (j=0; j<n; j++) {
410: col = in[j];
411: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412:         /* Look up into the Hash Table */
413: key = (row/bs)*Nbs+(col/bs)+1;
414: h1 = HASH(size,key,tmp);
416:
417: idx = h1;
418: #if defined(PETSC_USE_DEBUG)
419: insert_ct++;
420: total_ct++;
421: if (HT[idx] != key) {
422: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
423: if (idx == size) {
424: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
425: if (idx == h1) {
426: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
427: }
428: }
429: }
430: #else
431: if (HT[idx] != key) {
432: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
433: if (idx == size) {
434: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
435: if (idx == h1) {
436: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
437: }
438: }
439: }
440: #endif
441: /* A HASH table entry is found, so insert the values at the correct address */
442: if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
443: else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
444: }
445: } else {
446: if (!baij->donotstash) {
447: if (roworiented) {
448: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);
449: } else {
450: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);
451: }
452: }
453: }
454: }
455: #if defined(PETSC_USE_DEBUG)
456: baij->ht_total_ct = total_ct;
457: baij->ht_insert_ct = insert_ct;
458: #endif
459: return(0);
460: }
464: PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
465: {
466: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
467: PetscTruth roworiented = baij->roworiented;
468: PetscErrorCode ierr;
469: PetscInt i,j,ii,jj,row,col;
470: PetscInt rstart=baij->rstartbs;
471: PetscInt rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
472: PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
473: PetscReal tmp;
474: MatScalar **HD = baij->hd,*baij_a;
475: const PetscScalar *v_t,*value;
476: #if defined(PETSC_USE_DEBUG)
477: PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
478: #endif
479:
482: if (roworiented) {
483: stepval = (n-1)*bs;
484: } else {
485: stepval = (m-1)*bs;
486: }
487: for (i=0; i<m; i++) {
488: #if defined(PETSC_USE_DEBUG)
489: if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
490: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
491: #endif
492: row = im[i];
493: v_t = v + i*nbs2;
494: if (row >= rstart && row < rend) {
495: for (j=0; j<n; j++) {
496: col = in[j];
498: /* Look up into the Hash Table */
499: key = row*Nbs+col+1;
500: h1 = HASH(size,key,tmp);
501:
502: idx = h1;
503: #if defined(PETSC_USE_DEBUG)
504: total_ct++;
505: insert_ct++;
506: if (HT[idx] != key) {
507: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
508: if (idx == size) {
509: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
510: if (idx == h1) {
511: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
512: }
513: }
514: }
515: #else
516: if (HT[idx] != key) {
517: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
518: if (idx == size) {
519: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
520: if (idx == h1) {
521: SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
522: }
523: }
524: }
525: #endif
526: baij_a = HD[idx];
527: if (roworiented) {
528: /*value = v + i*(stepval+bs)*bs + j*bs;*/
529: /* value = v + (i*(stepval+bs)+j)*bs; */
530: value = v_t;
531: v_t += bs;
532: if (addv == ADD_VALUES) {
533: for (ii=0; ii<bs; ii++,value+=stepval) {
534: for (jj=ii; jj<bs2; jj+=bs) {
535: baij_a[jj] += *value++;
536: }
537: }
538: } else {
539: for (ii=0; ii<bs; ii++,value+=stepval) {
540: for (jj=ii; jj<bs2; jj+=bs) {
541: baij_a[jj] = *value++;
542: }
543: }
544: }
545: } else {
546: value = v + j*(stepval+bs)*bs + i*bs;
547: if (addv == ADD_VALUES) {
548: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
549: for (jj=0; jj<bs; jj++) {
550: baij_a[jj] += *value++;
551: }
552: }
553: } else {
554: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
555: for (jj=0; jj<bs; jj++) {
556: baij_a[jj] = *value++;
557: }
558: }
559: }
560: }
561: }
562: } else {
563: if (!baij->donotstash) {
564: if (roworiented) {
565: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
566: } else {
567: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
568: }
569: }
570: }
571: }
572: #if defined(PETSC_USE_DEBUG)
573: baij->ht_total_ct = total_ct;
574: baij->ht_insert_ct = insert_ct;
575: #endif
576: return(0);
577: }
581: PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
582: {
583: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
585: PetscInt bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
586: PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
589: for (i=0; i<m; i++) {
590: if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
591: if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
592: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
593: row = idxm[i] - bsrstart;
594: for (j=0; j<n; j++) {
595: if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
596: if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
597: if (idxn[j] >= bscstart && idxn[j] < bscend){
598: col = idxn[j] - bscstart;
599: MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
600: } else {
601: if (!baij->colmap) {
602: CreateColmap_MPIBAIJ_Private(mat);
603: }
604: #if defined (PETSC_USE_CTABLE)
605: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
606: data --;
607: #else
608: data = baij->colmap[idxn[j]/bs]-1;
609: #endif
610: if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
611: else {
612: col = data + idxn[j]%bs;
613: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
614: }
615: }
616: }
617: } else {
618: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
619: }
620: }
621: return(0);
622: }
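/*
   MatNorm_MPIBAIJ - Frobenius norm: sum of squares of the local A and B
   entries, MPI_SUM reduction, then a square root. 1-norm: per-column sums of
   absolute values accumulated into an array of length cmap->N (off-diagonal
   columns translated through garray), MPI_SUM reduction, then the maximum.
   Infinity norm: per-row sums over the A and B block rows, local maximum,
   then an MPI_MAX reduction.
*/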
626: PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
627: {
628: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
629: Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
631: PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
632: PetscReal sum = 0.0;
633: MatScalar *v;
636: if (baij->size == 1) {
637: MatNorm(baij->A,type,nrm);
638: } else {
639: if (type == NORM_FROBENIUS) {
640: v = amat->a;
641: nz = amat->nz*bs2;
642: for (i=0; i<nz; i++) {
643: #if defined(PETSC_USE_COMPLEX)
644: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
645: #else
646: sum += (*v)*(*v); v++;
647: #endif
648: }
649: v = bmat->a;
650: nz = bmat->nz*bs2;
651: for (i=0; i<nz; i++) {
652: #if defined(PETSC_USE_COMPLEX)
653: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
654: #else
655: sum += (*v)*(*v); v++;
656: #endif
657: }
658: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
659: *nrm = sqrt(*nrm);
660: } else if (type == NORM_1) { /* max column sum */
661: PetscReal *tmp,*tmp2;
662: PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs;
663: PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);
664: PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));
665: v = amat->a; jj = amat->j;
666: for (i=0; i<amat->nz; i++) {
667: for (j=0; j<bs; j++){
668: col = bs*(cstart + *jj) + j; /* column index */
669: for (row=0; row<bs; row++){
670: tmp[col] += PetscAbsScalar(*v); v++;
671: }
672: }
673: jj++;
674: }
675: v = bmat->a; jj = bmat->j;
676: for (i=0; i<bmat->nz; i++) {
677: for (j=0; j<bs; j++){
678: col = bs*garray[*jj] + j;
679: for (row=0; row<bs; row++){
680: tmp[col] += PetscAbsScalar(*v); v++;
681: }
682: }
683: jj++;
684: }
685: MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);
686: *nrm = 0.0;
687: for (j=0; j<mat->cmap->N; j++) {
688: if (tmp2[j] > *nrm) *nrm = tmp2[j];
689: }
690: PetscFree2(tmp,tmp2);
691: } else if (type == NORM_INFINITY) { /* max row sum */
692: PetscReal *sums;
693:       PetscMalloc(bs*sizeof(PetscReal),&sums);
694: sum = 0.0;
695: for (j=0; j<amat->mbs; j++) {
696: for (row=0; row<bs; row++) sums[row] = 0.0;
697: v = amat->a + bs2*amat->i[j];
698: nz = amat->i[j+1]-amat->i[j];
699: for (i=0; i<nz; i++) {
700: for (col=0; col<bs; col++){
701: for (row=0; row<bs; row++){
702: sums[row] += PetscAbsScalar(*v); v++;
703: }
704: }
705: }
706: v = bmat->a + bs2*bmat->i[j];
707: nz = bmat->i[j+1]-bmat->i[j];
708: for (i=0; i<nz; i++) {
709: for (col=0; col<bs; col++){
710: for (row=0; row<bs; row++){
711: sums[row] += PetscAbsScalar(*v); v++;
712: }
713: }
714: }
715: for (row=0; row<bs; row++){
716: if (sums[row] > sum) sum = sums[row];
717: }
718: }
719: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);
720: PetscFree(sums);
721: } else {
722: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
723: }
724: }
725: return(0);
726: }
728: /*
729:      Creates and fills the hash table.
730:      This table is created only once.
731:      If new entries need to be added to the matrix
732:      then the hash table has to be destroyed and
733:      recreated.
734: */
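/*
   The table uses open addressing with linear probing: every nonzero block of
   A and B is given the key (block row)*Nbs + (block column) + 1 in global
   block numbering, hashed with HASH(), and stored in the first empty slot at
   or after that position (wrapping modulo ht_size). HT[] holds the keys and
   HD[] holds the address of the corresponding bs*bs block of values, so the
   hash-table MatSetValues variants can write straight into the matrix.
*/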
737: PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
738: {
739: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
740: Mat A = baij->A,B=baij->B;
741: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
742: PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
744: PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
745: PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
746: PetscInt *HT,key;
747: MatScalar **HD;
748: PetscReal tmp;
749: #if defined(PETSC_USE_INFO)
750: PetscInt ct=0,max=0;
751: #endif
754: if (baij->ht) return(0);
756: baij->ht_size = (PetscInt)(factor*nz);
757: ht_size = baij->ht_size;
758:
759: /* Allocate Memory for Hash Table */
760: PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);
761: PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));
762: PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));
763: HD = baij->hd;
764: HT = baij->ht;
766: /* Loop Over A */
767: for (i=0; i<a->mbs; i++) {
768: for (j=ai[i]; j<ai[i+1]; j++) {
769: row = i+rstart;
770: col = aj[j]+cstart;
771:
772: key = row*Nbs + col + 1;
773: h1 = HASH(ht_size,key,tmp);
774: for (k=0; k<ht_size; k++){
775: if (!HT[(h1+k)%ht_size]) {
776: HT[(h1+k)%ht_size] = key;
777: HD[(h1+k)%ht_size] = a->a + j*bs2;
778: break;
779: #if defined(PETSC_USE_INFO)
780: } else {
781: ct++;
782: #endif
783: }
784: }
785: #if defined(PETSC_USE_INFO)
786: if (k> max) max = k;
787: #endif
788: }
789: }
790: /* Loop Over B */
791: for (i=0; i<b->mbs; i++) {
792: for (j=bi[i]; j<bi[i+1]; j++) {
793: row = i+rstart;
794: col = garray[bj[j]];
795: key = row*Nbs + col + 1;
796: h1 = HASH(ht_size,key,tmp);
797: for (k=0; k<ht_size; k++){
798: if (!HT[(h1+k)%ht_size]) {
799: HT[(h1+k)%ht_size] = key;
800: HD[(h1+k)%ht_size] = b->a + j*bs2;
801: break;
802: #if defined(PETSC_USE_INFO)
803: } else {
804: ct++;
805: #endif
806: }
807: }
808: #if defined(PETSC_USE_INFO)
809: if (k> max) max = k;
810: #endif
811: }
812: }
813:
814: /* Print Summary */
815: #if defined(PETSC_USE_INFO)
816: for (i=0,j=0; i<ht_size; i++) {
817: if (HT[i]) {j++;}
818: }
819: PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
820: #endif
821: return(0);
822: }
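/*
   Assembly is done in two phases. MatAssemblyBegin checks that all processes
   use the same InsertMode and starts scattering the stashed off-process
   entries (both the scalar stash and the block stash) to their owners.
   MatAssemblyEnd receives them and applies them with
   MatSetValues(Blocked)_MPIBAIJ, assembles A and B, sets up the scatter used
   by MatMult on the first final assembly, and builds the hash table when the
   MAT_USE_HASH_TABLE option was set.
*/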
826: PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
827: {
828: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
830: PetscInt nstash,reallocs;
831: InsertMode addv;
834: if (baij->donotstash) {
835: return(0);
836: }
838: /* make sure all processors are either in INSERTMODE or ADDMODE */
839: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);
840: if (addv == (ADD_VALUES|INSERT_VALUES)) {
841: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
842: }
843: mat->insertmode = addv; /* in case this processor had no cache */
845: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
846: MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);
847: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
848: PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
849: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
850: PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
851: return(0);
852: }
856: PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
857: {
858: Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
859: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data;
861: PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2;
862: PetscInt *row,*col;
863: PetscTruth r1,r2,r3,other_disassembled;
864: MatScalar *val;
865: InsertMode addv = mat->insertmode;
866: PetscMPIInt n;
868: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
870: if (!baij->donotstash) {
871: while (1) {
872: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
873: if (!flg) break;
875: for (i=0; i<n;) {
876: /* Now identify the consecutive vals belonging to the same row */
877: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
878: if (j < n) ncols = j-i;
879: else ncols = n-i;
880: /* Now assemble all these values with a single function call */
881: MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
882: i = j;
883: }
884: }
885: MatStashScatterEnd_Private(&mat->stash);
886: /* Now process the block-stash. Since the values are stashed column-oriented,
887: set the roworiented flag to column oriented, and after MatSetValues()
888: restore the original flags */
889: r1 = baij->roworiented;
890: r2 = a->roworiented;
891: r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
892: baij->roworiented = PETSC_FALSE;
893: a->roworiented = PETSC_FALSE;
894: (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
895: while (1) {
896: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
897: if (!flg) break;
898:
899: for (i=0; i<n;) {
900: /* Now identify the consecutive vals belonging to the same row */
901: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
902: if (j < n) ncols = j-i;
903: else ncols = n-i;
904: MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
905: i = j;
906: }
907: }
908: MatStashScatterEnd_Private(&mat->bstash);
909: baij->roworiented = r1;
910: a->roworiented = r2;
911: ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
912: }
913:
914: MatAssemblyBegin(baij->A,mode);
915: MatAssemblyEnd(baij->A,mode);
917: /* determine if any processor has disassembled, if so we must
918:      also disassemble ourselves, in order that we may reassemble. */
919: /*
920: if nonzero structure of submatrix B cannot change then we know that
921: no processor disassembled thus we can skip this stuff
922: */
923: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
924: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);
925: if (mat->was_assembled && !other_disassembled) {
926: DisAssemble_MPIBAIJ(mat);
927: }
928: }
930: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
931: MatSetUpMultiply_MPIBAIJ(mat);
932: }
933: ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
934: MatAssemblyBegin(baij->B,mode);
935: MatAssemblyEnd(baij->B,mode);
936:
937: #if defined(PETSC_USE_INFO)
938: if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
939: PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
940: baij->ht_total_ct = 0;
941: baij->ht_insert_ct = 0;
942: }
943: #endif
944: if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
945: MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);
946: mat->ops->setvalues = MatSetValues_MPIBAIJ_HT;
947: mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
948: }
950: PetscFree2(baij->rowvalues,baij->rowindices);
951: baij->rowvalues = 0;
952: return(0);
953: }
957: static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
958: {
959: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
960: PetscErrorCode ierr;
961: PetscMPIInt size = baij->size,rank = baij->rank;
962: PetscInt bs = mat->rmap->bs;
963: PetscTruth iascii,isdraw;
964: PetscViewer sviewer;
965: PetscViewerFormat format;
968: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
969: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
970: if (iascii) {
971: PetscViewerGetFormat(viewer,&format);
972: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
973: MatInfo info;
974: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
975: MatGetInfo(mat,MAT_LOCAL,&info);
976: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
977: rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
978: mat->rmap->bs,(PetscInt)info.memory);
979: MatGetInfo(baij->A,MAT_LOCAL,&info);
980: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);
981: MatGetInfo(baij->B,MAT_LOCAL,&info);
982: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);
983: PetscViewerFlush(viewer);
984: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
985: VecScatterView(baij->Mvctx,viewer);
986: return(0);
987: } else if (format == PETSC_VIEWER_ASCII_INFO) {
988: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
989: return(0);
990: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
991: return(0);
992: }
993: }
995: if (isdraw) {
996: PetscDraw draw;
997: PetscTruth isnull;
998: PetscViewerDrawGetDraw(viewer,0,&draw);
999: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1000: }
1002: if (size == 1) {
1003: PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);
1004: MatView(baij->A,viewer);
1005: } else {
1006: /* assemble the entire matrix onto first processor. */
1007: Mat A;
1008: Mat_SeqBAIJ *Aloc;
1009: PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1010: MatScalar *a;
1012:     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1013: /* Perhaps this should be the type of mat? */
1014: MatCreate(((PetscObject)mat)->comm,&A);
1015: if (!rank) {
1016: MatSetSizes(A,M,N,M,N);
1017: } else {
1018: MatSetSizes(A,0,0,M,N);
1019: }
1020: MatSetType(A,MATMPIBAIJ);
1021: MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);
1022: PetscLogObjectParent(mat,A);
1024: /* copy over the A part */
1025: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1026: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1027: PetscMalloc(bs*sizeof(PetscInt),&rvals);
1029: for (i=0; i<mbs; i++) {
1030: rvals[0] = bs*(baij->rstartbs + i);
1031: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1032: for (j=ai[i]; j<ai[i+1]; j++) {
1033: col = (baij->cstartbs+aj[j])*bs;
1034: for (k=0; k<bs; k++) {
1035: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1036: col++; a += bs;
1037: }
1038: }
1039: }
1040: /* copy over the B part */
1041: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1042: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1043: for (i=0; i<mbs; i++) {
1044: rvals[0] = bs*(baij->rstartbs + i);
1045: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1046: for (j=ai[i]; j<ai[i+1]; j++) {
1047: col = baij->garray[aj[j]]*bs;
1048: for (k=0; k<bs; k++) {
1049: MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);
1050: col++; a += bs;
1051: }
1052: }
1053: }
1054: PetscFree(rvals);
1055: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1056: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1057: /*
1058: Everyone has to call to draw the matrix since the graphics waits are
1059: synchronized across all processors that share the PetscDraw object
1060: */
1061: PetscViewerGetSingleton(viewer,&sviewer);
1062: if (!rank) {
1063: PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);
1064: MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);
1065: }
1066: PetscViewerRestoreSingleton(viewer,&sviewer);
1067: MatDestroy(A);
1068: }
1069: return(0);
1070: }
1074: PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1075: {
1077: PetscTruth iascii,isdraw,issocket,isbinary;
1080: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1081: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1082: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1083: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1084: if (iascii || isdraw || issocket || isbinary) {
1085: MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);
1086: } else {
1087: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1088: }
1089: return(0);
1090: }
1094: PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1095: {
1096: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1100: #if defined(PETSC_USE_LOG)
1101: PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1102: #endif
1103: MatStashDestroy_Private(&mat->stash);
1104: MatStashDestroy_Private(&mat->bstash);
1105: MatDestroy(baij->A);
1106: MatDestroy(baij->B);
1107: #if defined (PETSC_USE_CTABLE)
1108: if (baij->colmap) {PetscTableDestroy(baij->colmap);}
1109: #else
1110: PetscFree(baij->colmap);
1111: #endif
1112: PetscFree(baij->garray);
1113: if (baij->lvec) {VecDestroy(baij->lvec);}
1114: if (baij->Mvctx) {VecScatterDestroy(baij->Mvctx);}
1115: PetscFree2(baij->rowvalues,baij->rowindices);
1116: PetscFree(baij->barray);
1117: PetscFree2(baij->hd,baij->ht);
1118: PetscFree(baij->rangebs);
1119: PetscFree(baij);
1121: PetscObjectChangeTypeName((PetscObject)mat,0);
1122: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
1123: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
1124: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
1125: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);
1126: PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);
1127: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
1128: PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);
1129: return(0);
1130: }
1134: PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1135: {
1136: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1138: PetscInt nt;
1141: VecGetLocalSize(xx,&nt);
1142: if (nt != A->cmap->n) {
1143: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1144: }
1145: VecGetLocalSize(yy,&nt);
1146: if (nt != A->rmap->n) {
1147:     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1148: }
1149: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1150: (*a->A->ops->mult)(a->A,xx,yy);
1151: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1152: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1153: return(0);
1154: }
1158: PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1159: {
1160: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1164: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1165: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1166: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
1167: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1168: return(0);
1169: }
1173: PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1174: {
1175: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1177: PetscTruth merged;
1180: VecScatterGetMerged(a->Mvctx,&merged);
1181: /* do nondiagonal part */
1182: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1183: if (!merged) {
1184: /* send it on its way */
1185: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1186: /* do local part */
1187: (*a->A->ops->multtranspose)(a->A,xx,yy);
1188: /* receive remote parts: note this assumes the values are not actually */
1189: /* inserted in yy until the next line */
1190: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1191: } else {
1192: /* do local part */
1193: (*a->A->ops->multtranspose)(a->A,xx,yy);
1194: /* send it on its way */
1195: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1196: /* values actually were received in the Begin() but we need to call this nop */
1197: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
1198: }
1199: return(0);
1200: }
1204: PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1205: {
1206: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1210: /* do nondiagonal part */
1211: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1212: /* send it on its way */
1213: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1214: /* do local part */
1215: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1216: /* receive remote parts: note this assumes the values are not actually */
1217: /* inserted in yy until the next line, which is true for my implementation*/
1218: /* but is not perhaps always true. */
1219: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
1220: return(0);
1221: }
1223: /*
1224: This only works correctly for square matrices where the subblock A->A is the
1225: diagonal block
1226: */
1229: PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1230: {
1231: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1235: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1236: MatGetDiagonal(a->A,v);
1237: return(0);
1238: }
1242: PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1243: {
1244: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1248: MatScale(a->A,aa);
1249: MatScale(a->B,aa);
1250: return(0);
1251: }
1255: PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1256: {
1257: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1258: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1260: PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1261: PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1262: PetscInt *cmap,*idx_p,cstart = mat->cstartbs;
1265:   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1266: if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1267: mat->getrowactive = PETSC_TRUE;
1269: if (!mat->rowvalues && (idx || v)) {
1270: /*
1271: allocate enough space to hold information from the longest row.
1272: */
1273: Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1274: PetscInt max = 1,mbs = mat->mbs,tmp;
1275: for (i=0; i<mbs; i++) {
1276: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1277: if (max < tmp) { max = tmp; }
1278: }
1279: PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);
1280: }
1281: lrow = row - brstart;
1283: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1284: if (!v) {pvA = 0; pvB = 0;}
1285: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1286: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1287: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1288: nztot = nzA + nzB;
1290: cmap = mat->garray;
1291: if (v || idx) {
1292: if (nztot) {
1293: /* Sort by increasing column numbers, assuming A and B already sorted */
1294: PetscInt imark = -1;
1295: if (v) {
1296: *v = v_p = mat->rowvalues;
1297: for (i=0; i<nzB; i++) {
1298: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1299: else break;
1300: }
1301: imark = i;
1302: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1303: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1304: }
1305: if (idx) {
1306: *idx = idx_p = mat->rowindices;
1307: if (imark > -1) {
1308: for (i=0; i<imark; i++) {
1309: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1310: }
1311: } else {
1312: for (i=0; i<nzB; i++) {
1313: if (cmap[cworkB[i]/bs] < cstart)
1314: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1315: else break;
1316: }
1317: imark = i;
1318: }
1319: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1320: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1321: }
1322: } else {
1323: if (idx) *idx = 0;
1324: if (v) *v = 0;
1325: }
1326: }
1327: *nz = nztot;
1328: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1329: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1330: return(0);
1331: }
1335: PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1336: {
1337: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1340: if (!baij->getrowactive) {
1341: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1342: }
1343: baij->getrowactive = PETSC_FALSE;
1344: return(0);
1345: }
1349: PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1350: {
1351: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1355: MatZeroEntries(l->A);
1356: MatZeroEntries(l->B);
1357: return(0);
1358: }
1362: PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1363: {
1364: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1365: Mat A = a->A,B = a->B;
1367: PetscReal isend[5],irecv[5];
1370: info->block_size = (PetscReal)matin->rmap->bs;
1371: MatGetInfo(A,MAT_LOCAL,info);
1372: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1373: isend[3] = info->memory; isend[4] = info->mallocs;
1374: MatGetInfo(B,MAT_LOCAL,info);
1375: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1376: isend[3] += info->memory; isend[4] += info->mallocs;
1377: if (flag == MAT_LOCAL) {
1378: info->nz_used = isend[0];
1379: info->nz_allocated = isend[1];
1380: info->nz_unneeded = isend[2];
1381: info->memory = isend[3];
1382: info->mallocs = isend[4];
1383: } else if (flag == MAT_GLOBAL_MAX) {
1384: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);
1385: info->nz_used = irecv[0];
1386: info->nz_allocated = irecv[1];
1387: info->nz_unneeded = irecv[2];
1388: info->memory = irecv[3];
1389: info->mallocs = irecv[4];
1390: } else if (flag == MAT_GLOBAL_SUM) {
1391: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);
1392: info->nz_used = irecv[0];
1393: info->nz_allocated = irecv[1];
1394: info->nz_unneeded = irecv[2];
1395: info->memory = irecv[3];
1396: info->mallocs = irecv[4];
1397: } else {
1398: SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1399: }
1400: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1401: info->fill_ratio_needed = 0;
1402: info->factor_mallocs = 0;
1403: return(0);
1404: }
1408: PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1409: {
1410: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1414: switch (op) {
1415: case MAT_NEW_NONZERO_LOCATIONS:
1416: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1417: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1418: case MAT_KEEP_NONZERO_PATTERN:
1419: case MAT_NEW_NONZERO_LOCATION_ERR:
1420: MatSetOption(a->A,op,flg);
1421: MatSetOption(a->B,op,flg);
1422: break;
1423: case MAT_ROW_ORIENTED:
1424: a->roworiented = flg;
1425: MatSetOption(a->A,op,flg);
1426: MatSetOption(a->B,op,flg);
1427: break;
1428: case MAT_NEW_DIAGONALS:
1429: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1430: break;
1431: case MAT_IGNORE_OFF_PROC_ENTRIES:
1432: a->donotstash = flg;
1433: break;
1434: case MAT_USE_HASH_TABLE:
1435: a->ht_flag = flg;
1436: break;
1437: case MAT_SYMMETRIC:
1438: case MAT_STRUCTURALLY_SYMMETRIC:
1439: case MAT_HERMITIAN:
1440: case MAT_SYMMETRY_ETERNAL:
1441: MatSetOption(a->A,op,flg);
1442: break;
1443: default:
1444: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1445: }
1446: return(0);
1447: }
1451: PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1452: {
1453: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1454: Mat_SeqBAIJ *Aloc;
1455: Mat B;
1457: PetscInt M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1458: PetscInt bs=A->rmap->bs,mbs=baij->mbs;
1459: MatScalar *a;
1460:
1462: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1463: if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1464: MatCreate(((PetscObject)A)->comm,&B);
1465: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
1466: MatSetType(B,((PetscObject)A)->type_name);
1467: MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);
1468: } else {
1469: B = *matout;
1470: }
1472: /* copy over the A part */
1473: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1474: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1475: PetscMalloc(bs*sizeof(PetscInt),&rvals);
1476:
1477: for (i=0; i<mbs; i++) {
1478: rvals[0] = bs*(baij->rstartbs + i);
1479: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1480: for (j=ai[i]; j<ai[i+1]; j++) {
1481: col = (baij->cstartbs+aj[j])*bs;
1482: for (k=0; k<bs; k++) {
1483: MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1484: col++; a += bs;
1485: }
1486: }
1487: }
1488: /* copy over the B part */
1489: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1490: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1491: for (i=0; i<mbs; i++) {
1492: rvals[0] = bs*(baij->rstartbs + i);
1493: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1494: for (j=ai[i]; j<ai[i+1]; j++) {
1495: col = baij->garray[aj[j]]*bs;
1496: for (k=0; k<bs; k++) {
1497: MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);
1498: col++; a += bs;
1499: }
1500: }
1501: }
1502: PetscFree(rvals);
1503: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1504: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1505:
1506: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1507: *matout = B;
1508: } else {
1509: MatHeaderCopy(A,B);
1510: }
1511: return(0);
1512: }
1516: PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1517: {
1518: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1519: Mat a = baij->A,b = baij->B;
1521: PetscInt s1,s2,s3;
1524: MatGetLocalSize(mat,&s2,&s3);
1525: if (rr) {
1526: VecGetLocalSize(rr,&s1);
1527: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1528: /* Overlap communication with computation. */
1529: VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1530: }
1531: if (ll) {
1532: VecGetLocalSize(ll,&s1);
1533: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1534: (*b->ops->diagonalscale)(b,ll,PETSC_NULL);
1535: }
1536: /* scale the diagonal block */
1537: (*a->ops->diagonalscale)(a,ll,rr);
1539: if (rr) {
1540: /* Do a scatter end and then right scale the off-diagonal block */
1541: VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1542: (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);
1543: }
1544:
1545: return(0);
1546: }
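/*
   MatZeroRows_MPIBAIJ - the global row indices may refer to rows owned by
   other processes, so each process first determines the owner of every index,
   sends the indices with nonblocking point-to-point messages, and collects
   the rows it owns into lrows[]. Those rows are then zeroed locally with
   MatZeroRows_SeqBAIJ; when a nonzero diag is requested and the diagonal
   block is square, the diagonal entries are set in the same call, otherwise
   they are inserted afterwards with MatSetValues() and a reassembly.
*/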
1550: PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1551: {
1552: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1554: PetscMPIInt imdex,size = l->size,n,rank = l->rank;
1555: PetscInt i,*owners = A->rmap->range;
1556: PetscInt *nprocs,j,idx,nsends,row;
1557: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
1558: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1559: PetscInt *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1560: MPI_Comm comm = ((PetscObject)A)->comm;
1561: MPI_Request *send_waits,*recv_waits;
1562: MPI_Status recv_status,*send_status;
1563: #if defined(PETSC_DEBUG)
1564: PetscTruth found = PETSC_FALSE;
1565: #endif
1566:
1568: /* first count number of contributors to each processor */
1569: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
1570: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
1571: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
1572: j = 0;
1573: for (i=0; i<N; i++) {
1574: if (lastidx > (idx = rows[i])) j = 0;
1575: lastidx = idx;
1576: for (; j<size; j++) {
1577: if (idx >= owners[j] && idx < owners[j+1]) {
1578: nprocs[2*j]++;
1579: nprocs[2*j+1] = 1;
1580: owner[i] = j;
1581: #if defined(PETSC_DEBUG)
1582: found = PETSC_TRUE;
1583: #endif
1584: break;
1585: }
1586: }
1587: #if defined(PETSC_DEBUG)
1588: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1589: found = PETSC_FALSE;
1590: #endif
1591: }
1592: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1593:
1594: /* inform other processors of number of messages and max length*/
1595: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1596:
1597: /* post receives: */
1598: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
1599: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1600: for (i=0; i<nrecvs; i++) {
1601: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1602: }
1603:
1604: /* do sends:
1605: 1) starts[i] gives the starting index in svalues for stuff going to
1606: the ith processor
1607: */
1608: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
1609: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1610: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
1611: starts[0] = 0;
1612: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1613: for (i=0; i<N; i++) {
1614: svalues[starts[owner[i]]++] = rows[i];
1615: }
1616:
1617: starts[0] = 0;
1618: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1619: count = 0;
1620: for (i=0; i<size; i++) {
1621: if (nprocs[2*i+1]) {
1622: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
1623: }
1624: }
1625: PetscFree(starts);
1627: base = owners[rank];
1628:
1629: /* wait on receives */
1630: PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);
1631: count = nrecvs;
1632: slen = 0;
1633: while (count) {
1634: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1635: /* unpack receives into our local space */
1636: MPI_Get_count(&recv_status,MPIU_INT,&n);
1637: source[imdex] = recv_status.MPI_SOURCE;
1638: lens[imdex] = n;
1639: slen += n;
1640: count--;
1641: }
1642: PetscFree(recv_waits);
1643:
1644: /* move the data into the send scatter */
1645: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
1646: count = 0;
1647: for (i=0; i<nrecvs; i++) {
1648: values = rvalues + i*nmax;
1649: for (j=0; j<lens[i]; j++) {
1650: lrows[count++] = values[j] - base;
1651: }
1652: }
1653: PetscFree(rvalues);
1654: PetscFree2(lens,source);
1655: PetscFree(owner);
1656: PetscFree(nprocs);
1657:
1658: /* actually zap the local rows */
1659: /*
1660: Zero the required rows. If the "diagonal block" of the matrix
1661: is square and the user wishes to set the diagonal we use separate
1662:      code so that MatSetValues() is not called for each diagonal entry, which
1663:      would allocate new memory with many mallocs and slow things down.
1665: */
1666: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1667: MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);
1668: if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1669: MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);
1670: } else if (diag != 0.0) {
1671: MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);
1672: if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1673: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1674: MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1675: }
1676: for (i=0; i<slen; i++) {
1677: row = lrows[i] + rstart_bs;
1678: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
1679: }
1680: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1681: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1682: } else {
1683: MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);
1684: }
1686: PetscFree(lrows);
1688: /* wait on sends */
1689: if (nsends) {
1690: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1691: MPI_Waitall(nsends,send_waits,send_status);
1692: PetscFree(send_status);
1693: }
1694: PetscFree(send_waits);
1695: PetscFree(svalues);
1697: return(0);
1698: }
1702: PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1703: {
1704: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1708: MatSetUnfactored(a->A);
1709: return(0);
1710: }
1712: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1716: PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1717: {
1718: Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1719: Mat a,b,c,d;
1720: PetscTruth flg;
1724: a = matA->A; b = matA->B;
1725: c = matB->A; d = matB->B;
1727: MatEqual(a,c,&flg);
1728: if (flg) {
1729: MatEqual(b,d,&flg);
1730: }
1731: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
1732: return(0);
1733: }
1737: PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1738: {
1740: Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1741: Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
1744: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1745: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1746: MatCopy_Basic(A,B,str);
1747: } else {
1748: MatCopy(a->A,b->A,str);
1749: MatCopy(a->B,b->B,str);
1750: }
1751: return(0);
1752: }
1756: PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1757: {
1761: MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1762: return(0);
1763: }
1767: PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1768: {
1770: Mat_MPIBAIJ *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1771: PetscBLASInt bnz,one=1;
1772: Mat_SeqBAIJ *x,*y;
1775: if (str == SAME_NONZERO_PATTERN) {
1776: PetscScalar alpha = a;
1777: x = (Mat_SeqBAIJ *)xx->A->data;
1778: y = (Mat_SeqBAIJ *)yy->A->data;
1779: bnz = PetscBLASIntCast(x->nz);
1780: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1781: x = (Mat_SeqBAIJ *)xx->B->data;
1782: y = (Mat_SeqBAIJ *)yy->B->data;
1783: bnz = PetscBLASIntCast(x->nz);
1784: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1785: } else {
1786: MatAXPY_Basic(Y,a,X,str);
1787: }
1788: return(0);
1789: }
1793: PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1794: {
1795: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1796: PetscInt rbs,cbs;
1800: MatSetBlockSize(a->A,bs);
1801: MatSetBlockSize(a->B,bs);
1802: PetscLayoutGetBlockSize(A->rmap,&rbs);
1803: PetscLayoutGetBlockSize(A->cmap,&cbs);
1804: if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1805: if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1806: return(0);
1807: }
1811: PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1812: {
1813: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1817: MatRealPart(a->A);
1818: MatRealPart(a->B);
1819: return(0);
1820: }
1824: PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1825: {
1826: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1830: MatImaginaryPart(a->A);
1831: MatImaginaryPart(a->B);
1832: return(0);
1833: }
1837: PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1838: {
1840: IS iscol_local;
1841: PetscInt csize;
1844: ISGetLocalSize(iscol,&csize);
1845: if (call == MAT_REUSE_MATRIX) {
1846: PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);
1847: if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1848: } else {
1849: ISAllGather(iscol,&iscol_local);
1850: }
1851: MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
1852: if (call == MAT_INITIAL_MATRIX) {
1853: PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);
1854: ISDestroy(iscol_local);
1855: }
1856: return(0);
1857: }
1861: /*
1862:    Not great since it makes two copies of the submatrix: first a SeqBAIJ
1863:    on each process, and then the final result obtained by concatenating those local matrices.
1864:    Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
1865: */
1866: PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1867: {
1869: PetscMPIInt rank,size;
1870: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1871: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1872: Mat *local,M,Mreuse;
1873: MatScalar *vwork,*aa;
1874: MPI_Comm comm = ((PetscObject)mat)->comm;
1875: Mat_SeqBAIJ *aij;
1879: MPI_Comm_rank(comm,&rank);
1880: MPI_Comm_size(comm,&size);
1882: if (call == MAT_REUSE_MATRIX) {
1883: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
1884: if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1885: local = &Mreuse;
1886: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
1887: } else {
1888: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
1889: Mreuse = *local;
1890: PetscFree(local);
1891: }
1893: /*
1894: m - number of local rows
1895: n - number of columns (same on all processors)
1896: rstart - first row in new global matrix generated
1897: */
1898: MatGetBlockSize(mat,&bs);
1899: MatGetSize(Mreuse,&m,&n);
1900: m = m/bs;
1901: n = n/bs;
1902:
1903: if (call == MAT_INITIAL_MATRIX) {
1904: aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1905: ii = aij->i;
1906: jj = aij->j;
1908: /*
1909: Determine the number of non-zeros in the diagonal and off-diagonal
1910: portions of the matrix in order to do correct preallocation
1911: */
1913: /* first get start and end of "diagonal" columns */
1914: if (csize == PETSC_DECIDE) {
1915: ISGetSize(isrow,&mglobal);
1916: if (mglobal == n*bs) { /* square matrix */
1917: nlocal = m;
1918: } else {
1919: nlocal = n/size + ((n % size) > rank);
1920: }
1921: } else {
1922: nlocal = csize/bs;
1923: }
1924: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
1925: rstart = rend - nlocal;
1926: if (rank == size - 1 && rend != n) {
1927: SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1928: }
1930: /* next, compute all the lengths */
1931: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
1932: olens = dlens + m;
1933: for (i=0; i<m; i++) {
1934: jend = ii[i+1] - ii[i];
1935: olen = 0;
1936: dlen = 0;
1937: for (j=0; j<jend; j++) {
1938: if (*jj < rstart || *jj >= rend) olen++;
1939: else dlen++;
1940: jj++;
1941: }
1942: olens[i] = olen;
1943: dlens[i] = dlen;
1944: }
1945: MatCreate(comm,&M);
1946: MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);
1947: MatSetType(M,((PetscObject)mat)->type_name);
1948: MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);
1949: PetscFree(dlens);
1950: } else {
1951: PetscInt ml,nl;
1953: M = *newmat;
1954: MatGetLocalSize(M,&ml,&nl);
1955: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1956: MatZeroEntries(M);
1957: /*
1958:       The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1959:       rather than the slower MatSetValuesBlocked().
1960: */
1961: M->was_assembled = PETSC_TRUE;
1962: M->assembled = PETSC_FALSE;
1963: }
1964: MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);
1965: MatGetOwnershipRange(M,&rstart,&rend);
1966: aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1967: ii = aij->i;
1968: jj = aij->j;
1969: aa = aij->a;
1970: for (i=0; i<m; i++) {
1971: row = rstart/bs + i;
1972: nz = ii[i+1] - ii[i];
1973: cwork = jj; jj += nz;
1974: vwork = aa; aa += nz;
1975: MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
1976: }
1978: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
1979: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
1980: *newmat = M;
1982: /* save submatrix used in processor for next request */
1983: if (call == MAT_INITIAL_MATRIX) {
1984: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
1985: PetscObjectDereference((PetscObject)Mreuse);
1986: }
1988: return(0);
1989: }
1993: PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1994: {
1995: MPI_Comm comm,pcomm;
1996: PetscInt first,local_size,nrows;
1997: const PetscInt *rows;
1998: PetscMPIInt size;
1999: IS crowp,growp,irowp,lrowp,lcolp,icolp;
2003: PetscObjectGetComm((PetscObject)A,&comm);
2004: /* make a collective version of 'rowp' */
2005: PetscObjectGetComm((PetscObject)rowp,&pcomm);
2006: if (pcomm==comm) {
2007: crowp = rowp;
2008: } else {
2009: ISGetSize(rowp,&nrows);
2010: ISGetIndices(rowp,&rows);
2011: ISCreateGeneral(comm,nrows,rows,&crowp);
2012: ISRestoreIndices(rowp,&rows);
2013: }
2014: /* collect the global row permutation and invert it */
2015: ISAllGather(crowp,&growp);
2016: ISSetPermutation(growp);
2017: if (pcomm!=comm) {
2018: ISDestroy(crowp);
2019: }
2020: ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
2021: /* get the local target indices */
2022: MatGetOwnershipRange(A,&first,PETSC_NULL);
2023: MatGetLocalSize(A,&local_size,PETSC_NULL);
2024: ISGetIndices(irowp,&rows);
2025: ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);
2026: ISRestoreIndices(irowp,&rows);
2027: ISDestroy(irowp);
2028: /* the column permutation is so much easier;
2029: make a local version of 'colp' and invert it */
2030: PetscObjectGetComm((PetscObject)colp,&pcomm);
2031: MPI_Comm_size(pcomm,&size);
2032: if (size==1) {
2033: lcolp = colp;
2034: } else {
2035: ISGetSize(colp,&nrows);
2036: ISGetIndices(colp,&rows);
2037: ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);
2038: }
2039: ISSetPermutation(lcolp);
2040: ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
2041: ISSetPermutation(icolp);
2042: if (size>1) {
2043: ISRestoreIndices(colp,&rows);
2044: ISDestroy(lcolp);
2045: }
2046: /* now we just get the submatrix */
2047: MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);
2048: /* clean up */
2049: ISDestroy(lrowp);
2050: ISDestroy(icolp);
2051: return(0);
2052: }
2056: PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2057: {
2058: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2059: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
2062: if (nghosts) { *nghosts = B->nbs;}
2063: if (ghosts) {*ghosts = baij->garray;}
2064: return(0);
2065: }
2067: EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2071: /*
2072:     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2073: */
2074: PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2075: {
2076: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
2077: PetscErrorCode ierr;
2078: PetscMPIInt size,*ncolsonproc,*disp,nn;
2079: PetscInt bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2080: const PetscInt *is;
2081: PetscInt nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2082: PetscInt *rowhit,M,cstart,cend,colb;
2083: PetscInt *columnsforrow,l;
2084: IS *isa;
2085: PetscTruth done,flg;
2086: ISLocalToGlobalMapping map = mat->bmapping;
2087: PetscInt *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2090: if (!mat->assembled) {
2091: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2092: }
2093: if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2095: ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);
2097: MatGetBlockSize(mat,&bs);
2098: M = mat->rmap->n/bs;
2099: cstart = mat->cmap->rstart/bs;
2100: cend = mat->cmap->rend/bs;
2101: c->M = mat->rmap->N/bs; /* set the global rows and columns and local rows */
2102: c->N = mat->cmap->N/bs;
2103: c->m = mat->rmap->n/bs;
2104: c->rstart = mat->rmap->rstart/bs;
2106: c->ncolors = nis;
2107: PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);
2108: PetscMalloc(nis*sizeof(PetscInt*),&c->columns);
2109: PetscMalloc(nis*sizeof(PetscInt),&c->nrows);
2110: PetscMalloc(nis*sizeof(PetscInt*),&c->rows);
2111: PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);
2112: PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));
2114: /* Allow access to data structures of local part of matrix */
2115: if (!baij->colmap) {
2116: CreateColmap_MPIBAIJ_Private(mat);
2117: }
2118: MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);
2119: MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);
2120:
2121: PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);
2122: PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);
2124: for (i=0; i<nis; i++) {
2125: ISGetLocalSize(isa[i],&n);
2126: ISGetIndices(isa[i],&is);
2127: c->ncolumns[i] = n;
2128: if (n) {
2129: PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);
2130: PetscLogObjectMemory(c,n*sizeof(PetscInt));
2131: PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));
2132: } else {
2133: c->columns[i] = 0;
2134: }
2136: if (ctype == IS_COLORING_GLOBAL){
2137: /* Determine the total (parallel) number of columns of this color */
2138: MPI_Comm_size(((PetscObject)mat)->comm,&size);
2139: PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);
2141: nn = PetscMPIIntCast(n);
2142: MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);
2143: nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2144: if (!nctot) {
2145: PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");
2146: }
2148: disp[0] = 0;
2149: for (j=1; j<size; j++) {
2150: disp[j] = disp[j-1] + ncolsonproc[j-1];
2151: }
2153: /* Get complete list of columns for color on each processor */
2154: PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);
2155: MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);
2156: PetscFree2(ncolsonproc,disp);
2157: } else if (ctype == IS_COLORING_GHOSTED){
2158: /* Determine local number of columns of this color on this process, including ghost points */
2159: nctot = n;
2160: PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);
2161: PetscMemcpy(cols,is,n*sizeof(PetscInt));
2162: } else {
2163: SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2164: }
2166: /*
2167:        Mark all rows affected by these columns
2168: */
2169: /* Temporary option to allow for debugging/testing */
2170: flg = PETSC_FALSE;
2171: PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);
2172: if (!flg) {/*-----------------------------------------------------------------------------*/
2173: /* crude, fast version */
2174: PetscMemzero(rowhit,M*sizeof(PetscInt));
2175: /* loop over columns*/
2176: for (j=0; j<nctot; j++) {
2177: if (ctype == IS_COLORING_GHOSTED) {
2178: col = ltog[cols[j]];
2179: } else {
2180: col = cols[j];
2181: }
2182: if (col >= cstart && col < cend) {
2183: /* column is in diagonal block of matrix */
2184: rows = A_cj + A_ci[col-cstart];
2185: m = A_ci[col-cstart+1] - A_ci[col-cstart];
2186: } else {
2187: #if defined (PETSC_USE_CTABLE)
2188: PetscTableFind(baij->colmap,col+1,&colb);
2189: colb --;
2190: #else
2191: colb = baij->colmap[col] - 1;
2192: #endif
2193: if (colb == -1) {
2194: m = 0;
2195: } else {
2196: colb = colb/bs;
2197: rows = B_cj + B_ci[colb];
2198: m = B_ci[colb+1] - B_ci[colb];
2199: }
2200: }
2201: /* loop over the rows of this column, marking them in rowhit */
2202: for (k=0; k<m; k++) {
2203: rowhit[*rows++] = col + 1;
2204: }
2205: }
2207: /* count the number of hits */
2208: nrows = 0;
2209: for (j=0; j<M; j++) {
2210: if (rowhit[j]) nrows++;
2211: }
2212: c->nrows[i] = nrows;
2213: PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);
2214: PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);
2215: PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));
2216: nrows = 0;
2217: for (j=0; j<M; j++) {
2218: if (rowhit[j]) {
2219: c->rows[i][nrows] = j;
2220: c->columnsforrow[i][nrows] = rowhit[j] - 1;
2221: nrows++;
2222: }
2223: }
2224: } else {/*-------------------------------------------------------------------------------*/
2225: /* slow version, using rowhit as a linked list */
2226: PetscInt currentcol,fm,mfm;
2227: rowhit[M] = M;
2228: nrows = 0;
2229: /* loop over columns*/
2230: for (j=0; j<nctot; j++) {
2231: if (ctype == IS_COLORING_GHOSTED) {
2232: col = ltog[cols[j]];
2233: } else {
2234: col = cols[j];
2235: }
2236: if (col >= cstart && col < cend) {
2237: /* column is in diagonal block of matrix */
2238: rows = A_cj + A_ci[col-cstart];
2239: m = A_ci[col-cstart+1] - A_ci[col-cstart];
2240: } else {
2241: #if defined (PETSC_USE_CTABLE)
2242: PetscTableFind(baij->colmap,col+1,&colb);
2243: colb --;
2244: #else
2245: colb = baij->colmap[col] - 1;
2246: #endif
2247: if (colb == -1) {
2248: m = 0;
2249: } else {
2250: colb = colb/bs;
2251: rows = B_cj + B_ci[colb];
2252: m = B_ci[colb+1] - B_ci[colb];
2253: }
2254: }
2256: /* loop over the rows of this column, marking them in rowhit */
2257: fm = M; /* fm points to first entry in linked list */
2258: for (k=0; k<m; k++) {
2259: currentcol = *rows++;
2260: /* is it already in the list? */
2261: do {
2262: mfm = fm;
2263: fm = rowhit[fm];
2264: } while (fm < currentcol);
2265: /* not in list so add it */
2266: if (fm != currentcol) {
2267: nrows++;
2268: columnsforrow[currentcol] = col;
2269: /* next three lines insert new entry into linked list */
2270: rowhit[mfm] = currentcol;
2271: rowhit[currentcol] = fm;
2272: fm = currentcol;
2273: /* fm points to present position in list since we know the columns are sorted */
2274: } else {
2275: SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2276: }
2277: }
2278: }
2279: c->nrows[i] = nrows;
2280: PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);
2281: PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);
2282: PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));
2283: /* now store the linked list of rows into c->rows[i] */
2284: nrows = 0;
2285: fm = rowhit[M];
2286: do {
2287: c->rows[i][nrows] = fm;
2288: c->columnsforrow[i][nrows++] = columnsforrow[fm];
2289: fm = rowhit[fm];
2290: } while (fm < M);
2291: } /* ---------------------------------------------------------------------------------------*/
2292: PetscFree(cols);
2293: }
2295: /* Optimize by adding the vscale, and scaleforrow[][] fields */
2296: /*
2297:        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2298: */
2299: if (ctype == IS_COLORING_GLOBAL) {
2300: PetscInt *garray;
2301: PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);
2302: for (i=0; i<baij->B->cmap->n/bs; i++) {
2303: for (j=0; j<bs; j++) {
2304: garray[i*bs+j] = bs*baij->garray[i]+j;
2305: }
2306: }
2307: VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);
2308: PetscFree(garray);
2309: CHKMEMQ;
2310: PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);
2311: for (k=0; k<c->ncolors; k++) {
2312: PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);
2313: for (l=0; l<c->nrows[k]; l++) {
2314: col = c->columnsforrow[k][l];
2315: if (col >= cstart && col < cend) {
2316: /* column is in diagonal block of matrix */
2317: colb = col - cstart;
2318: } else {
2319: /* column is in "off-processor" part */
2320: #if defined (PETSC_USE_CTABLE)
2321: PetscTableFind(baij->colmap,col+1,&colb);
2322: colb --;
2323: #else
2324: colb = baij->colmap[col] - 1;
2325: #endif
2326: colb = colb/bs;
2327: colb += cend - cstart;
2328: }
2329: c->vscaleforrow[k][l] = colb;
2330: }
2331: }
2332: } else if (ctype == IS_COLORING_GHOSTED) {
2333: /* Get gtol mapping */
2334: PetscInt N = mat->cmap->N, *gtol;
2335: PetscMalloc((N+1)*sizeof(PetscInt),&gtol);
2336: for (i=0; i<N; i++) gtol[i] = -1;
2337: for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2338:
2339: c->vscale = 0; /* will be created in MatFDColoringApply() */
2340: PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);
2341: for (k=0; k<c->ncolors; k++) {
2342: PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);
2343: for (l=0; l<c->nrows[k]; l++) {
2344: col = c->columnsforrow[k][l]; /* global column index */
2345: c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2346: }
2347: }
2348: PetscFree(gtol);
2349: }
2350: ISColoringRestoreIS(iscoloring,&isa);
2352: PetscFree(rowhit);
2353: PetscFree(columnsforrow);
2354: MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);
2355: MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);
2356: CHKMEMQ;
2357: return(0);
2358: }
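
For context, the routine above is normally reached through the public MatFDColoringCreate(); a minimal sketch of that calling sequence follows, assuming the MatGetColoring()/ISColoringDestroy() API of this PETSc vintage, with error checking stripped to match the listing.

/* Sketch: build a coloring of J's nonzero pattern and the finite-difference
   coloring context that uses it (the coloring type "sl" is an illustrative choice) */
PetscErrorCode CreateFDColoringExample(Mat J,MatFDColoring *fdcoloring)
{
  ISColoring iscoloring;

  MatGetColoring(J,MATCOLORING_SL,&iscoloring);
  MatFDColoringCreate(J,iscoloring,fdcoloring);
  ISColoringDestroy(iscoloring);
  return(0);
}

The resulting context would then typically be handed to MatFDColoringSetFunction() and used to approximate a Jacobian by finite differences.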
2362: PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2363: {
2364: Mat B;
2365: Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2366: Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2367: Mat_SeqAIJ *b;
2369: PetscMPIInt size,rank,*recvcounts = 0,*displs = 0;
2370: PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2371: PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2374: MPI_Comm_size(((PetscObject)A)->comm,&size);
2375: MPI_Comm_rank(((PetscObject)A)->comm,&rank);
2377: /* ----------------------------------------------------------------
2378: Tell every processor the number of nonzeros per row
2379: */
2380: PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);
2381: for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2382: lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2383: }
2384: sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2385: PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);
2386: displs = recvcounts + size;
2387: for (i=0; i<size; i++) {
2388: recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2389: displs[i] = A->rmap->range[i]/bs;
2390: }
2391: #if defined(PETSC_HAVE_MPI_IN_PLACE)
2392: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
2393: #else
2394: MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
2395: #endif
2396: /* ---------------------------------------------------------------
2397: Create the sequential matrix of the same type as the local block diagonal
2398: */
2399: MatCreate(PETSC_COMM_SELF,&B);
2400: MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);
2401: MatSetType(B,MATSEQAIJ);
2402: MatSeqAIJSetPreallocation(B,0,lens);
2403: b = (Mat_SeqAIJ *)B->data;
2405: /*--------------------------------------------------------------------
2406: Copy my part of matrix column indices over
2407: */
2408: sendcount = ad->nz + bd->nz;
2409: jsendbuf = b->j + b->i[rstarts[rank]/bs];
2410: a_jsendbuf = ad->j;
2411: b_jsendbuf = bd->j;
2412: n = A->rmap->rend/bs - A->rmap->rstart/bs;
2413: cnt = 0;
2414: for (i=0; i<n; i++) {
2416: /* put in lower diagonal portion */
2417: m = bd->i[i+1] - bd->i[i];
2418: while (m > 0) {
2419: /* is it above diagonal (in bd (compressed) numbering) */
2420: if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2421: jsendbuf[cnt++] = garray[*b_jsendbuf++];
2422: m--;
2423: }
2425: /* put in diagonal portion */
2426: for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2427: jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2428: }
2430: /* put in upper diagonal portion */
2431: while (m-- > 0) {
2432: jsendbuf[cnt++] = garray[*b_jsendbuf++];
2433: }
2434: }
2435: if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2437: /*--------------------------------------------------------------------
2438: Gather all column indices to all processors
2439: */
2440: for (i=0; i<size; i++) {
2441: recvcounts[i] = 0;
2442: for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2443: recvcounts[i] += lens[j];
2444: }
2445: }
2446: displs[0] = 0;
2447: for (i=1; i<size; i++) {
2448: displs[i] = displs[i-1] + recvcounts[i-1];
2449: }
2450: #if defined(PETSC_HAVE_MPI_IN_PLACE)
2451: MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
2452: #else
2453: MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);
2454: #endif
2455: /*--------------------------------------------------------------------
2456: Assemble the matrix into usable form (note: numerical values are not yet set)
2457: */
2458: /* set the b->ilen (length of each row) values */
2459: PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));
2460: /* set the b->i indices */
2461: b->i[0] = 0;
2462: for (i=1; i<=A->rmap->N/bs; i++) {
2463: b->i[i] = b->i[i-1] + lens[i-1];
2464: }
2465: PetscFree(lens);
2466: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2467: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2468: PetscFree(recvcounts);
2470: if (A->symmetric){
2471: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
2472: } else if (A->hermitian) {
2473: MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);
2474: } else if (A->structurally_symmetric) {
2475: MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
2476: }
2477: *newmat = B;
2478: return(0);
2479: }
2483: PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2484: {
2485: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
2487: Vec bb1 = 0;
2490: if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2491: VecDuplicate(bb,&bb1);
2492: }
2494: if (flag == SOR_APPLY_UPPER) {
2495: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2496: return(0);
2497: }
2499: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2500: if (flag & SOR_ZERO_INITIAL_GUESS) {
2501: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2502: its--;
2503: }
2504:
2505: while (its--) {
2506: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2507: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2509: /* update rhs: bb1 = bb - B*x */
2510: VecScale(mat->lvec,-1.0);
2511: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2513: /* local sweep */
2514: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);
2515: }
2516: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2517: if (flag & SOR_ZERO_INITIAL_GUESS) {
2518: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2519: its--;
2520: }
2521: while (its--) {
2522: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2523: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2525: /* update rhs: bb1 = bb - B*x */
2526: VecScale(mat->lvec,-1.0);
2527: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2529: /* local sweep */
2530: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);
2531: }
2532: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2533: if (flag & SOR_ZERO_INITIAL_GUESS) {
2534: (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);
2535: its--;
2536: }
2537: while (its--) {
2538: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2539: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
2541: /* update rhs: bb1 = bb - B*x */
2542: VecScale(mat->lvec,-1.0);
2543: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
2545: /* local sweep */
2546: (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);
2547: }
2548: } else {
2549: SETERRQ(PETSC_ERR_SUP,"Requested parallel SOR variant is not supported");
2550: }
2552: if (bb1) {VecDestroy(bb1);}
2553: return(0);
2554: }
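
A minimal sketch of how the local sweeps above might be driven from user code, assuming the public MatSOR() entry point mirrors the MatSOR_MPIBAIJ() signature; omega, the sweep type and the iteration counts are illustrative.

/* Sketch: its iterations of the local symmetric sweep with a zero initial guess,
   so x approximately solves A x = b in a block-Jacobi/local-SOR fashion */
PetscErrorCode LocalSORSmooth(Mat A,Vec b,Vec x,PetscInt its)
{
  MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,its,1,x);
  return(0);
}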
2559: /* -------------------------------------------------------------------*/
2560: static struct _MatOps MatOps_Values = {
2561: MatSetValues_MPIBAIJ,
2562: MatGetRow_MPIBAIJ,
2563: MatRestoreRow_MPIBAIJ,
2564: MatMult_MPIBAIJ,
2565: /* 4*/ MatMultAdd_MPIBAIJ,
2566: MatMultTranspose_MPIBAIJ,
2567: MatMultTransposeAdd_MPIBAIJ,
2568: 0,
2569: 0,
2570: 0,
2571: /*10*/ 0,
2572: 0,
2573: 0,
2574: MatSOR_MPIBAIJ,
2575: MatTranspose_MPIBAIJ,
2576: /*15*/ MatGetInfo_MPIBAIJ,
2577: MatEqual_MPIBAIJ,
2578: MatGetDiagonal_MPIBAIJ,
2579: MatDiagonalScale_MPIBAIJ,
2580: MatNorm_MPIBAIJ,
2581: /*20*/ MatAssemblyBegin_MPIBAIJ,
2582: MatAssemblyEnd_MPIBAIJ,
2583: MatSetOption_MPIBAIJ,
2584: MatZeroEntries_MPIBAIJ,
2585: /*24*/ MatZeroRows_MPIBAIJ,
2586: 0,
2587: 0,
2588: 0,
2589: 0,
2590: /*29*/ MatSetUpPreallocation_MPIBAIJ,
2591: 0,
2592: 0,
2593: 0,
2594: 0,
2595: /*34*/ MatDuplicate_MPIBAIJ,
2596: 0,
2597: 0,
2598: 0,
2599: 0,
2600: /*39*/ MatAXPY_MPIBAIJ,
2601: MatGetSubMatrices_MPIBAIJ,
2602: MatIncreaseOverlap_MPIBAIJ,
2603: MatGetValues_MPIBAIJ,
2604: MatCopy_MPIBAIJ,
2605: /*44*/ 0,
2606: MatScale_MPIBAIJ,
2607: 0,
2608: 0,
2609: 0,
2610: /*49*/ MatSetBlockSize_MPIBAIJ,
2611: 0,
2612: 0,
2613: 0,
2614: 0,
2615: /*54*/ MatFDColoringCreate_MPIBAIJ,
2616: 0,
2617: MatSetUnfactored_MPIBAIJ,
2618: MatPermute_MPIBAIJ,
2619: MatSetValuesBlocked_MPIBAIJ,
2620: /*59*/ MatGetSubMatrix_MPIBAIJ,
2621: MatDestroy_MPIBAIJ,
2622: MatView_MPIBAIJ,
2623: 0,
2624: 0,
2625: /*64*/ 0,
2626: 0,
2627: 0,
2628: 0,
2629: 0,
2630: /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2631: 0,
2632: 0,
2633: 0,
2634: 0,
2635: /*74*/ 0,
2636: MatFDColoringApply_BAIJ,
2637: 0,
2638: 0,
2639: 0,
2640: /*79*/ 0,
2641: 0,
2642: 0,
2643: 0,
2644: MatLoad_MPIBAIJ,
2645: /*84*/ 0,
2646: 0,
2647: 0,
2648: 0,
2649: 0,
2650: /*89*/ 0,
2651: 0,
2652: 0,
2653: 0,
2654: 0,
2655: /*94*/ 0,
2656: 0,
2657: 0,
2658: 0,
2659: 0,
2660: /*99*/ 0,
2661: 0,
2662: 0,
2663: 0,
2664: 0,
2665: /*104*/0,
2666: MatRealPart_MPIBAIJ,
2667: MatImaginaryPart_MPIBAIJ,
2668: 0,
2669: 0,
2670: /*109*/0,
2671: 0,
2672: 0,
2673: 0,
2674: 0,
2675: /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2676: 0,
2677: MatGetGhosts_MPIBAIJ
2678: };
2683: PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2684: {
2686: *a = ((Mat_MPIBAIJ *)A->data)->A;
2687: *iscopy = PETSC_FALSE;
2688: return(0);
2689: }
2699: PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2700: {
2701: PetscInt m,rstart,cstart,cend;
2702: PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2703: const PetscInt *JJ=0;
2704: PetscScalar *values=0;
2709: if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2710: PetscLayoutSetBlockSize(B->rmap,bs);
2711: PetscLayoutSetBlockSize(B->cmap,bs);
2712: PetscLayoutSetUp(B->rmap);
2713: PetscLayoutSetUp(B->cmap);
2714: m = B->rmap->n/bs;
2715: rstart = B->rmap->rstart/bs;
2716: cstart = B->cmap->rstart/bs;
2717: cend = B->cmap->rend/bs;
2719: if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2720: PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);
2721: for (i=0; i<m; i++) {
2722: nz = ii[i+1] - ii[i];
2723: if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2724: nz_max = PetscMax(nz_max,nz);
2725: JJ = jj + ii[i];
2726: for (j=0; j<nz; j++) {
2727: if (*JJ >= cstart) break;
2728: JJ++;
2729: }
2730: d = 0;
2731: for (; j<nz; j++) {
2732: if (*JJ++ >= cend) break;
2733: d++;
2734: }
2735: d_nnz[i] = d;
2736: o_nnz[i] = nz - d;
2737: }
2738: MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
2739: PetscFree2(d_nnz,o_nnz);
2741: values = (PetscScalar*)V;
2742: if (!values) {
2743: PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);
2744: PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
2745: }
2746: for (i=0; i<m; i++) {
2747: PetscInt row = i + rstart;
2748: PetscInt ncols = ii[i+1] - ii[i];
2749: const PetscInt *icols = jj + ii[i];
2750: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2751: MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);
2752: }
2754: if (!V) { PetscFree(values); }
2755: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2756: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2758: return(0);
2759: }
2764: /*@C
2765: MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
2766: (block compressed row).
2768: Collective on MPI_Comm
2770: Input Parameters:
2771: + B - the matrix
2772: . i - the indices into j for the start of each local block row (starts with zero)
2773: . j - the global block column indices for each local block row (starts with zero); these must be sorted within each row
2774: - v - optional values in the matrix
2776: Level: developer
2778: .keywords: matrix, aij, compressed row, sparse, parallel
2780: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2781: @*/
2782: PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2783: {
2784: PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2787: PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);
2788: if (f) {
2789: (*f)(B,bs,i,j,v);
2790: }
2791: return(0);
2792: }
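
As a usage note, a small sketch of feeding a block CSR structure to the routine above; it assumes B already has its type (MATMPIBAIJ) and sizes set, with two block rows per process and block size 2, and the indices and values are purely illustrative.

/* Sketch: two local block rows whose only nonzero block is the diagonal block;
   row pointers are per block row, column indices are global block columns */
PetscErrorCode PreallocateFromCSRExample(Mat B,PetscInt rstartbs)
{
  PetscInt    k,ii[3],jj[2];
  PetscScalar vv[8];                        /* two 2x2 blocks, row-oriented */

  ii[0] = 0; ii[1] = 1; ii[2] = 2;          /* block-row pointers */
  jj[0] = rstartbs; jj[1] = rstartbs + 1;   /* global block-column indices */
  for (k=0; k<8; k++) vv[k] = 1.0;
  MatMPIBAIJSetPreallocationCSR(B,2,ii,jj,vv);   /* also inserts the values and assembles */
  return(0);
}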
2797: PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2798: {
2799: Mat_MPIBAIJ *b;
2801: PetscInt i, newbs = PetscAbs(bs);
2804: if (bs < 0) {
2805: PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");
2806: PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);
2807: PetscOptionsEnd();
2808: bs = PetscAbs(bs);
2809: }
2810: if ((d_nnz || o_nnz) && newbs != bs) {
2811: SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2812: }
2813: bs = newbs;
2816: if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2817: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2818: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2819: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2820: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2821:
2822: PetscLayoutSetBlockSize(B->rmap,bs);
2823: PetscLayoutSetBlockSize(B->cmap,bs);
2824: PetscLayoutSetUp(B->rmap);
2825: PetscLayoutSetUp(B->cmap);
2827: if (d_nnz) {
2828: for (i=0; i<B->rmap->n/bs; i++) {
2829: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2830: }
2831: }
2832: if (o_nnz) {
2833: for (i=0; i<B->rmap->n/bs; i++) {
2834: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2835: }
2836: }
2838: b = (Mat_MPIBAIJ*)B->data;
2839: b->bs2 = bs*bs;
2840: b->mbs = B->rmap->n/bs;
2841: b->nbs = B->cmap->n/bs;
2842: b->Mbs = B->rmap->N/bs;
2843: b->Nbs = B->cmap->N/bs;
2845: for (i=0; i<=b->size; i++) {
2846: b->rangebs[i] = B->rmap->range[i]/bs;
2847: }
2848: b->rstartbs = B->rmap->rstart/bs;
2849: b->rendbs = B->rmap->rend/bs;
2850: b->cstartbs = B->cmap->rstart/bs;
2851: b->cendbs = B->cmap->rend/bs;
2853: if (!B->preallocated) {
2854: MatCreate(PETSC_COMM_SELF,&b->A);
2855: MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);
2856: MatSetType(b->A,MATSEQBAIJ);
2857: PetscLogObjectParent(B,b->A);
2858: MatCreate(PETSC_COMM_SELF,&b->B);
2859: MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);
2860: MatSetType(b->B,MATSEQBAIJ);
2861: PetscLogObjectParent(B,b->B);
2862: MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);
2863: }
2865: MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);
2866: MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);
2867: B->preallocated = PETSC_TRUE;
2868: return(0);
2869: }
2873: EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2874: EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2881: PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2882: {
2883: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data;
2885: Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2886: PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2887: const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2890: PetscMalloc((M+1)*sizeof(PetscInt),&ii);
2891: ii[0] = 0;
2892: CHKMEMQ;
2893: for (i=0; i<M; i++) {
2894: if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2895: if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2896: ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2897: /* remove one from the count if this row of the matrix has a diagonal entry */
2898: for (j=id[i]; j<id[i+1]; j++) {
2899: if (jd[j] == i) {ii[i+1]--;break;}
2900: }
2901: CHKMEMQ;
2902: }
2903: PetscMalloc(ii[M]*sizeof(PetscInt),&jj);
2904: cnt = 0;
2905: for (i=0; i<M; i++) {
2906: for (j=io[i]; j<io[i+1]; j++) {
2907: if (garray[jo[j]] > rstart) break;
2908: jj[cnt++] = garray[jo[j]];
2909: CHKMEMQ;
2910: }
2911: for (k=id[i]; k<id[i+1]; k++) {
2912: if (jd[k] != i) {
2913: jj[cnt++] = rstart + jd[k];
2914: CHKMEMQ;
2915: }
2916: }
2917: for (;j<io[i+1]; j++) {
2918: jj[cnt++] = garray[jo[j]];
2919: CHKMEMQ;
2920: }
2921: }
2922: MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);
2923: return(0);
2924: }
2927: /*MC
2928: MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2930: Options Database Keys:
2931: + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2932: . -mat_block_size <bs> - set the blocksize used to store the matrix
2933: - -mat_use_hash_table <fact> - use a hash table during matrix assembly (fact is the table size factor)
2935: Level: beginner
2937: .seealso: MatCreateMPIBAIJ
2938: M*/
2943: PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2944: {
2945: Mat_MPIBAIJ *b;
2947: PetscTruth flg;
2950: PetscNewLog(B,Mat_MPIBAIJ,&b);
2951: B->data = (void*)b;
2954: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
2955: B->mapping = 0;
2956: B->assembled = PETSC_FALSE;
2958: B->insertmode = NOT_SET_VALUES;
2959: MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);
2960: MPI_Comm_size(((PetscObject)B)->comm,&b->size);
2962: /* build local table of row and column ownerships */
2963: PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);
2965: /* build cache for off array entries formed */
2966: MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);
2967: b->donotstash = PETSC_FALSE;
2968: b->colmap = PETSC_NULL;
2969: b->garray = PETSC_NULL;
2970: b->roworiented = PETSC_TRUE;
2972: /* stuff used in block assembly */
2973: b->barray = 0;
2975: /* stuff used for matrix vector multiply */
2976: b->lvec = 0;
2977: b->Mvctx = 0;
2979: /* stuff for MatGetRow() */
2980: b->rowindices = 0;
2981: b->rowvalues = 0;
2982: b->getrowactive = PETSC_FALSE;
2984: /* hash table stuff */
2985: b->ht = 0;
2986: b->hd = 0;
2987: b->ht_size = 0;
2988: b->ht_flag = PETSC_FALSE;
2989: b->ht_fact = 0;
2990: b->ht_total_ct = 0;
2991: b->ht_insert_ct = 0;
2993: PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");
2994: PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);
2995: if (flg) {
2996: PetscReal fact = 1.39;
2997: MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);
2998: PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);
2999: if (fact <= 1.0) fact = 1.39;
3000: MatMPIBAIJSetHashTableFactor(B,fact);
3001: PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);
3002: }
3003: PetscOptionsEnd();
3005: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3006: "MatConvert_MPIBAIJ_MPIAdj",
3007: MatConvert_MPIBAIJ_MPIAdj);
3008: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3009: "MatStoreValues_MPIBAIJ",
3010: MatStoreValues_MPIBAIJ);
3011: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3012: "MatRetrieveValues_MPIBAIJ",
3013: MatRetrieveValues_MPIBAIJ);
3014: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3015: "MatGetDiagonalBlock_MPIBAIJ",
3016: MatGetDiagonalBlock_MPIBAIJ);
3017: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3018: "MatMPIBAIJSetPreallocation_MPIBAIJ",
3019: MatMPIBAIJSetPreallocation_MPIBAIJ);
3020: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3021: "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3022: MatMPIBAIJSetPreallocationCSR_MPIBAIJ);
3023: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3024: "MatDiagonalScaleLocal_MPIBAIJ",
3025: MatDiagonalScaleLocal_MPIBAIJ);
3026: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3027: "MatSetHashTableFactor_MPIBAIJ",
3028: MatSetHashTableFactor_MPIBAIJ);
3029: PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);
3030: return(0);
3031: }
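
To illustrate the options listed in the MATMPIBAIJ documentation above, a minimal creation sketch follows; the global size, block size and preallocation widths are assumptions, and error checking is omitted as in the listing.

/* Sketch: create a matrix whose type can still be overridden on the command line
   with -mat_type, -mat_block_size and -mat_use_hash_table */
PetscErrorCode CreateExampleBAIJFromOptions(MPI_Comm comm,Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,64,64);              /* assumed global size */
  MatSetType(*A,MATMPIBAIJ);
  MatSetFromOptions(*A);
  MatMPIBAIJSetPreallocation(*A,2,5,PETSC_NULL,2,PETSC_NULL);   /* assumed bs and stencil widths */
  return(0);
}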
3034: /*MC
3035: MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3037: This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3038: and MATMPIBAIJ otherwise.
3040: Options Database Keys:
3041: . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3043: Level: beginner
3045: .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3046: M*/
3051: PetscErrorCode MatCreate_BAIJ(Mat A)
3052: {
3054: PetscMPIInt size;
3057: MPI_Comm_size(((PetscObject)A)->comm,&size);
3058: if (size == 1) {
3059: MatSetType(A,MATSEQBAIJ);
3060: } else {
3061: MatSetType(A,MATMPIBAIJ);
3062: }
3063: return(0);
3064: }
3069: /*@C
3070: MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3071: (block compressed row). For good matrix assembly performance
3072: the user should preallocate the matrix storage by setting the parameters
3073: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3074: performance can be increased by more than a factor of 50.
3076: Collective on Mat
3078: Input Parameters:
3079: + A - the matrix
3080: . bs - size of block
3081: . d_nz - number of block nonzeros per block row in diagonal portion of local
3082: submatrix (same for all local rows)
3083: . d_nnz - array containing the number of block nonzeros in the various block rows
3084: in the diagonal portion of the local submatrix (possibly different for each block
3085: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
3086: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
3087: submatrix (same for all local rows).
3088: - o_nnz - array containing the number of nonzeros in the various block rows of the
3089: off-diagonal portion of the local submatrix (possibly different for
3090: each block row) or PETSC_NULL.
3092: If the *_nnz parameter is given then the *_nz parameter is ignored
3094: Options Database Keys:
3095: + -mat_block_size - size of the blocks to use
3096: - -mat_use_hash_table <fact> - use a hash table during matrix assembly (fact is the table size factor)
3098: Notes:
3099: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3100: then it must be used on all processors that share the object for that argument.
3102: Storage Information:
3103: For a square global matrix we define each processor's diagonal portion
3104: to be its local rows and the corresponding columns (a square submatrix);
3105: each processor's off-diagonal portion encompasses the remainder of the
3106: local matrix (a rectangular submatrix).
3108: The user can specify preallocated storage for the diagonal part of
3109: the local submatrix with either d_nz or d_nnz (not both). Set
3110: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3111: memory allocation. Likewise, specify preallocated storage for the
3112: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3114: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3115: the figure below we depict these three local rows and all columns (0-11).
3117: .vb
3118: 0 1 2 3 4 5 6 7 8 9 10 11
3119: -------------------
3120: row 3 | o o o d d d o o o o o o
3121: row 4 | o o o d d d o o o o o o
3122: row 5 | o o o d d d o o o o o o
3123: -------------------
3124: .ve
3125:
3126: Thus, any entries in the d locations are stored in the d (diagonal)
3127: submatrix, and any entries in the o locations are stored in the
3128: o (off-diagonal) submatrix. Note that the d and the o submatrices are
3129: stored simply in the MATSEQBAIJ format for compressed row storage.
3131: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3132: and o_nz should indicate the number of block nonzeros per row in the o matrix.
3133: In general, for PDE problems in which most nonzeros are near the diagonal,
3134: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3135: or you will get TERRIBLE performance; see the users' manual chapter on
3136: matrices.
3138: You can call MatGetInfo() to get information on how effective the preallocation was;
3139: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3140: You can also run with the option -info and look for messages with the string
3141: malloc in them to see if additional memory allocation was needed.
3143: Level: intermediate
3145: .keywords: matrix, block, aij, compressed row, sparse, parallel
3147: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3148: @*/
3149: PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3150: {
3151: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3154: PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);
3155: if (f) {
3156: (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
3157: }
3158: return(0);
3159: }
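
A hedged sketch of per-block-row preallocation for a one-dimensional block-tridiagonal stencil, the situation the storage discussion above describes; the block-row ownership arguments and the stencil itself are assumptions supplied by the caller, and error checking is omitted to match the listing.

/* Sketch: interior block rows have three nonzero blocks; a block is counted in the
   off-diagonal (o_nnz) part exactly when its block column is owned by another process */
PetscErrorCode PreallocateBlockTridiagonal(Mat B,PetscInt bs,PetscInt rstartbs,PetscInt rendbs,PetscInt Mbs)
{
  PetscInt i,gi,nlocal = rendbs - rstartbs,*d_nnz,*o_nnz;

  PetscMalloc2(nlocal,PetscInt,&d_nnz,nlocal,PetscInt,&o_nnz);
  for (i=0; i<nlocal; i++) {
    gi       = rstartbs + i;                 /* global block row */
    d_nnz[i] = 1;                            /* the diagonal block */
    o_nnz[i] = 0;
    if (gi > 0)     { if (gi-1 >= rstartbs) d_nnz[i]++; else o_nnz[i]++; }
    if (gi < Mbs-1) { if (gi+1 <  rendbs)   d_nnz[i]++; else o_nnz[i]++; }
  }
  MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);
  PetscFree2(d_nnz,o_nnz);
  return(0);
}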
3163: /*@C
3164: MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3165: (block compressed row). For good matrix assembly performance
3166: the user should preallocate the matrix storage by setting the parameters
3167: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3168: performance can be increased by more than a factor of 50.
3170: Collective on MPI_Comm
3172: Input Parameters:
3173: + comm - MPI communicator
3174: . bs - size of block
3175: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3176: This value should be the same as the local size used in creating the
3177: y vector for the matrix-vector product y = Ax.
3178: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3179: This value should be the same as the local size used in creating the
3180: x vector for the matrix-vector product y = Ax.
3181: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3182: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3183: . d_nz - number of nonzero blocks per block row in diagonal portion of local
3184: submatrix (same for all local rows)
3185: . d_nnz - array containing the number of nonzero blocks in the various block rows
3186: in the diagonal portion of the local submatrix (possibly different for each block
3187: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
3188: . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3189: submatrix (same for all local rows).
3190: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3191: off-diagonal portion of the local submatrix (possibly different for
3192: each block row) or PETSC_NULL.
3194: Output Parameter:
3195: . A - the matrix
3197: Options Database Keys:
3198: + -mat_block_size - size of the blocks to use
3199: - -mat_use_hash_table <fact> - use a hash table during matrix assembly (fact is the table size factor)
3201: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3202: MatXXXXSetPreallocation() paradigm instead of this routine directly.
3203: [MatXXXXSetPreallocation() is, for example, MatMPIBAIJSetPreallocation]
3205: Notes:
3206: If the *_nnz parameter is given then the *_nz parameter is ignored
3208: A nonzero block is any block that has 1 or more nonzeros in it
3210: The user MUST specify either the local or global matrix dimensions
3211: (possibly both).
3213: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3214: then it must be used on all processors that share the object for that argument.
3216: Storage Information:
3217: For a square global matrix we define each processor's diagonal portion
3218: to be its local rows and the corresponding columns (a square submatrix);
3219: each processor's off-diagonal portion encompasses the remainder of the
3220: local matrix (a rectangular submatrix).
3222: The user can specify preallocated storage for the diagonal part of
3223: the local submatrix with either d_nz or d_nnz (not both). Set
3224: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3225: memory allocation. Likewise, specify preallocated storage for the
3226: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3228: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3229: the figure below we depict these three local rows and all columns (0-11).
3231: .vb
3232: 0 1 2 3 4 5 6 7 8 9 10 11
3233: -------------------
3234: row 3 | o o o d d d o o o o o o
3235: row 4 | o o o d d d o o o o o o
3236: row 5 | o o o d d d o o o o o o
3237: -------------------
3238: .ve
3239:
3240: Thus, any entries in the d locations are stored in the d (diagonal)
3241: submatrix, and any entries in the o locations are stored in the
3242: o (off-diagonal) submatrix. Note that the d and the o submatrices are
3243: stored simply in the MATSEQBAIJ format for compressed row storage.
3245: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3246: and o_nz should indicate the number of block nonzeros per row in the o matrix.
3247: In general, for PDE problems in which most nonzeros are near the diagonal,
3248: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
3249: or you will get TERRIBLE performance; see the users' manual chapter on
3250: matrices.
3252: Level: intermediate
3254: .keywords: matrix, block, aij, compressed row, sparse, parallel
3256: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3257: @*/
3258: PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3259: {
3261: PetscMPIInt size;
3264: MatCreate(comm,A);
3265: MatSetSizes(*A,m,n,M,N);
3266: MPI_Comm_size(comm,&size);
3267: if (size > 1) {
3268: MatSetType(*A,MATMPIBAIJ);
3269: MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
3270: } else {
3271: MatSetType(*A,MATSEQBAIJ);
3272: MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
3273: }
3274: return(0);
3275: }
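
For completeness, a minimal sketch that pairs the convenience constructor above with blocked insertion; the block size of 2, the tridiagonal stencil and the unit block entries are illustrative assumptions, and error checking is again omitted.

/* Sketch: build and assemble a block-tridiagonal matrix with 2x2 blocks;
   all indices passed to MatSetValuesBlocked() are in block units */
PetscErrorCode BuildExampleBAIJ(MPI_Comm comm,PetscInt mbs_local,Mat *A)
{
  PetscInt    i,j,M,Mbs,rstart,rend,ncols,cols[3];
  PetscScalar vals[12];                     /* up to 3 blocks of 2x2 values */

  for (j=0; j<12; j++) vals[j] = 1.0;       /* illustrative block entries */
  MatCreateMPIBAIJ(comm,2,2*mbs_local,2*mbs_local,PETSC_DETERMINE,PETSC_DETERMINE,3,PETSC_NULL,1,PETSC_NULL,A);
  MatGetSize(*A,&M,PETSC_NULL);
  Mbs = M/2;
  MatGetOwnershipRange(*A,&rstart,&rend);
  rstart /= 2; rend /= 2;                   /* convert point rows to block rows */
  for (i=rstart; i<rend; i++) {
    ncols = 0;
    if (i > 0)     cols[ncols++] = i-1;
    cols[ncols++] = i;
    if (i < Mbs-1) cols[ncols++] = i+1;
    MatSetValuesBlocked(*A,1,&i,ncols,cols,vals,INSERT_VALUES);
  }
  MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);
  return(0);
}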
3279: static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3280: {
3281: Mat mat;
3282: Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3284: PetscInt len=0;
3287: *newmat = 0;
3288: MatCreate(((PetscObject)matin)->comm,&mat);
3289: MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);
3290: MatSetType(mat,((PetscObject)matin)->type_name);
3291: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
3293: mat->factor = matin->factor;
3294: mat->preallocated = PETSC_TRUE;
3295: mat->assembled = PETSC_TRUE;
3296: mat->insertmode = NOT_SET_VALUES;
3298: a = (Mat_MPIBAIJ*)mat->data;
3299: mat->rmap->bs = matin->rmap->bs;
3300: a->bs2 = oldmat->bs2;
3301: a->mbs = oldmat->mbs;
3302: a->nbs = oldmat->nbs;
3303: a->Mbs = oldmat->Mbs;
3304: a->Nbs = oldmat->Nbs;
3305:
3306: PetscLayoutCopy(matin->rmap,&mat->rmap);
3307: PetscLayoutCopy(matin->cmap,&mat->cmap);
3309: a->size = oldmat->size;
3310: a->rank = oldmat->rank;
3311: a->donotstash = oldmat->donotstash;
3312: a->roworiented = oldmat->roworiented;
3313: a->rowindices = 0;
3314: a->rowvalues = 0;
3315: a->getrowactive = PETSC_FALSE;
3316: a->barray = 0;
3317: a->rstartbs = oldmat->rstartbs;
3318: a->rendbs = oldmat->rendbs;
3319: a->cstartbs = oldmat->cstartbs;
3320: a->cendbs = oldmat->cendbs;
3322: /* hash table stuff */
3323: a->ht = 0;
3324: a->hd = 0;
3325: a->ht_size = 0;
3326: a->ht_flag = oldmat->ht_flag;
3327: a->ht_fact = oldmat->ht_fact;
3328: a->ht_total_ct = 0;
3329: a->ht_insert_ct = 0;
3331: PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));
3332: if (oldmat->colmap) {
3333: #if defined (PETSC_USE_CTABLE)
3334: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
3335: #else
3336: PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);
3337: PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
3338: PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));
3339: #endif
3340: } else a->colmap = 0;
3342: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3343: PetscMalloc(len*sizeof(PetscInt),&a->garray);
3344: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
3345: PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));
3346: } else a->garray = 0;
3347:
3348: MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);
3349: VecDuplicate(oldmat->lvec,&a->lvec);
3350: PetscLogObjectParent(mat,a->lvec);
3351: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
3352: PetscLogObjectParent(mat,a->Mvctx);
3354: MatDuplicate(oldmat->A,cpvalues,&a->A);
3355: PetscLogObjectParent(mat,a->A);
3356: MatDuplicate(oldmat->B,cpvalues,&a->B);
3357: PetscLogObjectParent(mat,a->B);
3358: PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);
3359: *newmat = mat;
3361: return(0);
3362: }
3366: PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3367: {
3368: Mat A;
3370: int fd;
3371: PetscInt i,nz,j,rstart,rend;
3372: PetscScalar *vals,*buf;
3373: MPI_Comm comm = ((PetscObject)viewer)->comm;
3374: MPI_Status status;
3375: PetscMPIInt rank,size,maxnz;
3376: PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3377: PetscInt *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3378: PetscInt jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3379: PetscMPIInt tag = ((PetscObject)viewer)->tag;
3380: PetscInt *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3381: PetscInt dcount,kmax,k,nzcount,tmp,mend;
3384: PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");
3385: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
3386: PetscOptionsEnd();
3388: MPI_Comm_size(comm,&size);
3389: MPI_Comm_rank(comm,&rank);
3390: if (!rank) {
3391: PetscViewerBinaryGetDescriptor(viewer,&fd);
3392: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
3393: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3394: }
3396: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
3397: M = header[1]; N = header[2];
3399: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3401: /*
3402: This code adds extra rows to make sure the number of rows is
3403: divisible by the blocksize
3404: */
3405: Mbs = M/bs;
3406: extra_rows = bs - M + bs*Mbs;
3407: if (extra_rows == bs) extra_rows = 0;
3408: else Mbs++;
3409: if (extra_rows && !rank) {
3410: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3411: }
3413: /* determine ownership of all rows */
3414: mbs = Mbs/size + ((Mbs % size) > rank);
3415: m = mbs*bs;
3416: PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);
3417: MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
3419: /* process 0 needs enough room for process with most rows */
3420: if (!rank) {
3421: mmax = rowners[1];
3422: for (i=2; i<size; i++) {
3423: mmax = PetscMax(mmax,rowners[i]);
3424: }
3425: mmax*=bs;
3426: } else mmax = m;
3428: rowners[0] = 0;
3429: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3430: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3431: rstart = rowners[rank];
3432: rend = rowners[rank+1];
3434: /* distribute row lengths to all processors */
3435: PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);
3436: if (!rank) {
3437: mend = m;
3438: if (size == 1) mend = mend - extra_rows;
3439: PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);
3440: for (j=mend; j<m; j++) locrowlens[j] = 1;
3441: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
3442: PetscMalloc(size*sizeof(PetscInt),&procsnz);
3443: PetscMemzero(procsnz,size*sizeof(PetscInt));
3444: for (j=0; j<m; j++) {
3445: procsnz[0] += locrowlens[j];
3446: }
3447: for (i=1; i<size; i++) {
3448: mend = browners[i+1] - browners[i];
3449: if (i == size-1) mend = mend - extra_rows;
3450: PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);
3451: for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3452: /* calculate the number of nonzeros on each processor */
3453: for (j=0; j<browners[i+1]-browners[i]; j++) {
3454: procsnz[i] += rowlengths[j];
3455: }
3456: MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);
3457: }
3458: PetscFree(rowlengths);
3459: } else {
3460: MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);
3461: }
3463: if (!rank) {
3464: /* determine max buffer needed and allocate it */
3465: maxnz = procsnz[0];
3466: for (i=1; i<size; i++) {
3467: maxnz = PetscMax(maxnz,procsnz[i]);
3468: }
3469: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
3471: /* read in my part of the matrix column indices */
3472: nz = procsnz[0];
3473: PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);
3474: mycols = ibuf;
3475: if (size == 1) nz -= extra_rows;
3476: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
3477: if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3479: /* read in the column indices for all the other processes (except the last) and ship them off */
3480: for (i=1; i<size-1; i++) {
3481: nz = procsnz[i];
3482: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3483: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
3484: }
3485: /* read in the stuff for the last proc */
3486: if (size != 1) {
3487: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
3488: PetscBinaryRead(fd,cols,nz,PETSC_INT);
3489: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3490: MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);
3491: }
3492: PetscFree(cols);
3493: } else {
3494: /* determine buffer space needed for message */
3495: nz = 0;
3496: for (i=0; i<m; i++) {
3497: nz += locrowlens[i];
3498: }
3499: PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);
3500: mycols = ibuf;
3501: /* receive message of column indices*/
3502: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
3503: MPI_Get_count(&status,MPIU_INT,&maxnz);
3504: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3505: }
3506:
3507: /* loop over local rows, determining number of off diagonal entries */
3508: PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);
3509: PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);
3510: PetscMemzero(mask,Mbs*sizeof(PetscInt));
3511: PetscMemzero(masked1,Mbs*sizeof(PetscInt));
3512: PetscMemzero(masked2,Mbs*sizeof(PetscInt));
3513: rowcount = 0; nzcount = 0;
3514: for (i=0; i<mbs; i++) {
3515: dcount = 0;
3516: odcount = 0;
3517: for (j=0; j<bs; j++) {
3518: kmax = locrowlens[rowcount];
3519: for (k=0; k<kmax; k++) {
3520: tmp = mycols[nzcount++]/bs;
3521: if (!mask[tmp]) {
3522: mask[tmp] = 1;
3523: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3524: else masked1[dcount++] = tmp;
3525: }
3526: }
3527: rowcount++;
3528: }
3529:
3530: dlens[i] = dcount;
3531: odlens[i] = odcount;
3533: /* zero out the mask elements we set */
3534: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3535: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3536: }
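/*
   At this point dlens[i] and odlens[i] hold, for local block row i, the number of distinct
   block columns falling inside and outside the diagonal block range; they feed the
   preallocation of the matrix created below.
*/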
3538: /* create our matrix */
3539: MatCreate(comm,&A);
3540: MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);
3541: MatSetType(A,type);
3542: MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);
3544: if (!rank) {
3545: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);
3546: /* read in my part of the matrix numerical values */
3547: nz = procsnz[0];
3548: vals = buf;
3549: mycols = ibuf;
3550: if (size == 1) nz -= extra_rows;
3551: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3552: if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3554: /* insert into matrix */
3555: jj = rstart*bs;
3556: for (i=0; i<m; i++) {
3557: MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
3558: mycols += locrowlens[i];
3559: vals += locrowlens[i];
3560: jj++;
3561: }
3562: /* read in the other processors' values (except the last one) and ship them out */
3563: for (i=1; i<size-1; i++) {
3564: nz = procsnz[i];
3565: vals = buf;
3566: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3567: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);
3568: }
3569: /* read in the values for the last processor */
3570: if (size != 1){
3571: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
3572: vals = buf;
3573: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
3574: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3575: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);
3576: }
3577: PetscFree(procsnz);
3578: } else {
3579: /* receive numeric values */
3580: PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);
3582: /* receive the message of numerical values */
3583: vals = buf;
3584: mycols = ibuf;
3585: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);
3586: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
3587: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Message length does not match the expected number of nonzeros");
3589: /* insert into matrix */
3590: jj = rstart*bs;
3591: for (i=0; i<m; i++) {
3592: MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
3593: mycols += locrowlens[i];
3594: vals += locrowlens[i];
3595: jj++;
3596: }
3597: }
3598: PetscFree(locrowlens);
3599: PetscFree(buf);
3600: PetscFree(ibuf);
3601: PetscFree2(rowners,browners);
3602: PetscFree2(dlens,odlens);
3603: PetscFree3(mask,masked1,masked2);
3604: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
3605: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
3607: *newmat = A;
3608: return(0);
3609: }
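/*
   A minimal usage sketch, assuming the MatLoad(viewer,type,&newmat) calling sequence of this
   generation of PETSc; the file name is a placeholder and the block size is normally supplied
   through the -matload_block_size run-time option.
*/
PetscViewer viewer;
Mat         A;

PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
MatLoad(viewer,MATMPIBAIJ,&A);      /* ends up in the MPIBAIJ loader above */
/* ... use A ... */
MatDestroy(A);
PetscViewerDestroy(viewer);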
3613: /*@
3614: MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3616: Input Parameters:
3617: . mat - the matrix
3618: . fact - factor
3620: Collective on Mat
3622: Level: advanced
3624: Notes:
3625: This can also be set by the command line option: -mat_use_hash_table <fact>
3627: .keywords: matrix, hashtable, factor, HT
3629: .seealso: MatSetOption()
3630: @*/
3631: PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3632: {
3633: PetscErrorCode ierr,(*f)(Mat,PetscReal);
3636: PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);
3637: if (f) {
3638: (*f)(mat,fact);
3639: }
3640: return(0);
3641: }
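/*
   A short usage sketch (the matrix sizes and factor value are illustrative): the hash-table
   assembly path is enabled with MatSetOption() and MAT_USE_HASH_TABLE, after which the factor
   can be tuned either with this routine or with -mat_use_hash_table <fact> as noted above.
*/
Mat      A;
PetscInt M = 1000,N = 1000;

MatCreate(PETSC_COMM_WORLD,&A);
MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
MatSetType(A,MATMPIBAIJ);
MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
MatMPIBAIJSetHashTableFactor(A,1.6);            /* illustrative factor */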
3646: PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3647: {
3648: Mat_MPIBAIJ *baij;
3651: baij = (Mat_MPIBAIJ*)mat->data;
3652: baij->ht_fact = fact;
3653: return(0);
3654: }
3659: PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3660: {
3661: Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3663: *Ad = a->A;
3664: *Ao = a->B;
3665: *colmap = a->garray;
3666: return(0);
3667: }
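/*
   A hedged sketch of using the accessor above; A is assumed to be an assembled MATMPIBAIJ
   matrix. Ad is the diagonal-block (sequential BAIJ) portion, Ao the off-diagonal portion,
   and colmap is the garray mapping each block column of Ao back to its global block column.
*/
Mat      Ad,Ao;
PetscInt *colmap;
MatInfo  info;

MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);
MatGetInfo(Ad,MAT_LOCAL,&info);     /* e.g. inspect the nonzeros of the local diagonal part */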
3669: /*
3670: Special version for direct calls from Fortran (to eliminate two function-call overheads)
3671: */
3672: #if defined(PETSC_HAVE_FORTRAN_CAPS)
3673: #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3674: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3675: #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3676: #endif
3680: /*@C
3681: MatMPIBAIJSetValuesBlocked - Direct Fortran entry point that replaces a call to MatSetValuesBlocked()
3683: Collective on Mat
3685: Input Parameters:
3686: + mat - the matrix
3687: . min - number of input block rows
3688: . im - input block rows
3689: . nin - number of input block columns
3690: . in - input block columns
3691: . v - numerical values input
3692: - addvin - INSERT_VALUES or ADD_VALUES
3694: Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate code duplication.
3696: Level: advanced
3698: .seealso: MatSetValuesBlocked()
3699: @*/
3700: PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3701: {
3702: /* convert input arguments to C version */
3703: Mat mat = *matin;
3704: PetscInt m = *min, n = *nin;
3705: InsertMode addv = *addvin;
3707: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
3708: const MatScalar *value;
3709: MatScalar *barray=baij->barray;
3710: PetscTruth roworiented = baij->roworiented;
3711: PetscErrorCode ierr;
3712: PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs;
3713: PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3714: PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3715:
3717: /* tasks normally handled by MatSetValuesBlocked() */
3718: if (mat->insertmode == NOT_SET_VALUES) {
3719: mat->insertmode = addv;
3720: }
3721: #if defined(PETSC_USE_DEBUG)
3722: else if (mat->insertmode != addv) {
3723: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3724: }
3725: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3726: #endif
3727: if (mat->assembled) {
3728: mat->was_assembled = PETSC_TRUE;
3729: mat->assembled = PETSC_FALSE;
3730: }
3731: PetscLogEventBegin(MAT_SetValues,mat,0,0,0);
3734: if (!barray) {
3735: PetscMalloc(bs2*sizeof(MatScalar),&barray);
3736: baij->barray = barray;
3737: }
3739: if (roworiented) {
3740: stepval = (n-1)*bs;
3741: } else {
3742: stepval = (m-1)*bs;
3743: }
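/*
   stepval is the number of scalars to skip after copying one bs-long piece of a block: in the
   caller's array v the rows of a block are interleaved with the corresponding rows of the other
   blocks of the same block row (row oriented) or block column (column oriented), so each copied
   bs-long segment is followed by stepval entries belonging to other blocks.
*/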
3744: for (i=0; i<m; i++) {
3745: if (im[i] < 0) continue;
3746: #if defined(PETSC_USE_DEBUG)
3747: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3748: #endif
3749: if (im[i] >= rstart && im[i] < rend) {
3750: row = im[i] - rstart;
3751: for (j=0; j<n; j++) {
3752: /* If only one block column (row oriented) or one block row (column oriented) is given, no copy is required */
3753: if ((roworiented) && (n == 1)) {
3754: barray = (MatScalar*)v + i*bs2;
3755: } else if((!roworiented) && (m == 1)) {
3756: barray = (MatScalar*)v + j*bs2;
3757: } else { /* Here a copy is required */
3758: if (roworiented) {
3759: value = v + i*(stepval+bs)*bs + j*bs;
3760: } else {
3761: value = v + j*(stepval+bs)*bs + i*bs;
3762: }
3763: for (ii=0; ii<bs; ii++,value+=stepval) {
3764: for (jj=0; jj<bs; jj++) {
3765: *barray++ = *value++;
3766: }
3767: }
3768: barray -=bs2;
3769: }
3770:
3771: if (in[j] >= cstart && in[j] < cend){
3772: col = in[j] - cstart;
3773: MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);
3774: }
3775: else if (in[j] < 0) continue;
3776: #if defined(PETSC_USE_DEBUG)
3777: else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3778: #endif
3779: else {
3780: if (mat->was_assembled) {
3781: if (!baij->colmap) {
3782: CreateColmap_MPIBAIJ_Private(mat);
3783: }
3785: #if defined(PETSC_USE_DEBUG)
3786: #if defined (PETSC_USE_CTABLE)
3787: { PetscInt data;
3788: PetscTableFind(baij->colmap,in[j]+1,&data);
3789: if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3790: }
3791: #else
3792: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3793: #endif
3794: #endif
3795: #if defined (PETSC_USE_CTABLE)
3796: PetscTableFind(baij->colmap,in[j]+1,&col);
3797: col = (col - 1)/bs;
3798: #else
3799: col = (baij->colmap[in[j]] - 1)/bs;
3800: #endif
3801: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3802: DisAssemble_MPIBAIJ(mat);
3803: col = in[j];
3804: }
3805: }
3806: else col = in[j];
3807: MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);
3808: }
3809: }
3810: } else {
3811: if (!baij->donotstash) {
3812: if (roworiented) {
3813: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3814: } else {
3815: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
3816: }
3817: }
3818: }
3819: }
3820:
3821: /* task normally handled by MatSetValuesBlocked() */
3822: PetscLogEventEnd(MAT_SetValues,mat,0,0,0);
3823: return(0);
3824: }