Actual source code: baij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include ../src/mat/impls/baij/seq/baij.h
8: #include petscblaslapack.h
9: #include ../src/mat/blockinvert.h
13: /*@
14: MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
16: Collective on Mat
18: Input Parameters:
19: . mat - the matrix
21: Level: advanced
22: @*/
23: PetscErrorCode MatSeqBAIJInvertBlockDiagonal(Mat mat)
24: {
25: PetscErrorCode ierr,(*f)(Mat);
29: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
32: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
33: if (f) {
34: (*f)(mat);
35: } else {
36: SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
37: }
38: return(0);
39: }
44: PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
45: {
46: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
48: PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5];
49: MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25];
50: PetscReal shift = 0.0;
53: if (a->idiagvalid) return(0);
54: MatMarkDiagonal_SeqBAIJ(A);
55: diag_offset = a->diag;
56: if (!a->idiag) {
57: PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
58: PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));
59: }
60: diag = a->idiag;
61: mdiag = a->idiag+bs*bs*mbs;
62: /* factor and invert each block */
63: switch (bs){
64: case 1:
65: for (i=0; i<mbs; i++) {
66: odiag = v + 1*diag_offset[i];
67: diag[0] = odiag[0];
68: mdiag[0] = odiag[0];
69: diag[0] = 1.0 / (diag[0] + shift);
70: diag += 1;
71: mdiag += 1;
72: }
73: break;
74: case 2:
75: for (i=0; i<mbs; i++) {
76: odiag = v + 4*diag_offset[i];
77: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79: Kernel_A_gets_inverse_A_2(diag,shift);
80: diag += 4;
81: mdiag += 4;
82: }
83: break;
84: case 3:
85: for (i=0; i<mbs; i++) {
86: odiag = v + 9*diag_offset[i];
87: diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88: diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89: diag[8] = odiag[8];
90: mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91: mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92: mdiag[8] = odiag[8];
93: Kernel_A_gets_inverse_A_3(diag,shift);
94: diag += 9;
95: mdiag += 9;
96: }
97: break;
98: case 4:
99: for (i=0; i<mbs; i++) {
100: odiag = v + 16*diag_offset[i];
101: PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
102: PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
103: Kernel_A_gets_inverse_A_4(diag,shift);
104: diag += 16;
105: mdiag += 16;
106: }
107: break;
108: case 5:
109: for (i=0; i<mbs; i++) {
110: odiag = v + 25*diag_offset[i];
111: PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
112: PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
113: Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);
114: diag += 25;
115: mdiag += 25;
116: }
117: break;
118: case 6:
119: for (i=0; i<mbs; i++) {
120: odiag = v + 36*diag_offset[i];
121: PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
122: PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
123: Kernel_A_gets_inverse_A_6(diag,shift);
124: diag += 36;
125: mdiag += 36;
126: }
127: break;
128: default:
129: SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
130: }
131: a->idiagvalid = PETSC_TRUE;
132: return(0);
133: }
138: PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
139: {
140: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
141: PetscScalar *x,x1,s1;
142: const PetscScalar *b;
143: const MatScalar *aa = a->a, *idiag,*mdiag,*v;
144: PetscErrorCode ierr;
145: PetscInt m = a->mbs,i,i2,nz,j;
146: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
149: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
150: its = its*lits;
151: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
157: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
159: diag = a->diag;
160: idiag = a->idiag;
161: VecGetArray(xx,&x);
162: VecGetArray(bb,(PetscScalar**)&b);
164: if (flag & SOR_ZERO_INITIAL_GUESS) {
165: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
166: x[0] = b[0]*idiag[0];
167: i2 = 1;
168: idiag += 1;
169: for (i=1; i<m; i++) {
170: v = aa + ai[i];
171: vi = aj + ai[i];
172: nz = diag[i] - ai[i];
173: s1 = b[i2];
174: for (j=0; j<nz; j++) {
175: s1 -= v[j]*x[vi[j]];
176: }
177: x[i2] = idiag[0]*s1;
178: idiag += 1;
179: i2 += 1;
180: }
181: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
182: PetscLogFlops(a->nz);
183: }
184: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
185: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
186: i2 = 0;
187: mdiag = a->idiag+a->mbs;
188: for (i=0; i<m; i++) {
189: x1 = x[i2];
190: x[i2] = mdiag[0]*x1;
191: mdiag += 1;
192: i2 += 1;
193: }
194: PetscLogFlops(m);
195: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
196: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
197: }
198: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
199: idiag = a->idiag+a->mbs - 1;
200: i2 = m - 1;
201: x1 = x[i2];
202: x[i2] = idiag[0]*x1;
203: idiag -= 1;
204: i2 -= 1;
205: for (i=m-2; i>=0; i--) {
206: v = aa + (diag[i]+1);
207: vi = aj + diag[i] + 1;
208: nz = ai[i+1] - diag[i] - 1;
209: s1 = x[i2];
210: for (j=0; j<nz; j++) {
211: s1 -= v[j]*x[vi[j]];
212: }
213: x[i2] = idiag[0]*s1;
214: idiag -= 1;
215: i2 -= 1;
216: }
217: PetscLogFlops(a->nz);
218: }
219: } else {
220: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
221: }
222: VecRestoreArray(xx,&x);
223: VecRestoreArray(bb,(PetscScalar**)&b);
224: return(0);
225: }
229: PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
230: {
231: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
232: PetscScalar *x,x1,x2,s1,s2;
233: const PetscScalar *b;
234: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
235: PetscErrorCode ierr;
236: PetscInt m = a->mbs,i,i2,nz,idx,j,it;
237: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
240: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
241: its = its*lits;
242: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
243: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
244: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
245: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
246: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
248: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
250: diag = a->diag;
251: idiag = a->idiag;
252: VecGetArray(xx,&x);
253: VecGetArray(bb,(PetscScalar**)&b);
255: if (flag & SOR_ZERO_INITIAL_GUESS) {
256: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
257: x[0] = b[0]*idiag[0] + b[1]*idiag[2];
258: x[1] = b[0]*idiag[1] + b[1]*idiag[3];
259: i2 = 2;
260: idiag += 4;
261: for (i=1; i<m; i++) {
262: v = aa + 4*ai[i];
263: vi = aj + ai[i];
264: nz = diag[i] - ai[i];
265: s1 = b[i2]; s2 = b[i2+1];
266: for (j=0; j<nz; j++) {
267: idx = 2*vi[j];
268: it = 4*j;
269: x1 = x[idx]; x2 = x[1+idx];
270: s1 -= v[it]*x1 + v[it+2]*x2;
271: s2 -= v[it+1]*x1 + v[it+3]*x2;
272: }
273: x[i2] = idiag[0]*s1 + idiag[2]*s2;
274: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
275: idiag += 4;
276: i2 += 2;
277: }
278: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
279: PetscLogFlops(4.0*(a->nz));
280: }
281: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
282: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
283: i2 = 0;
284: mdiag = a->idiag+4*a->mbs;
285: for (i=0; i<m; i++) {
286: x1 = x[i2]; x2 = x[i2+1];
287: x[i2] = mdiag[0]*x1 + mdiag[2]*x2;
288: x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
289: mdiag += 4;
290: i2 += 2;
291: }
292: PetscLogFlops(6.0*m);
293: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
294: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
295: }
296: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
297: idiag = a->idiag+4*a->mbs - 4;
298: i2 = 2*m - 2;
299: x1 = x[i2]; x2 = x[i2+1];
300: x[i2] = idiag[0]*x1 + idiag[2]*x2;
301: x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
302: idiag -= 4;
303: i2 -= 2;
304: for (i=m-2; i>=0; i--) {
305: v = aa + 4*(diag[i]+1);
306: vi = aj + diag[i] + 1;
307: nz = ai[i+1] - diag[i] - 1;
308: s1 = x[i2]; s2 = x[i2+1];
309: for (j=0; j<nz; j++) {
310: idx = 2*vi[j];
311: it = 4*j;
312: x1 = x[idx]; x2 = x[1+idx];
313: s1 -= v[it]*x1 + v[it+2]*x2;
314: s2 -= v[it+1]*x1 + v[it+3]*x2;
315: }
316: x[i2] = idiag[0]*s1 + idiag[2]*s2;
317: x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
318: idiag -= 4;
319: i2 -= 2;
320: }
321: PetscLogFlops(4.0*(a->nz));
322: }
323: } else {
324: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
325: }
326: VecRestoreArray(xx,&x);
327: VecRestoreArray(bb,(PetscScalar**)&b);
328: return(0);
329: }
333: PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
334: {
335: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
336: PetscScalar *x,x1,x2,x3,s1,s2,s3;
337: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
338: const PetscScalar *b;
339: PetscErrorCode ierr;
340: PetscInt m = a->mbs,i,i2,nz,idx;
341: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
344: its = its*lits;
345: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
346: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
347: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
348: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
349: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
350: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
352: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
354: diag = a->diag;
355: idiag = a->idiag;
356: VecGetArray(xx,&x);
357: VecGetArray(bb,(PetscScalar**)&b);
359: if (flag & SOR_ZERO_INITIAL_GUESS) {
360: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
361: x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
362: x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
363: x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
364: i2 = 3;
365: idiag += 9;
366: for (i=1; i<m; i++) {
367: v = aa + 9*ai[i];
368: vi = aj + ai[i];
369: nz = diag[i] - ai[i];
370: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
371: while (nz--) {
372: idx = 3*(*vi++);
373: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
374: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
375: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
376: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
377: v += 9;
378: }
379: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
380: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
381: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
382: idiag += 9;
383: i2 += 3;
384: }
385: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
386: PetscLogFlops(9.0*(a->nz));
387: }
388: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
389: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
390: i2 = 0;
391: mdiag = a->idiag+9*a->mbs;
392: for (i=0; i<m; i++) {
393: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
394: x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
395: x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
396: x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
397: mdiag += 9;
398: i2 += 3;
399: }
400: PetscLogFlops(15.0*m);
401: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
402: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
403: }
404: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
405: idiag = a->idiag+9*a->mbs - 9;
406: i2 = 3*m - 3;
407: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
408: x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
409: x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
410: x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
411: idiag -= 9;
412: i2 -= 3;
413: for (i=m-2; i>=0; i--) {
414: v = aa + 9*(diag[i]+1);
415: vi = aj + diag[i] + 1;
416: nz = ai[i+1] - diag[i] - 1;
417: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
418: while (nz--) {
419: idx = 3*(*vi++);
420: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
421: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
422: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
423: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
424: v += 9;
425: }
426: x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
427: x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
428: x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
429: idiag -= 9;
430: i2 -= 3;
431: }
432: PetscLogFlops(9.0*(a->nz));
433: }
434: } else {
435: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
436: }
437: VecRestoreArray(xx,&x);
438: VecRestoreArray(bb,(PetscScalar**)&b);
439: return(0);
440: }
444: PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
445: {
446: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
447: PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4;
448: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
449: const PetscScalar *b;
450: PetscErrorCode ierr;
451: PetscInt m = a->mbs,i,i2,nz,idx;
452: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
455: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
456: its = its*lits;
457: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
461: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
463: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
465: diag = a->diag;
466: idiag = a->idiag;
467: VecGetArray(xx,&x);
468: VecGetArray(bb,(PetscScalar**)&b);
470: if (flag & SOR_ZERO_INITIAL_GUESS) {
471: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472: x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12];
473: x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13];
474: x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
475: x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
476: i2 = 4;
477: idiag += 16;
478: for (i=1; i<m; i++) {
479: v = aa + 16*ai[i];
480: vi = aj + ai[i];
481: nz = diag[i] - ai[i];
482: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
483: while (nz--) {
484: idx = 4*(*vi++);
485: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
486: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
487: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
488: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
489: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
490: v += 16;
491: }
492: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
493: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
494: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
495: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
496: idiag += 16;
497: i2 += 4;
498: }
499: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
500: PetscLogFlops(16.0*(a->nz));
501: }
502: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
503: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
504: i2 = 0;
505: mdiag = a->idiag+16*a->mbs;
506: for (i=0; i<m; i++) {
507: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
508: x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4;
509: x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4;
510: x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
511: x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
512: mdiag += 16;
513: i2 += 4;
514: }
515: PetscLogFlops(28.0*m);
516: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
517: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
518: }
519: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
520: idiag = a->idiag+16*a->mbs - 16;
521: i2 = 4*m - 4;
522: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
523: x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4;
524: x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4;
525: x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
526: x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
527: idiag -= 16;
528: i2 -= 4;
529: for (i=m-2; i>=0; i--) {
530: v = aa + 16*(diag[i]+1);
531: vi = aj + diag[i] + 1;
532: nz = ai[i+1] - diag[i] - 1;
533: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
534: while (nz--) {
535: idx = 4*(*vi++);
536: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
537: s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4;
538: s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4;
539: s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
540: s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
541: v += 16;
542: }
543: x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4;
544: x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4;
545: x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
546: x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
547: idiag -= 16;
548: i2 -= 4;
549: }
550: PetscLogFlops(16.0*(a->nz));
551: }
552: } else {
553: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
554: }
555: VecRestoreArray(xx,&x);
556: VecRestoreArray(bb,(PetscScalar**)&b);
557: return(0);
558: }
562: PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
563: {
564: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
565: PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
566: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
567: const PetscScalar *b;
568: PetscErrorCode ierr;
569: PetscInt m = a->mbs,i,i2,nz,idx;
570: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
573: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
574: its = its*lits;
575: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
576: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
577: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
578: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
579: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
581: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
583: diag = a->diag;
584: idiag = a->idiag;
585: VecGetArray(xx,&x);
586: VecGetArray(bb,(PetscScalar**)&b);
588: if (flag & SOR_ZERO_INITIAL_GUESS) {
589: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
590: x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
591: x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
592: x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
593: x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
594: x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
595: i2 = 5;
596: idiag += 25;
597: for (i=1; i<m; i++) {
598: v = aa + 25*ai[i];
599: vi = aj + ai[i];
600: nz = diag[i] - ai[i];
601: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
602: while (nz--) {
603: idx = 5*(*vi++);
604: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
605: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
606: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
607: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
608: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
609: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
610: v += 25;
611: }
612: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
613: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
614: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
615: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
616: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
617: idiag += 25;
618: i2 += 5;
619: }
620: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
621: PetscLogFlops(25.0*(a->nz));
622: }
623: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
624: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
625: i2 = 0;
626: mdiag = a->idiag+25*a->mbs;
627: for (i=0; i<m; i++) {
628: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
629: x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
630: x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
631: x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
632: x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
633: x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
634: mdiag += 25;
635: i2 += 5;
636: }
637: PetscLogFlops(45.0*m);
638: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
639: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
640: }
641: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
642: idiag = a->idiag+25*a->mbs - 25;
643: i2 = 5*m - 5;
644: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
645: x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
646: x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
647: x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
648: x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
649: x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
650: idiag -= 25;
651: i2 -= 5;
652: for (i=m-2; i>=0; i--) {
653: v = aa + 25*(diag[i]+1);
654: vi = aj + diag[i] + 1;
655: nz = ai[i+1] - diag[i] - 1;
656: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
657: while (nz--) {
658: idx = 5*(*vi++);
659: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
660: s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
661: s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
662: s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
663: s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
664: s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
665: v += 25;
666: }
667: x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
668: x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
669: x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
670: x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
671: x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
672: idiag -= 25;
673: i2 -= 5;
674: }
675: PetscLogFlops(25.0*(a->nz));
676: }
677: } else {
678: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
679: }
680: VecRestoreArray(xx,&x);
681: VecRestoreArray(bb,(PetscScalar**)&b);
682: return(0);
683: }
687: PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
688: {
689: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
690: PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
691: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
692: const PetscScalar *b;
693: PetscErrorCode ierr;
694: PetscInt m = a->mbs,i,i2,nz,idx;
695: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
698: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
699: its = its*lits;
700: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
701: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
702: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
703: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
704: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
706: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
708: diag = a->diag;
709: idiag = a->idiag;
710: VecGetArray(xx,&x);
711: VecGetArray(bb,(PetscScalar**)&b);
713: if (flag & SOR_ZERO_INITIAL_GUESS) {
714: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
715: x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
716: x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
717: x[2] = b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
718: x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
719: x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
720: x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
721: i2 = 6;
722: idiag += 36;
723: for (i=1; i<m; i++) {
724: v = aa + 36*ai[i];
725: vi = aj + ai[i];
726: nz = diag[i] - ai[i];
727: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
728: while (nz--) {
729: idx = 6*(*vi++);
730: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
731: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
732: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
733: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
734: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
735: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
736: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
737: v += 36;
738: }
739: x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
740: x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
741: x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
742: x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
743: x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
744: x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
745: idiag += 36;
746: i2 += 6;
747: }
748: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
749: PetscLogFlops(36.0*(a->nz));
750: }
751: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
752: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
753: i2 = 0;
754: mdiag = a->idiag+36*a->mbs;
755: for (i=0; i<m; i++) {
756: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
757: x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
758: x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
759: x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
760: x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
761: x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
762: x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
763: mdiag += 36;
764: i2 += 6;
765: }
766: PetscLogFlops(60.0*m);
767: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
768: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
769: }
770: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
771: idiag = a->idiag+36*a->mbs - 36;
772: i2 = 6*m - 6;
773: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
774: x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
775: x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
776: x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
777: x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
778: x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
779: x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
780: idiag -= 36;
781: i2 -= 6;
782: for (i=m-2; i>=0; i--) {
783: v = aa + 36*(diag[i]+1);
784: vi = aj + diag[i] + 1;
785: nz = ai[i+1] - diag[i] - 1;
786: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
787: while (nz--) {
788: idx = 6*(*vi++);
789: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
790: s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
791: s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
792: s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
793: s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
794: s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
795: s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
796: v += 36;
797: }
798: x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
799: x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
800: x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
801: x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
802: x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
803: x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
804: idiag -= 36;
805: i2 -= 6;
806: }
807: PetscLogFlops(36.0*(a->nz));
808: }
809: } else {
810: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
811: }
812: VecRestoreArray(xx,&x);
813: VecRestoreArray(bb,(PetscScalar**)&b);
814: return(0);
815: }
819: PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
820: {
821: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
822: PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
823: const MatScalar *v,*aa = a->a, *idiag,*mdiag;
824: const PetscScalar *b;
825: PetscErrorCode ierr;
826: PetscInt m = a->mbs,i,i2,nz,idx;
827: const PetscInt *diag,*ai = a->i,*aj = a->j,*vi;
830: if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
831: its = its*lits;
832: if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
833: if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
834: if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
835: if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
836: if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
838: if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}
840: diag = a->diag;
841: idiag = a->idiag;
842: VecGetArray(xx,&x);
843: VecGetArray(bb,(PetscScalar**)&b);
845: if (flag & SOR_ZERO_INITIAL_GUESS) {
846: if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
847: x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
848: x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
849: x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
850: x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
851: x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
852: x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
853: x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
854: i2 = 7;
855: idiag += 49;
856: for (i=1; i<m; i++) {
857: v = aa + 49*ai[i];
858: vi = aj + ai[i];
859: nz = diag[i] - ai[i];
860: s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
861: while (nz--) {
862: idx = 7*(*vi++);
863: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
864: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
865: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
866: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
867: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
868: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
869: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
870: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
871: v += 49;
872: }
873: x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
874: x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
875: x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
876: x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
877: x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
878: x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
879: x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
880: idiag += 49;
881: i2 += 7;
882: }
883: /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
884: PetscLogFlops(49.0*(a->nz));
885: }
886: if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
887: (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
888: i2 = 0;
889: mdiag = a->idiag+49*a->mbs;
890: for (i=0; i<m; i++) {
891: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
892: x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
893: x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
894: x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
895: x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
896: x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
897: x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
898: x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
899: mdiag += 36;
900: i2 += 6;
901: }
902: PetscLogFlops(93.0*m);
903: } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
904: PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
905: }
906: if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
907: idiag = a->idiag+49*a->mbs - 49;
908: i2 = 7*m - 7;
909: x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
910: x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
911: x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
912: x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
913: x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
914: x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
915: x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
916: x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
917: idiag -= 49;
918: i2 -= 7;
919: for (i=m-2; i>=0; i--) {
920: v = aa + 49*(diag[i]+1);
921: vi = aj + diag[i] + 1;
922: nz = ai[i+1] - diag[i] - 1;
923: s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
924: while (nz--) {
925: idx = 7*(*vi++);
926: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
927: s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
928: s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
929: s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
930: s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
931: s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
932: s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
933: s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
934: v += 49;
935: }
936: x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
937: x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
938: x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
939: x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
940: x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
941: x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
942: x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
943: idiag -= 49;
944: i2 -= 7;
945: }
946: PetscLogFlops(49.0*(a->nz));
947: }
948: } else {
949: SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
950: }
951: VecRestoreArray(xx,&x);
952: VecRestoreArray(bb,(PetscScalar**)&b);
953: return(0);
954: }
956: /*
957: Special version for direct calls from Fortran (Used in PETSc-fun3d)
958: */
959: #if defined(PETSC_HAVE_FORTRAN_CAPS)
960: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
961: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
962: #define matsetvaluesblocked4_ matsetvaluesblocked4
963: #endif
968: void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
969: {
970: Mat A = *AA;
971: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
972: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
973: PetscInt *ai=a->i,*ailen=a->ilen;
974: PetscInt *aj=a->j,stepval,lastcol = -1;
975: const PetscScalar *value = v;
976: MatScalar *ap,*aa = a->a,*bap;
979: if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
980: stepval = (n-1)*4;
981: for (k=0; k<m; k++) { /* loop over added rows */
982: row = im[k];
983: rp = aj + ai[row];
984: ap = aa + 16*ai[row];
985: nrow = ailen[row];
986: low = 0;
987: high = nrow;
988: for (l=0; l<n; l++) { /* loop over added columns */
989: col = in[l];
990: if (col <= lastcol) low = 0; else high = nrow;
991: lastcol = col;
992: value = v + k*(stepval+4 + l)*4;
993: while (high-low > 7) {
994: t = (low+high)/2;
995: if (rp[t] > col) high = t;
996: else low = t;
997: }
998: for (i=low; i<high; i++) {
999: if (rp[i] > col) break;
1000: if (rp[i] == col) {
1001: bap = ap + 16*i;
1002: for (ii=0; ii<4; ii++,value+=stepval) {
1003: for (jj=ii; jj<16; jj+=4) {
1004: bap[jj] += *value++;
1005: }
1006: }
1007: goto noinsert2;
1008: }
1009: }
1010: N = nrow++ - 1;
1011: high++; /* added new column index thus must search to one higher than before */
1012: /* shift up all the later entries in this row */
1013: for (ii=N; ii>=i; ii--) {
1014: rp[ii+1] = rp[ii];
1015: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1016: }
1017: if (N >= i) {
1018: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1019: }
1020: rp[i] = col;
1021: bap = ap + 16*i;
1022: for (ii=0; ii<4; ii++,value+=stepval) {
1023: for (jj=ii; jj<16; jj+=4) {
1024: bap[jj] = *value++;
1025: }
1026: }
1027: noinsert2:;
1028: low = i;
1029: }
1030: ailen[row] = nrow;
1031: }
1032: PetscFunctionReturnVoid();
1033: }
1036: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1037: #define matsetvalues4_ MATSETVALUES4
1038: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1039: #define matsetvalues4_ matsetvalues4
1040: #endif
1045: void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1046: {
1047: Mat A = *AA;
1048: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1049: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1050: PetscInt *ai=a->i,*ailen=a->ilen;
1051: PetscInt *aj=a->j,brow,bcol;
1052: PetscInt ridx,cidx,lastcol = -1;
1053: MatScalar *ap,value,*aa=a->a,*bap;
1054:
1056: for (k=0; k<m; k++) { /* loop over added rows */
1057: row = im[k]; brow = row/4;
1058: rp = aj + ai[brow];
1059: ap = aa + 16*ai[brow];
1060: nrow = ailen[brow];
1061: low = 0;
1062: high = nrow;
1063: for (l=0; l<n; l++) { /* loop over added columns */
1064: col = in[l]; bcol = col/4;
1065: ridx = row % 4; cidx = col % 4;
1066: value = v[l + k*n];
1067: if (col <= lastcol) low = 0; else high = nrow;
1068: lastcol = col;
1069: while (high-low > 7) {
1070: t = (low+high)/2;
1071: if (rp[t] > bcol) high = t;
1072: else low = t;
1073: }
1074: for (i=low; i<high; i++) {
1075: if (rp[i] > bcol) break;
1076: if (rp[i] == bcol) {
1077: bap = ap + 16*i + 4*cidx + ridx;
1078: *bap += value;
1079: goto noinsert1;
1080: }
1081: }
1082: N = nrow++ - 1;
1083: high++; /* added new column thus must search to one higher than before */
1084: /* shift up all the later entries in this row */
1085: for (ii=N; ii>=i; ii--) {
1086: rp[ii+1] = rp[ii];
1087: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1088: }
1089: if (N>=i) {
1090: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1091: }
1092: rp[i] = bcol;
1093: ap[16*i + 4*cidx + ridx] = value;
1094: noinsert1:;
1095: low = i;
1096: }
1097: ailen[brow] = nrow;
1098: }
1099: PetscFunctionReturnVoid();
1100: }
1103: /*
1104: Checks for missing diagonals
1105: */
1108: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1109: {
1110: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1112: PetscInt *diag,*jj = a->j,i;
1115: MatMarkDiagonal_SeqBAIJ(A);
1116: *missing = PETSC_FALSE;
1117: if (A->rmap->n > 0 && !jj) {
1118: *missing = PETSC_TRUE;
1119: if (d) *d = 0;
1120: PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1121: } else {
1122: diag = a->diag;
1123: for (i=0; i<a->mbs; i++) {
1124: if (jj[diag[i]] != i) {
1125: *missing = PETSC_TRUE;
1126: if (d) *d = i;
1127: PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1128: }
1129: }
1130: }
1131: return(0);
1132: }
1136: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1137: {
1138: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1140: PetscInt i,j,m = a->mbs;
1143: if (!a->diag) {
1144: PetscMalloc(m*sizeof(PetscInt),&a->diag);
1145: PetscLogObjectMemory(A,m*sizeof(PetscInt));
1146: a->free_diag = PETSC_TRUE;
1147: }
1148: for (i=0; i<m; i++) {
1149: a->diag[i] = a->i[i+1];
1150: for (j=a->i[i]; j<a->i[i+1]; j++) {
1151: if (a->j[j] == i) {
1152: a->diag[i] = j;
1153: break;
1154: }
1155: }
1156: }
1157: return(0);
1158: }
1161: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1165: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1166: {
1167: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1169: PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1170: PetscInt *tia, *tja;
1173: *nn = n;
1174: if (!ia) return(0);
1175: if (symmetric) {
1176: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1177: nz = tia[n];
1178: } else {
1179: tia = a->i; tja = a->j;
1180: }
1181:
1182: if (!blockcompressed && bs > 1) {
1183: (*nn) *= bs;
1184: nbs = bs;
1185: /* malloc & create the natural set of indices */
1186: PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1187: if (n) {
1188: (*ia)[0] = 0;
1189: for (j=1; j<bs; j++) {
1190: (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1191: }
1192: }
1194: for (i=1; i<n; i++) {
1195: (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1196: for (j=1; j<bs; j++) {
1197: (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1198: }
1199: }
1200: if (n) {
1201: (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1202: }
1204: if (ja) {
1205: PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1206: cnt = 0;
1207: for (i=0; i<n; i++) {
1208: for (j=0; j<bs; j++) {
1209: for (k=tia[i]; k<tia[i+1]; k++) {
1210: for (l=0; l<bs; l++) {
1211: (*ja)[cnt++] = bs*tja[k] + l;
1212: }
1213: }
1214: }
1215: }
1216: }
1218: n *= bs;
1219: nz *= bs*bs;
1220: if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1221: PetscFree(tia);
1222: PetscFree(tja);
1223: }
1224: } else if (oshift == 1) {
1225: if (symmetric) {
1226: PetscInt nz = tia[A->rmap->n/bs];
1227: /* add 1 to i and j indices */
1228: for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1229: *ia = tia;
1230: if (ja) {
1231: for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1232: *ja = tja;
1233: }
1234: } else {
1235: PetscInt nz = a->i[A->rmap->n/bs];
1236: /* malloc space and add 1 to i and j indices */
1237: PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);
1238: for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1239: if (ja) {
1240: PetscMalloc(nz*sizeof(PetscInt),ja);
1241: for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1242: }
1243: }
1244: } else {
1245: *ia = tia;
1246: if (ja) *ja = tja;
1247: }
1248:
1249: return(0);
1250: }
1254: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1255: {
1259: if (!ia) return(0);
1260: if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1261: PetscFree(*ia);
1262: if (ja) {PetscFree(*ja);}
1263: }
1264: return(0);
1265: }
1269: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1270: {
1271: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1275: #if defined(PETSC_USE_LOG)
1276: PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1277: #endif
1278: MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1279: if (a->row) {
1280: ISDestroy(a->row);
1281: }
1282: if (a->col) {
1283: ISDestroy(a->col);
1284: }
1285: if (a->free_diag) {PetscFree(a->diag);}
1286: PetscFree(a->idiag);
1287: if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1288: PetscFree(a->solve_work);
1289: PetscFree(a->mult_work);
1290: if (a->icol) {ISDestroy(a->icol);}
1291: PetscFree(a->saved_values);
1292: PetscFree(a->xtoy);
1293: if (a->compressedrow.use){PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1295: if (a->sbaijMat) {MatDestroy(a->sbaijMat);}
1296: if (a->parent) {MatDestroy(a->parent);}
1297: PetscFree(a);
1299: PetscObjectChangeTypeName((PetscObject)A,0);
1300: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
1301: PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
1302: PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
1303: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
1304: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
1305: PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
1306: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
1307: PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);
1308: return(0);
1309: }
1313: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1314: {
1315: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1319: switch (op) {
1320: case MAT_ROW_ORIENTED:
1321: a->roworiented = flg;
1322: break;
1323: case MAT_KEEP_NONZERO_PATTERN:
1324: a->keepnonzeropattern = flg;
1325: break;
1326: case MAT_NEW_NONZERO_LOCATIONS:
1327: a->nonew = (flg ? 0 : 1);
1328: break;
1329: case MAT_NEW_NONZERO_LOCATION_ERR:
1330: a->nonew = (flg ? -1 : 0);
1331: break;
1332: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1333: a->nonew = (flg ? -2 : 0);
1334: break;
1335: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1336: a->nounused = (flg ? -1 : 0);
1337: break;
1338: case MAT_NEW_DIAGONALS:
1339: case MAT_IGNORE_OFF_PROC_ENTRIES:
1340: case MAT_USE_HASH_TABLE:
1341: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1342: break;
1343: case MAT_SYMMETRIC:
1344: case MAT_STRUCTURALLY_SYMMETRIC:
1345: case MAT_HERMITIAN:
1346: case MAT_SYMMETRY_ETERNAL:
1347: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1348: break;
1349: default:
1350: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1351: }
1352: return(0);
1353: }
1357: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1358: {
1359: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1361: PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1362: MatScalar *aa,*aa_i;
1363: PetscScalar *v_i;
1366: bs = A->rmap->bs;
1367: ai = a->i;
1368: aj = a->j;
1369: aa = a->a;
1370: bs2 = a->bs2;
1371:
1372: if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1373:
1374: bn = row/bs; /* Block number */
1375: bp = row % bs; /* Block Position */
1376: M = ai[bn+1] - ai[bn];
1377: *nz = bs*M;
1378:
1379: if (v) {
1380: *v = 0;
1381: if (*nz) {
1382: PetscMalloc((*nz)*sizeof(PetscScalar),v);
1383: for (i=0; i<M; i++) { /* for each block in the block row */
1384: v_i = *v + i*bs;
1385: aa_i = aa + bs2*(ai[bn] + i);
1386: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1387: }
1388: }
1389: }
1391: if (idx) {
1392: *idx = 0;
1393: if (*nz) {
1394: PetscMalloc((*nz)*sizeof(PetscInt),idx);
1395: for (i=0; i<M; i++) { /* for each block in the block row */
1396: idx_i = *idx + i*bs;
1397: itmp = bs*aj[ai[bn] + i];
1398: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1399: }
1400: }
1401: }
1402: return(0);
1403: }
1407: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1408: {
1412: if (idx) {PetscFree(*idx);}
1413: if (v) {PetscFree(*v);}
1414: return(0);
1415: }
1421: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1422: {
1423: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
1424: Mat C;
1426: PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1427: PetscInt *rows,*cols,bs2=a->bs2;
1428: MatScalar *array;
1431: if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1432: if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1433: PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1434: PetscMemzero(col,(1+nbs)*sizeof(PetscInt));
1436: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1437: MatCreate(((PetscObject)A)->comm,&C);
1438: MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1439: MatSetType(C,((PetscObject)A)->type_name);
1440: MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1441: PetscFree(col);
1442: } else {
1443: C = *B;
1444: }
1446: array = a->a;
1447: PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);
1448: for (i=0; i<mbs; i++) {
1449: cols[0] = i*bs;
1450: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1451: len = ai[i+1] - ai[i];
1452: for (j=0; j<len; j++) {
1453: rows[0] = (*aj++)*bs;
1454: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1455: MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1456: array += bs2;
1457: }
1458: }
1459: PetscFree2(rows,cols);
1460:
1461: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1462: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1463:
1464: if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1465: *B = C;
1466: } else {
1467: MatHeaderCopy(A,C);
1468: }
1469: return(0);
1470: }
1474: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1475: {
1476: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1478: PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1479: int fd;
1480: PetscScalar *aa;
1481: FILE *file;
1484: PetscViewerBinaryGetDescriptor(viewer,&fd);
1485: PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);
1486: col_lens[0] = MAT_FILE_COOKIE;
1488: col_lens[1] = A->rmap->N;
1489: col_lens[2] = A->cmap->n;
1490: col_lens[3] = a->nz*bs2;
1492: /* store lengths of each row and write (including header) to file */
1493: count = 0;
1494: for (i=0; i<a->mbs; i++) {
1495: for (j=0; j<bs; j++) {
1496: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1497: }
1498: }
1499: PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1500: PetscFree(col_lens);
1502: /* store column indices (zero start index) */
1503: PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1504: count = 0;
1505: for (i=0; i<a->mbs; i++) {
1506: for (j=0; j<bs; j++) {
1507: for (k=a->i[i]; k<a->i[i+1]; k++) {
1508: for (l=0; l<bs; l++) {
1509: jj[count++] = bs*a->j[k] + l;
1510: }
1511: }
1512: }
1513: }
1514: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1515: PetscFree(jj);
1517: /* store nonzero values */
1518: PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1519: count = 0;
1520: for (i=0; i<a->mbs; i++) {
1521: for (j=0; j<bs; j++) {
1522: for (k=a->i[i]; k<a->i[i+1]; k++) {
1523: for (l=0; l<bs; l++) {
1524: aa[count++] = a->a[bs2*k + l*bs + j];
1525: }
1526: }
1527: }
1528: }
1529: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1530: PetscFree(aa);
1532: PetscViewerBinaryGetInfoPointer(viewer,&file);
1533: if (file) {
1534: fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1535: }
1536: return(0);
1537: }
1541: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1542: {
1543: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1544: PetscErrorCode ierr;
1545: PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1546: PetscViewerFormat format;
1549: PetscViewerGetFormat(viewer,&format);
1550: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1551: PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);
1552: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1553: Mat aij;
1554: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1555: MatView(aij,viewer);
1556: MatDestroy(aij);
1557: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1558: return(0);
1559: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1560: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1561: for (i=0; i<a->mbs; i++) {
1562: for (j=0; j<bs; j++) {
1563: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1564: for (k=a->i[i]; k<a->i[i+1]; k++) {
1565: for (l=0; l<bs; l++) {
1566: #if defined(PETSC_USE_COMPLEX)
1567: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1568: PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1569: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1570: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1571: PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1572: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1573: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1574: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1575: }
1576: #else
1577: if (a->a[bs2*k + l*bs + j] != 0.0) {
1578: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1579: }
1580: #endif
1581: }
1582: }
1583: PetscViewerASCIIPrintf(viewer,"\n");
1584: }
1585: }
1586: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1587: } else {
1588: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1589: for (i=0; i<a->mbs; i++) {
1590: for (j=0; j<bs; j++) {
1591: PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1592: for (k=a->i[i]; k<a->i[i+1]; k++) {
1593: for (l=0; l<bs; l++) {
1594: #if defined(PETSC_USE_COMPLEX)
1595: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1596: PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1597: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1598: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1599: PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1600: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1601: } else {
1602: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1603: }
1604: #else
1605: PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1606: #endif
1607: }
1608: }
1609: PetscViewerASCIIPrintf(viewer,"\n");
1610: }
1611: }
1612: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1613: }
1614: PetscViewerFlush(viewer);
1615: return(0);
1616: }
1620: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1621: {
1622: Mat A = (Mat) Aa;
1623: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
1625: PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1626: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1627: MatScalar *aa;
1628: PetscViewer viewer;
1632: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1633: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
1635: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
1637: /* loop over matrix elements drawing boxes */
1638: color = PETSC_DRAW_BLUE;
1639: for (i=0,row=0; i<mbs; i++,row+=bs) {
1640: for (j=a->i[i]; j<a->i[i+1]; j++) {
1641: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643: aa = a->a + j*bs2;
1644: for (k=0; k<bs; k++) {
1645: for (l=0; l<bs; l++) {
1646: if (PetscRealPart(*aa++) >= 0.) continue;
1647: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1648: }
1649: }
1650: }
1651: }
1652: color = PETSC_DRAW_CYAN;
1653: for (i=0,row=0; i<mbs; i++,row+=bs) {
1654: for (j=a->i[i]; j<a->i[i+1]; j++) {
1655: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1656: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1657: aa = a->a + j*bs2;
1658: for (k=0; k<bs; k++) {
1659: for (l=0; l<bs; l++) {
1660: if (PetscRealPart(*aa++) != 0.) continue;
1661: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1662: }
1663: }
1664: }
1665: }
1667: color = PETSC_DRAW_RED;
1668: for (i=0,row=0; i<mbs; i++,row+=bs) {
1669: for (j=a->i[i]; j<a->i[i+1]; j++) {
1670: y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1671: x_l = a->j[j]*bs; x_r = x_l + 1.0;
1672: aa = a->a + j*bs2;
1673: for (k=0; k<bs; k++) {
1674: for (l=0; l<bs; l++) {
1675: if (PetscRealPart(*aa++) <= 0.) continue;
1676: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1677: }
1678: }
1679: }
1680: }
1681: return(0);
1682: }
1686: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1687: {
1689: PetscReal xl,yl,xr,yr,w,h;
1690: PetscDraw draw;
1691: PetscTruth isnull;
1695: PetscViewerDrawGetDraw(viewer,0,&draw);
1696: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1698: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1699: xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1700: xr += w; yr += h; xl = -w; yl = -h;
1701: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1702: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1703: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1704: return(0);
1705: }
1709: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1710: {
1712: PetscTruth iascii,isbinary,isdraw;
1715: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1716: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1717: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1718: if (iascii){
1719: MatView_SeqBAIJ_ASCII(A,viewer);
1720: } else if (isbinary) {
1721: MatView_SeqBAIJ_Binary(A,viewer);
1722: } else if (isdraw) {
1723: MatView_SeqBAIJ_Draw(A,viewer);
1724: } else {
1725: Mat B;
1726: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1727: MatView(B,viewer);
1728: MatDestroy(B);
1729: }
1730: return(0);
1731: }
1736: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1737: {
1738: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1739: PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1740: PetscInt *ai = a->i,*ailen = a->ilen;
1741: PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1742: MatScalar *ap,*aa = a->a;
1745: for (k=0; k<m; k++) { /* loop over rows */
1746: row = im[k]; brow = row/bs;
1747: if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1748: if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1749: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1750: nrow = ailen[brow];
1751: for (l=0; l<n; l++) { /* loop over columns */
1752: if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1753: if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1754: col = in[l] ;
1755: bcol = col/bs;
1756: cidx = col%bs;
1757: ridx = row%bs;
1758: high = nrow;
1759: low = 0; /* assume unsorted */
1760: while (high-low > 5) {
1761: t = (low+high)/2;
1762: if (rp[t] > bcol) high = t;
1763: else low = t;
1764: }
1765: for (i=low; i<high; i++) {
1766: if (rp[i] > bcol) break;
1767: if (rp[i] == bcol) {
1768: *v++ = ap[bs2*i+bs*cidx+ridx];
1769: goto finished;
1770: }
1771: }
1772: *v++ = 0.0;
1773: finished:;
1774: }
1775: }
1776: return(0);
1777: }
1779: #define CHUNKSIZE 10
1782: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1783: {
1784: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1785: PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1786: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1787: PetscErrorCode ierr;
1788: PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1789: PetscTruth roworiented=a->roworiented;
1790: const PetscScalar *value = v;
1791: MatScalar *ap,*aa = a->a,*bap;
1794: if (roworiented) {
1795: stepval = (n-1)*bs;
1796: } else {
1797: stepval = (m-1)*bs;
1798: }
1799: for (k=0; k<m; k++) { /* loop over added rows */
1800: row = im[k];
1801: if (row < 0) continue;
1802: #if defined(PETSC_USE_DEBUG)
1803: if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1804: #endif
1805: rp = aj + ai[row];
1806: ap = aa + bs2*ai[row];
1807: rmax = imax[row];
1808: nrow = ailen[row];
1809: low = 0;
1810: high = nrow;
1811: for (l=0; l<n; l++) { /* loop over added columns */
1812: if (in[l] < 0) continue;
1813: #if defined(PETSC_USE_DEBUG)
1814: if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1815: #endif
1816: col = in[l];
1817: if (roworiented) {
1818: value = v + k*(stepval+bs)*bs + l*bs;
1819: } else {
1820: value = v + l*(stepval+bs)*bs + k*bs;
1821: }
1822: if (col <= lastcol) low = 0; else high = nrow;
1823: lastcol = col;
1824: while (high-low > 7) {
1825: t = (low+high)/2;
1826: if (rp[t] > col) high = t;
1827: else low = t;
1828: }
1829: for (i=low; i<high; i++) {
1830: if (rp[i] > col) break;
1831: if (rp[i] == col) {
1832: bap = ap + bs2*i;
1833: if (roworiented) {
1834: if (is == ADD_VALUES) {
1835: for (ii=0; ii<bs; ii++,value+=stepval) {
1836: for (jj=ii; jj<bs2; jj+=bs) {
1837: bap[jj] += *value++;
1838: }
1839: }
1840: } else {
1841: for (ii=0; ii<bs; ii++,value+=stepval) {
1842: for (jj=ii; jj<bs2; jj+=bs) {
1843: bap[jj] = *value++;
1844: }
1845: }
1846: }
1847: } else {
1848: if (is == ADD_VALUES) {
1849: for (ii=0; ii<bs; ii++,value+=stepval) {
1850: for (jj=0; jj<bs; jj++) {
1851: *bap++ += *value++;
1852: }
1853: }
1854: } else {
1855: for (ii=0; ii<bs; ii++,value+=stepval) {
1856: for (jj=0; jj<bs; jj++) {
1857: *bap++ = *value++;
1858: }
1859: }
1860: }
1861: }
1862: goto noinsert2;
1863: }
1864: }
1865: if (nonew == 1) goto noinsert2;
1866: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1867: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1868: N = nrow++ - 1; high++;
1869: /* shift up all the later entries in this row */
1870: for (ii=N; ii>=i; ii--) {
1871: rp[ii+1] = rp[ii];
1872: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1873: }
1874: if (N >= i) {
1875: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1876: }
1877: rp[i] = col;
1878: bap = ap + bs2*i;
1879: if (roworiented) {
1880: for (ii=0; ii<bs; ii++,value+=stepval) {
1881: for (jj=ii; jj<bs2; jj+=bs) {
1882: bap[jj] = *value++;
1883: }
1884: }
1885: } else {
1886: for (ii=0; ii<bs; ii++,value+=stepval) {
1887: for (jj=0; jj<bs; jj++) {
1888: *bap++ = *value++;
1889: }
1890: }
1891: }
1892: noinsert2:;
1893: low = i;
1894: }
1895: ailen[row] = nrow;
1896: }
1897: return(0);
1898: }
1902: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1903: {
1904: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1905: PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1906: PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen;
1908: PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1909: MatScalar *aa = a->a,*ap;
1910: PetscReal ratio=0.6;
1913: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
1915: if (m) rmax = ailen[0];
1916: for (i=1; i<mbs; i++) {
1917: /* move each row back by the amount of empty slots (fshift) before it*/
1918: fshift += imax[i-1] - ailen[i-1];
1919: rmax = PetscMax(rmax,ailen[i]);
1920: if (fshift) {
1921: ip = aj + ai[i]; ap = aa + bs2*ai[i];
1922: N = ailen[i];
1923: for (j=0; j<N; j++) {
1924: ip[j-fshift] = ip[j];
1925: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1926: }
1927: }
1928: ai[i] = ai[i-1] + ailen[i-1];
1929: }
1930: if (mbs) {
1931: fshift += imax[mbs-1] - ailen[mbs-1];
1932: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1933: }
1934: /* reset ilen and imax for each row */
1935: for (i=0; i<mbs; i++) {
1936: ailen[i] = imax[i] = ai[i+1] - ai[i];
1937: }
1938: a->nz = ai[mbs];
1940: /* diagonals may have moved, so kill the diagonal pointers */
1941: a->idiagvalid = PETSC_FALSE;
1942: if (fshift && a->diag) {
1943: PetscFree(a->diag);
1944: PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1945: a->diag = 0;
1946: }
1947: if (fshift && a->nounused == -1) {
1948: SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1949: }
1950: PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1951: PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1952: PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1953: a->reallocs = 0;
1954: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1956: /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1957: if (a->compressedrow.use){
1958: Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1959: }
1961: A->same_nonzero = PETSC_TRUE;
1962: return(0);
1963: }
1965: /*
1966: This function returns an array of flags which indicate the locations of contiguous
1967: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1968: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1969: Assume: sizes should be long enough to hold all the values.
1970: */
1973: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1974: {
1975: PetscInt i,j,k,row;
1976: PetscTruth flg;
1979: for (i=0,j=0; i<n; j++) {
1980: row = idx[i];
1981: if (row%bs!=0) { /* Not the begining of a block */
1982: sizes[j] = 1;
1983: i++;
1984: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1985: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1986: i++;
1987: } else { /* Begining of the block, so check if the complete block exists */
1988: flg = PETSC_TRUE;
1989: for (k=1; k<bs; k++) {
1990: if (row+k != idx[i+k]) { /* break in the block */
1991: flg = PETSC_FALSE;
1992: break;
1993: }
1994: }
1995: if (flg) { /* No break in the bs */
1996: sizes[j] = bs;
1997: i+= bs;
1998: } else {
1999: sizes[j] = 1;
2000: i++;
2001: }
2002: }
2003: }
2004: *bs_max = j;
2005: return(0);
2006: }
2007:
2010: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2011: {
2012: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
2014: PetscInt i,j,k,count,*rows;
2015: PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2016: PetscScalar zero = 0.0;
2017: MatScalar *aa;
2020: /* Make a copy of the IS and sort it */
2021: /* allocate memory for rows,sizes */
2022: PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);
2024: /* copy IS values to rows, and sort them */
2025: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2026: PetscSortInt(is_n,rows);
2027: if (baij->keepnonzeropattern) {
2028: for (i=0; i<is_n; i++) { sizes[i] = 1; }
2029: bs_max = is_n;
2030: A->same_nonzero = PETSC_TRUE;
2031: } else {
2032: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2033: A->same_nonzero = PETSC_FALSE;
2034: }
2036: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2037: row = rows[j];
2038: if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2039: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2040: aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2041: if (sizes[i] == bs && !baij->keepnonzeropattern) {
2042: if (diag != 0.0) {
2043: if (baij->ilen[row/bs] > 0) {
2044: baij->ilen[row/bs] = 1;
2045: baij->j[baij->i[row/bs]] = row/bs;
2046: PetscMemzero(aa,count*bs*sizeof(MatScalar));
2047: }
2048: /* Now insert all the diagonal values for this bs */
2049: for (k=0; k<bs; k++) {
2050: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2051: }
2052: } else { /* (diag == 0.0) */
2053: baij->ilen[row/bs] = 0;
2054: } /* end (diag == 0.0) */
2055: } else { /* (sizes[i] != bs) */
2056: #if defined (PETSC_USE_DEBUG)
2057: if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2058: #endif
2059: for (k=0; k<count; k++) {
2060: aa[0] = zero;
2061: aa += bs;
2062: }
2063: if (diag != 0.0) {
2064: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2065: }
2066: }
2067: }
2069: PetscFree2(rows,sizes);
2070: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2071: return(0);
2072: }
2076: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2077: {
2078: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2079: PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2080: PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2081: PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2083: PetscInt ridx,cidx,bs2=a->bs2;
2084: PetscTruth roworiented=a->roworiented;
2085: MatScalar *ap,value,*aa=a->a,*bap;
2089: for (k=0; k<m; k++) { /* loop over added rows */
2090: row = im[k];
2091: brow = row/bs;
2092: if (row < 0) continue;
2093: #if defined(PETSC_USE_DEBUG)
2094: if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2095: #endif
2096: rp = aj + ai[brow];
2097: ap = aa + bs2*ai[brow];
2098: rmax = imax[brow];
2099: nrow = ailen[brow];
2100: low = 0;
2101: high = nrow;
2102: for (l=0; l<n; l++) { /* loop over added columns */
2103: if (in[l] < 0) continue;
2104: #if defined(PETSC_USE_DEBUG)
2105: if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2106: #endif
2107: col = in[l]; bcol = col/bs;
2108: ridx = row % bs; cidx = col % bs;
2109: if (roworiented) {
2110: value = v[l + k*n];
2111: } else {
2112: value = v[k + l*m];
2113: }
2114: if (col <= lastcol) low = 0; else high = nrow;
2115: lastcol = col;
2116: while (high-low > 7) {
2117: t = (low+high)/2;
2118: if (rp[t] > bcol) high = t;
2119: else low = t;
2120: }
2121: for (i=low; i<high; i++) {
2122: if (rp[i] > bcol) break;
2123: if (rp[i] == bcol) {
2124: bap = ap + bs2*i + bs*cidx + ridx;
2125: if (is == ADD_VALUES) *bap += value;
2126: else *bap = value;
2127: goto noinsert1;
2128: }
2129: }
2130: if (nonew == 1) goto noinsert1;
2131: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2132: MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2133: N = nrow++ - 1; high++;
2134: /* shift up all the later entries in this row */
2135: for (ii=N; ii>=i; ii--) {
2136: rp[ii+1] = rp[ii];
2137: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2138: }
2139: if (N>=i) {
2140: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2141: }
2142: rp[i] = bcol;
2143: ap[bs2*i + bs*cidx + ridx] = value;
2144: a->nz++;
2145: noinsert1:;
2146: low = i;
2147: }
2148: ailen[brow] = nrow;
2149: }
2150: A->same_nonzero = PETSC_FALSE;
2151: return(0);
2152: }
2156: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2157: {
2158: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
2159: Mat outA;
2161: PetscTruth row_identity,col_identity;
2164: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2165: ISIdentity(row,&row_identity);
2166: ISIdentity(col,&col_identity);
2167: if (!row_identity || !col_identity) {
2168: SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2169: }
2171: outA = inA;
2172: inA->factor = MAT_FACTOR_LU;
2174: MatMarkDiagonal_SeqBAIJ(inA);
2176: PetscObjectReference((PetscObject)row);
2177: if (a->row) { ISDestroy(a->row); }
2178: a->row = row;
2179: PetscObjectReference((PetscObject)col);
2180: if (a->col) { ISDestroy(a->col); }
2181: a->col = col;
2182:
2183: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2184: if (a->icol) {
2185: ISDestroy(a->icol);
2186: }
2187: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2188: PetscLogObjectParent(inA,a->icol);
2189:
2190: MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));
2191: if (!a->solve_work) {
2192: PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
2193: PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2194: }
2195: MatLUFactorNumeric(outA,inA,info);
2197: return(0);
2198: }
2203: PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2204: {
2205: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2206: PetscInt i,nz,mbs;
2209: nz = baij->maxnz/baij->bs2;
2210: mbs = baij->mbs;
2211: for (i=0; i<nz; i++) {
2212: baij->j[i] = indices[i];
2213: }
2214: baij->nz = nz;
2215: for (i=0; i<mbs; i++) {
2216: baij->ilen[i] = baij->imax[i];
2217: }
2218: return(0);
2219: }
2224: /*@
2225: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2226: in the matrix.
2228: Input Parameters:
2229: + mat - the SeqBAIJ matrix
2230: - indices - the column indices
2232: Level: advanced
2234: Notes:
2235: This can be called if you have precomputed the nonzero structure of the
2236: matrix and want to provide it to the matrix object to improve the performance
2237: of the MatSetValues() operation.
2239: You MUST have set the correct numbers of nonzeros per row in the call to
2240: MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2242: MUST be called before any calls to MatSetValues();
2244: @*/
2245: PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2246: {
2247: PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2252: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
2253: if (f) {
2254: (*f)(mat,indices);
2255: } else {
2256: SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2257: }
2258: return(0);
2259: }
2263: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2264: {
2265: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2267: PetscInt i,j,n,row,bs,*ai,*aj,mbs;
2268: PetscReal atmp;
2269: PetscScalar *x,zero = 0.0;
2270: MatScalar *aa;
2271: PetscInt ncols,brow,krow,kcol;
2274: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2275: bs = A->rmap->bs;
2276: aa = a->a;
2277: ai = a->i;
2278: aj = a->j;
2279: mbs = a->mbs;
2281: VecSet(v,zero);
2282: VecGetArray(v,&x);
2283: VecGetLocalSize(v,&n);
2284: if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2285: for (i=0; i<mbs; i++) {
2286: ncols = ai[1] - ai[0]; ai++;
2287: brow = bs*i;
2288: for (j=0; j<ncols; j++){
2289: for (kcol=0; kcol<bs; kcol++){
2290: for (krow=0; krow<bs; krow++){
2291: atmp = PetscAbsScalar(*aa);aa++;
2292: row = brow + krow; /* row index */
2293: /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2294: if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2295: }
2296: }
2297: aj++;
2298: }
2299: }
2300: VecRestoreArray(v,&x);
2301: return(0);
2302: }
2306: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2307: {
2311: /* If the two matrices have the same copy implementation, use fast copy. */
2312: if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2313: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2314: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2316: if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2317: SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2318: }
2319: PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));
2320: } else {
2321: MatCopy_Basic(A,B,str);
2322: }
2323: return(0);
2324: }
2328: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2329: {
2333: MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);
2334: return(0);
2335: }
2339: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2340: {
2341: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2343: *array = a->a;
2344: return(0);
2345: }
2349: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2350: {
2352: return(0);
2353: }
2357: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2358: {
2359: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2361: PetscInt i,bs=Y->rmap->bs,j,bs2;
2362: PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz);
2365: if (str == SAME_NONZERO_PATTERN) {
2366: PetscScalar alpha = a;
2367: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2368: } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2369: if (y->xtoy && y->XtoY != X) {
2370: PetscFree(y->xtoy);
2371: MatDestroy(y->XtoY);
2372: }
2373: if (!y->xtoy) { /* get xtoy */
2374: MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2375: y->XtoY = X;
2376: PetscObjectReference((PetscObject)X);
2377: }
2378: bs2 = bs*bs;
2379: for (i=0; i<x->nz; i++) {
2380: j = 0;
2381: while (j < bs2){
2382: y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2383: j++;
2384: }
2385: }
2386: PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2387: } else {
2388: MatAXPY_Basic(Y,a,X,str);
2389: }
2390: return(0);
2391: }
2395: PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2396: {
2397: PetscInt rbs,cbs;
2401: PetscLayoutGetBlockSize(A->rmap,&rbs);
2402: PetscLayoutGetBlockSize(A->cmap,&cbs);
2403: if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2404: if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2405: return(0);
2406: }
2410: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2411: {
2412: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2413: PetscInt i,nz = a->bs2*a->i[a->mbs];
2414: MatScalar *aa = a->a;
2417: for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2418: return(0);
2419: }
2423: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2424: {
2425: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2426: PetscInt i,nz = a->bs2*a->i[a->mbs];
2427: MatScalar *aa = a->a;
2430: for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2431: return(0);
2432: }
2438: /*
2439: Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2440: */
2441: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2442: {
2443: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2445: PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2446: PetscInt nz = a->i[m],row,*jj,mr,col;
2449: *nn = n;
2450: if (!ia) return(0);
2451: if (symmetric) {
2452: SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2453: } else {
2454: PetscMalloc((n+1)*sizeof(PetscInt),&collengths);
2455: PetscMemzero(collengths,n*sizeof(PetscInt));
2456: PetscMalloc((n+1)*sizeof(PetscInt),&cia);
2457: PetscMalloc((nz+1)*sizeof(PetscInt),&cja);
2458: jj = a->j;
2459: for (i=0; i<nz; i++) {
2460: collengths[jj[i]]++;
2461: }
2462: cia[0] = oshift;
2463: for (i=0; i<n; i++) {
2464: cia[i+1] = cia[i] + collengths[i];
2465: }
2466: PetscMemzero(collengths,n*sizeof(PetscInt));
2467: jj = a->j;
2468: for (row=0; row<m; row++) {
2469: mr = a->i[row+1] - a->i[row];
2470: for (i=0; i<mr; i++) {
2471: col = *jj++;
2472: cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2473: }
2474: }
2475: PetscFree(collengths);
2476: *ia = cia; *ja = cja;
2477: }
2478: return(0);
2479: }
2483: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2484: {
2488: if (!ia) return(0);
2489: PetscFree(*ia);
2490: PetscFree(*ja);
2491: return(0);
2492: }
2496: PetscErrorCode MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2497: {
2498: PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2500: PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2501: PetscScalar dx,*y,*xx,*w3_array;
2502: PetscScalar *vscale_array;
2503: PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2504: Vec w1=coloring->w1,w2=coloring->w2,w3;
2505: void *fctx = coloring->fctx;
2506: PetscTruth flg = PETSC_FALSE;
2507: PetscInt ctype=coloring->ctype,N,col_start=0,col_end=0;
2508: Vec x1_tmp;
2514: if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2516: PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);
2517: MatSetUnfactored(J);
2518: PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);
2519: if (flg) {
2520: PetscInfo(coloring,"Not calling MatZeroEntries()\n");
2521: } else {
2522: PetscTruth assembled;
2523: MatAssembled(J,&assembled);
2524: if (assembled) {
2525: MatZeroEntries(J);
2526: }
2527: }
2529: x1_tmp = x1;
2530: if (!coloring->vscale){
2531: VecDuplicate(x1_tmp,&coloring->vscale);
2532: }
2533:
2534: /*
2535: This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2536: coloring->F for the coarser grids from the finest
2537: */
2538: if (coloring->F) {
2539: VecGetLocalSize(coloring->F,&m1);
2540: VecGetLocalSize(w1,&m2);
2541: if (m1 != m2) {
2542: coloring->F = 0;
2543: }
2544: }
2546: if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2547: VecNorm(x1_tmp,NORM_2,&unorm);
2548: }
2549: VecGetOwnershipRange(w1,&start,&end); /* OwnershipRange is used by ghosted x! */
2550:
2551: /* Set w1 = F(x1) */
2552: if (coloring->F) {
2553: w1 = coloring->F; /* use already computed value of function */
2554: coloring->F = 0;
2555: } else {
2556: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2557: (*f)(sctx,x1_tmp,w1,fctx);
2558: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2559: }
2560:
2561: if (!coloring->w3) {
2562: VecDuplicate(x1_tmp,&coloring->w3);
2563: PetscLogObjectParent(coloring,coloring->w3);
2564: }
2565: w3 = coloring->w3;
2567: CHKMEMQ;
2568: /* Compute all the local scale factors, including ghost points */
2569: VecGetLocalSize(x1_tmp,&N);
2570: VecGetArray(x1_tmp,&xx);
2571: VecGetArray(coloring->vscale,&vscale_array);
2572: if (ctype == IS_COLORING_GHOSTED){
2573: col_start = 0; col_end = N;
2574: } else if (ctype == IS_COLORING_GLOBAL){
2575: xx = xx - start;
2576: vscale_array = vscale_array - start;
2577: col_start = start; col_end = N + start;
2578: } CHKMEMQ;
2579: for (col=col_start; col<col_end; col++){
2580: /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2581: if (coloring->htype[0] == 'w') {
2582: dx = 1.0 + unorm;
2583: } else {
2584: dx = xx[col];
2585: }
2586: if (dx == 0.0) dx = 1.0;
2587: #if !defined(PETSC_USE_COMPLEX)
2588: if (dx < umin && dx >= 0.0) dx = umin;
2589: else if (dx < 0.0 && dx > -umin) dx = -umin;
2590: #else
2591: if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin;
2592: else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2593: #endif
2594: dx *= epsilon;
2595: vscale_array[col] = 1.0/dx;
2596: } CHKMEMQ;
2597: if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start;
2598: VecRestoreArray(coloring->vscale,&vscale_array);
2599: if (ctype == IS_COLORING_GLOBAL){
2600: VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2601: VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2602: }
2603: CHKMEMQ;
2604: if (coloring->vscaleforrow) {
2605: vscaleforrow = coloring->vscaleforrow;
2606: } else {
2607: SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2608: }
2611: PetscMalloc(bs*sizeof(PetscInt),&srows);
2612: /*
2613: Loop over each color
2614: */
2615: VecGetArray(coloring->vscale,&vscale_array);
2616: for (k=0; k<coloring->ncolors; k++) {
2617: coloring->currentcolor = k;
2618: for (i=0; i<bs; i++) {
2619: VecCopy(x1_tmp,w3);
2620: VecGetArray(w3,&w3_array);
2621: if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2622: /*
2623: Loop over each column associated with color
2624: adding the perturbation to the vector w3.
2625: */
2626: for (l=0; l<coloring->ncolumns[k]; l++) {
2627: col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */
2628: if (coloring->htype[0] == 'w') {
2629: dx = 1.0 + unorm;
2630: } else {
2631: dx = xx[col];
2632: }
2633: if (dx == 0.0) dx = 1.0;
2634: #if !defined(PETSC_USE_COMPLEX)
2635: if (dx < umin && dx >= 0.0) dx = umin;
2636: else if (dx < 0.0 && dx > -umin) dx = -umin;
2637: #else
2638: if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin;
2639: else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2640: #endif
2641: dx *= epsilon;
2642: if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2643: w3_array[col] += dx;
2644: }
2645: if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2646: VecRestoreArray(w3,&w3_array);
2648: /*
2649: Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2650: w2 = F(x1 + dx) - F(x1)
2651: */
2652: PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2653: (*f)(sctx,w3,w2,fctx);
2654: PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2655: VecAXPY(w2,-1.0,w1);
2656:
2657: /*
2658: Loop over rows of vector, putting results into Jacobian matrix
2659: */
2660: VecGetArray(w2,&y);
2661: for (l=0; l<coloring->nrows[k]; l++) {
2662: row = bs*coloring->rows[k][l]; /* local row index */
2663: col = i + bs*coloring->columnsforrow[k][l]; /* global column index */
2664: for (j=0; j<bs; j++) {
2665: y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2666: srows[j] = row + start + j;
2667: }
2668: MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);
2669: }
2670: VecRestoreArray(w2,&y);
2671: }
2672: } /* endof for each color */
2673: if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2674: VecRestoreArray(coloring->vscale,&vscale_array);
2675: VecRestoreArray(x1_tmp,&xx);
2676: PetscFree(srows);
2677:
2678: coloring->currentcolor = -1;
2679: MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2680: MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2681: PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);
2682: return(0);
2683: }
2685: /* -------------------------------------------------------------------*/
2686: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2687: MatGetRow_SeqBAIJ,
2688: MatRestoreRow_SeqBAIJ,
2689: MatMult_SeqBAIJ_N,
2690: /* 4*/ MatMultAdd_SeqBAIJ_N,
2691: MatMultTranspose_SeqBAIJ,
2692: MatMultTransposeAdd_SeqBAIJ,
2693: 0,
2694: 0,
2695: 0,
2696: /*10*/ 0,
2697: MatLUFactor_SeqBAIJ,
2698: 0,
2699: 0,
2700: MatTranspose_SeqBAIJ,
2701: /*15*/ MatGetInfo_SeqBAIJ,
2702: MatEqual_SeqBAIJ,
2703: MatGetDiagonal_SeqBAIJ,
2704: MatDiagonalScale_SeqBAIJ,
2705: MatNorm_SeqBAIJ,
2706: /*20*/ 0,
2707: MatAssemblyEnd_SeqBAIJ,
2708: MatSetOption_SeqBAIJ,
2709: MatZeroEntries_SeqBAIJ,
2710: /*24*/ MatZeroRows_SeqBAIJ,
2711: 0,
2712: 0,
2713: 0,
2714: 0,
2715: /*29*/ MatSetUpPreallocation_SeqBAIJ,
2716: 0,
2717: 0,
2718: MatGetArray_SeqBAIJ,
2719: MatRestoreArray_SeqBAIJ,
2720: /*34*/ MatDuplicate_SeqBAIJ,
2721: 0,
2722: 0,
2723: MatILUFactor_SeqBAIJ,
2724: 0,
2725: /*39*/ MatAXPY_SeqBAIJ,
2726: MatGetSubMatrices_SeqBAIJ,
2727: MatIncreaseOverlap_SeqBAIJ,
2728: MatGetValues_SeqBAIJ,
2729: MatCopy_SeqBAIJ,
2730: /*44*/ 0,
2731: MatScale_SeqBAIJ,
2732: 0,
2733: 0,
2734: 0,
2735: /*49*/ MatSetBlockSize_SeqBAIJ,
2736: MatGetRowIJ_SeqBAIJ,
2737: MatRestoreRowIJ_SeqBAIJ,
2738: MatGetColumnIJ_SeqBAIJ,
2739: MatRestoreColumnIJ_SeqBAIJ,
2740: /*54*/ MatFDColoringCreate_SeqAIJ,
2741: 0,
2742: 0,
2743: 0,
2744: MatSetValuesBlocked_SeqBAIJ,
2745: /*59*/ MatGetSubMatrix_SeqBAIJ,
2746: MatDestroy_SeqBAIJ,
2747: MatView_SeqBAIJ,
2748: 0,
2749: 0,
2750: /*64*/ 0,
2751: 0,
2752: 0,
2753: 0,
2754: 0,
2755: /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2756: 0,
2757: MatConvert_Basic,
2758: 0,
2759: 0,
2760: /*74*/ 0,
2761: MatFDColoringApply_BAIJ,
2762: 0,
2763: 0,
2764: 0,
2765: /*79*/ 0,
2766: 0,
2767: 0,
2768: 0,
2769: MatLoad_SeqBAIJ,
2770: /*84*/ 0,
2771: 0,
2772: 0,
2773: 0,
2774: 0,
2775: /*89*/ 0,
2776: 0,
2777: 0,
2778: 0,
2779: 0,
2780: /*94*/ 0,
2781: 0,
2782: 0,
2783: 0,
2784: 0,
2785: /*99*/0,
2786: 0,
2787: 0,
2788: 0,
2789: 0,
2790: /*104*/0,
2791: MatRealPart_SeqBAIJ,
2792: MatImaginaryPart_SeqBAIJ,
2793: 0,
2794: 0,
2795: /*109*/0,
2796: 0,
2797: 0,
2798: 0,
2799: MatMissingDiagonal_SeqBAIJ,
2800: /*114*/0,
2801: 0,
2802: 0,
2803: 0,
2804: 0,
2805: /*119*/0,
2806: 0,
2807: MatMultHermitianTranspose_SeqBAIJ,
2808: MatMultHermitianTransposeAdd_SeqBAIJ
2809: };
2814: PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2815: {
2816: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2817: PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2821: if (aij->nonew != 1) {
2822: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2823: }
2825: /* allocate space for values if not already there */
2826: if (!aij->saved_values) {
2827: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2828: PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));
2829: }
2831: /* copy values over */
2832: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2833: return(0);
2834: }
2840: PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2841: {
2842: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2844: PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2847: if (aij->nonew != 1) {
2848: SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2849: }
2850: if (!aij->saved_values) {
2851: SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2852: }
2854: /* copy values over */
2855: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2856: return(0);
2857: }
2868: PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2869: {
2870: Mat_SeqBAIJ *b;
2872: PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2873: PetscTruth flg,skipallocation = PETSC_FALSE;
2877: if (nz == MAT_SKIP_ALLOCATION) {
2878: skipallocation = PETSC_TRUE;
2879: nz = 0;
2880: }
2882: if (bs < 0) {
2883: PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2884: PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);
2885: PetscOptionsEnd();
2886: bs = PetscAbs(bs);
2887: }
2888: if (nnz && newbs != bs) {
2889: SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2890: }
2891: bs = newbs;
2893: PetscLayoutSetBlockSize(B->rmap,bs);
2894: PetscLayoutSetBlockSize(B->cmap,bs);
2895: PetscLayoutSetUp(B->rmap);
2896: PetscLayoutSetUp(B->cmap);
2898: B->preallocated = PETSC_TRUE;
2900: mbs = B->rmap->n/bs;
2901: nbs = B->cmap->n/bs;
2902: bs2 = bs*bs;
2904: if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2905: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2906: }
2908: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2909: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2910: if (nnz) {
2911: for (i=0; i<mbs; i++) {
2912: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2913: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2914: }
2915: }
2917: b = (Mat_SeqBAIJ*)B->data;
2918: PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2919: PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2920: PetscOptionsEnd();
2922: if (!flg) {
2923: switch (bs) {
2924: case 1:
2925: B->ops->mult = MatMult_SeqBAIJ_1;
2926: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2927: B->ops->sor = MatSOR_SeqBAIJ_1;
2928: break;
2929: case 2:
2930: B->ops->mult = MatMult_SeqBAIJ_2;
2931: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2932: B->ops->sor = MatSOR_SeqBAIJ_2;
2933: break;
2934: case 3:
2935: B->ops->mult = MatMult_SeqBAIJ_3;
2936: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2937: B->ops->sor = MatSOR_SeqBAIJ_3;
2938: break;
2939: case 4:
2940: B->ops->mult = MatMult_SeqBAIJ_4;
2941: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2942: B->ops->sor = MatSOR_SeqBAIJ_4;
2943: break;
2944: case 5:
2945: B->ops->mult = MatMult_SeqBAIJ_5;
2946: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2947: B->ops->sor = MatSOR_SeqBAIJ_5;
2948: break;
2949: case 6:
2950: B->ops->mult = MatMult_SeqBAIJ_6;
2951: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2952: B->ops->sor = MatSOR_SeqBAIJ_6;
2953: break;
2954: case 7:
2955: B->ops->mult = MatMult_SeqBAIJ_7;
2956: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2957: B->ops->sor = MatSOR_SeqBAIJ_7;
2958: break;
2959: case 15:
2960: B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2961: break;
2962: default:
2963: B->ops->mult = MatMult_SeqBAIJ_N;
2964: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2965: break;
2966: }
2967: }
2968: B->rmap->bs = bs;
2969: b->mbs = mbs;
2970: b->nbs = nbs;
2971: if (!skipallocation) {
2972: if (!b->imax) {
2973: PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2974: PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2975: b->free_imax_ilen = PETSC_TRUE;
2976: }
2977: /* b->ilen will count nonzeros in each block row so far. */
2978: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2979: if (!nnz) {
2980: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2981: else if (nz <= 0) nz = 1;
2982: for (i=0; i<mbs; i++) b->imax[i] = nz;
2983: nz = nz*mbs;
2984: } else {
2985: nz = 0;
2986: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2987: }
2989: /* allocate the matrix space */
2990: MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2991: PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
2992: PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2993: PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2994: PetscMemzero(b->j,nz*sizeof(PetscInt));
2995: b->singlemalloc = PETSC_TRUE;
2996: b->i[0] = 0;
2997: for (i=1; i<mbs+1; i++) {
2998: b->i[i] = b->i[i-1] + b->imax[i-1];
2999: }
3000: b->free_a = PETSC_TRUE;
3001: b->free_ij = PETSC_TRUE;
3002: } else {
3003: b->free_a = PETSC_FALSE;
3004: b->free_ij = PETSC_FALSE;
3005: }
3007: B->rmap->bs = bs;
3008: b->bs2 = bs2;
3009: b->mbs = mbs;
3010: b->nz = 0;
3011: b->maxnz = nz*bs2;
3012: B->info.nz_unneeded = (PetscReal)b->maxnz;
3013: return(0);
3014: }
3020: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3021: {
3022: PetscInt i,m,nz,nz_max=0,*nnz;
3023: PetscScalar *values=0;
3028: if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3030: PetscLayoutSetBlockSize(B->rmap,bs);
3031: PetscLayoutSetBlockSize(B->cmap,bs);
3032: PetscLayoutSetUp(B->rmap);
3033: PetscLayoutSetUp(B->cmap);
3034: m = B->rmap->n/bs;
3036: if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3037: PetscMalloc((m+1) * sizeof(PetscInt), &nnz);
3038: for(i=0; i<m; i++) {
3039: nz = ii[i+1]- ii[i];
3040: if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3041: nz_max = PetscMax(nz_max, nz);
3042: nnz[i] = nz;
3043: }
3044: MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3045: PetscFree(nnz);
3047: values = (PetscScalar*)V;
3048: if (!values) {
3049: PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
3050: PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
3051: }
3052: for (i=0; i<m; i++) {
3053: PetscInt ncols = ii[i+1] - ii[i];
3054: const PetscInt *icols = jj + ii[i];
3055: const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3056: MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3057: }
3058: if (!V) { PetscFree(values); }
3059: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3060: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3062: return(0);
3063: }
3072: /*MC
3073: MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3074: block sparse compressed row format.
3076: Options Database Keys:
3077: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3079: Level: beginner
3081: .seealso: MatCreateSeqBAIJ()
3082: M*/
3088: PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3089: {
3091: PetscMPIInt size;
3092: Mat_SeqBAIJ *b;
3095: MPI_Comm_size(((PetscObject)B)->comm,&size);
3096: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3098: PetscNewLog(B,Mat_SeqBAIJ,&b);
3099: B->data = (void*)b;
3100: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3101: B->mapping = 0;
3102: b->row = 0;
3103: b->col = 0;
3104: b->icol = 0;
3105: b->reallocs = 0;
3106: b->saved_values = 0;
3108: b->roworiented = PETSC_TRUE;
3109: b->nonew = 0;
3110: b->diag = 0;
3111: b->solve_work = 0;
3112: b->mult_work = 0;
3113: B->spptr = 0;
3114: B->info.nz_unneeded = (PetscReal)b->maxnz;
3115: b->keepnonzeropattern = PETSC_FALSE;
3116: b->xtoy = 0;
3117: b->XtoY = 0;
3118: b->compressedrow.use = PETSC_FALSE;
3119: b->compressedrow.nrows = 0;
3120: b->compressedrow.i = PETSC_NULL;
3121: b->compressedrow.rindex = PETSC_NULL;
3122: b->compressedrow.checked = PETSC_FALSE;
3123: B->same_nonzero = PETSC_FALSE;
3125: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3126: "MatGetFactorAvailable_seqbaij_petsc",
3127: MatGetFactorAvailable_seqbaij_petsc);
3128: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3129: "MatGetFactor_seqbaij_petsc",
3130: MatGetFactor_seqbaij_petsc);
3131: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3132: "MatInvertBlockDiagonal_SeqBAIJ",
3133: MatInvertBlockDiagonal_SeqBAIJ);
3134: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3135: "MatStoreValues_SeqBAIJ",
3136: MatStoreValues_SeqBAIJ);
3137: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3138: "MatRetrieveValues_SeqBAIJ",
3139: MatRetrieveValues_SeqBAIJ);
3140: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3141: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3142: MatSeqBAIJSetColumnIndices_SeqBAIJ);
3143: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3144: "MatConvert_SeqBAIJ_SeqAIJ",
3145: MatConvert_SeqBAIJ_SeqAIJ);
3146: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3147: "MatConvert_SeqBAIJ_SeqSBAIJ",
3148: MatConvert_SeqBAIJ_SeqSBAIJ);
3149: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3150: "MatSeqBAIJSetPreallocation_SeqBAIJ",
3151: MatSeqBAIJSetPreallocation_SeqBAIJ);
3152: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3153: "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3154: MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3155: PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3156: return(0);
3157: }
3162: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3163: {
3164: Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3166: PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3169: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3171: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3172: c->imax = a->imax;
3173: c->ilen = a->ilen;
3174: c->free_imax_ilen = PETSC_FALSE;
3175: } else {
3176: PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
3177: PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));
3178: for (i=0; i<mbs; i++) {
3179: c->imax[i] = a->imax[i];
3180: c->ilen[i] = a->ilen[i];
3181: }
3182: c->free_imax_ilen = PETSC_TRUE;
3183: }
3185: /* allocate the matrix space */
3186: if (mallocmatspace){
3187: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3188: PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);
3189: PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));
3190: c->singlemalloc = PETSC_FALSE;
3191: c->free_ij = PETSC_FALSE;
3192: c->i = a->i;
3193: c->j = a->j;
3194: c->parent = A;
3195: PetscObjectReference((PetscObject)A);
3196: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3197: MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3198: } else {
3199: PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
3200: PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3201: c->singlemalloc = PETSC_TRUE;
3202: c->free_ij = PETSC_TRUE;
3203: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3204: if (mbs > 0) {
3205: PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3206: if (cpvalues == MAT_COPY_VALUES) {
3207: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3208: } else {
3209: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3210: }
3211: }
3212: }
3213: }
3215: c->roworiented = a->roworiented;
3216: c->nonew = a->nonew;
3217: PetscLayoutCopy(A->rmap,&C->rmap);
3218: PetscLayoutCopy(A->cmap,&C->cmap);
3219: c->bs2 = a->bs2;
3220: c->mbs = a->mbs;
3221: c->nbs = a->nbs;
3223: if (a->diag) {
3224: if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3225: c->diag = a->diag;
3226: c->free_diag = PETSC_FALSE;
3227: } else {
3228: PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
3229: PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
3230: for (i=0; i<mbs; i++) {
3231: c->diag[i] = a->diag[i];
3232: }
3233: c->free_diag = PETSC_TRUE;
3234: }
3235: } else c->diag = 0;
3236: c->nz = a->nz;
3237: c->maxnz = a->maxnz;
3238: c->solve_work = 0;
3239: c->mult_work = 0;
3240: c->free_a = PETSC_TRUE;
3241: c->free_ij = PETSC_TRUE;
3242: C->preallocated = PETSC_TRUE;
3243: C->assembled = PETSC_TRUE;
3245: c->compressedrow.use = a->compressedrow.use;
3246: c->compressedrow.nrows = a->compressedrow.nrows;
3247: c->compressedrow.checked = a->compressedrow.checked;
3248: if (a->compressedrow.checked && a->compressedrow.use){
3249: i = a->compressedrow.nrows;
3250: PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);
3251: PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));
3252: PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3253: PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3254: } else {
3255: c->compressedrow.use = PETSC_FALSE;
3256: c->compressedrow.i = PETSC_NULL;
3257: c->compressedrow.rindex = PETSC_NULL;
3258: }
3259: C->same_nonzero = A->same_nonzero;
3260: PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3261: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3262: return(0);
3263: }
3267: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3268: {
3272: MatCreate(((PetscObject)A)->comm,B);
3273: MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3274: MatSetType(*B,MATSEQBAIJ);
3275: MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3276: return(0);
3277: }
3281: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3282: {
3283: Mat_SeqBAIJ *a;
3284: Mat B;
3286: PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1;
3287: PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3288: PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows;
3289: PetscInt *masked,nmask,tmp,bs2,ishift;
3290: PetscMPIInt size;
3291: int fd;
3292: PetscScalar *aa;
3293: MPI_Comm comm = ((PetscObject)viewer)->comm;
3296: PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
3297: PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
3298: PetscOptionsEnd();
3299: bs2 = bs*bs;
3301: MPI_Comm_size(comm,&size);
3302: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3303: PetscViewerBinaryGetDescriptor(viewer,&fd);
3304: PetscBinaryRead(fd,header,4,PETSC_INT);
3305: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3306: M = header[1]; N = header[2]; nz = header[3];
3308: if (header[3] < 0) {
3309: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3310: }
3312: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3314: /*
3315: This code adds extra rows to make sure the number of rows is
3316: divisible by the blocksize
3317: */
3318: mbs = M/bs;
3319: extra_rows = bs - M + bs*(mbs);
3320: if (extra_rows == bs) extra_rows = 0;
3321: else mbs++;
3322: if (extra_rows) {
3323: PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3324: }
3326: /* read in row lengths */
3327: PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
3328: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3329: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3331: /* read in column indices */
3332: PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
3333: PetscBinaryRead(fd,jj,nz,PETSC_INT);
3334: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3336: /* loop over row lengths determining block row lengths */
3337: PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
3338: PetscMemzero(browlengths,mbs*sizeof(PetscInt));
3339: PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);
3340: PetscMemzero(mask,mbs*sizeof(PetscInt));
3341: rowcount = 0;
3342: nzcount = 0;
3343: for (i=0; i<mbs; i++) {
3344: nmask = 0;
3345: for (j=0; j<bs; j++) {
3346: kmax = rowlengths[rowcount];
3347: for (k=0; k<kmax; k++) {
3348: tmp = jj[nzcount++]/bs;
3349: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3350: }
3351: rowcount++;
3352: }
3353: browlengths[i] += nmask;
3354: /* zero out the mask elements we set */
3355: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3356: }
3358: /* create our matrix */
3359: MatCreate(comm,&B);
3360: MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3361: MatSetType(B,type);
3362: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
3363: a = (Mat_SeqBAIJ*)B->data;
3365: /* set matrix "i" values */
3366: a->i[0] = 0;
3367: for (i=1; i<= mbs; i++) {
3368: a->i[i] = a->i[i-1] + browlengths[i-1];
3369: a->ilen[i-1] = browlengths[i-1];
3370: }
3371: a->nz = 0;
3372: for (i=0; i<mbs; i++) a->nz += browlengths[i];
3374: /* read in nonzero values */
3375: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3376: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3377: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3379: /* set "a" and "j" values into matrix */
3380: nzcount = 0; jcount = 0;
3381: for (i=0; i<mbs; i++) {
3382: nzcountb = nzcount;
3383: nmask = 0;
3384: for (j=0; j<bs; j++) {
3385: kmax = rowlengths[i*bs+j];
3386: for (k=0; k<kmax; k++) {
3387: tmp = jj[nzcount++]/bs;
3388: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3389: }
3390: }
3391: /* sort the masked values */
3392: PetscSortInt(nmask,masked);
3394: /* set "j" values into matrix */
3395: maskcount = 1;
3396: for (j=0; j<nmask; j++) {
3397: a->j[jcount++] = masked[j];
3398: mask[masked[j]] = maskcount++;
3399: }
3400: /* set "a" values into matrix */
3401: ishift = bs2*a->i[i];
3402: for (j=0; j<bs; j++) {
3403: kmax = rowlengths[i*bs+j];
3404: for (k=0; k<kmax; k++) {
3405: tmp = jj[nzcountb]/bs ;
3406: block = mask[tmp] - 1;
3407: point = jj[nzcountb] - bs*tmp;
3408: idx = ishift + bs2*block + j + bs*point;
3409: a->a[idx] = (MatScalar)aa[nzcountb++];
3410: }
3411: }
3412: /* zero out the mask elements we set */
3413: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3414: }
3415: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3417: PetscFree(rowlengths);
3418: PetscFree(browlengths);
3419: PetscFree(aa);
3420: PetscFree(jj);
3421: PetscFree2(mask,masked);
3423: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3424: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3425: MatView_Private(B);
3427: *A = B;
3428: return(0);
3429: }
3433: /*@C
3434: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3435: compressed row) format. For good matrix assembly performance the
3436: user should preallocate the matrix storage by setting the parameter nz
3437: (or the array nnz). By setting these parameters accurately, performance
3438: during matrix assembly can be increased by more than a factor of 50.
3440: Collective on MPI_Comm
3442: Input Parameters:
3443: + comm - MPI communicator, set to PETSC_COMM_SELF
3444: . bs - size of block
3445: . m - number of rows
3446: . n - number of columns
3447: . nz - number of nonzero blocks per block row (same for all rows)
3448: - nnz - array containing the number of nonzero blocks in the various block rows
3449: (possibly different for each block row) or PETSC_NULL
3451: Output Parameter:
3452: . A - the matrix
3454: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3455: MatXXXXSetPreallocation() paradgm instead of this routine directly.
3456: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3458: Options Database Keys:
3459: . -mat_no_unroll - uses code that does not unroll the loops in the
3460: block calculations (much slower)
3461: . -mat_block_size - size of the blocks to use
3463: Level: intermediate
3465: Notes:
3466: The number of rows and columns must be divisible by blocksize.
3468: If the nnz parameter is given then the nz parameter is ignored
3470: A nonzero block is any block that as 1 or more nonzeros in it
3472: The block AIJ format is fully compatible with standard Fortran 77
3473: storage. That is, the stored row and column indices can begin at
3474: either one (as in Fortran) or zero. See the users' manual for details.
3476: Specify the preallocated storage with either nz or nnz (not both).
3477: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3478: allocation. For additional details, see the users manual chapter on
3479: matrices.
3481: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3482: @*/
3483: PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3484: {
3486:
3488: MatCreate(comm,A);
3489: MatSetSizes(*A,m,n,m,n);
3490: MatSetType(*A,MATSEQBAIJ);
3491: MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3492: return(0);
3493: }
3497: /*@C
3498: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3499: per row in the matrix. For good matrix assembly performance the
3500: user should preallocate the matrix storage by setting the parameter nz
3501: (or the array nnz). By setting these parameters accurately, performance
3502: during matrix assembly can be increased by more than a factor of 50.
3504: Collective on MPI_Comm
3506: Input Parameters:
3507: + A - the matrix
3508: . bs - size of block
3509: . nz - number of block nonzeros per block row (same for all rows)
3510: - nnz - array containing the number of block nonzeros in the various block rows
3511: (possibly different for each block row) or PETSC_NULL
3513: Options Database Keys:
3514: . -mat_no_unroll - uses code that does not unroll the loops in the
3515: block calculations (much slower)
3516: . -mat_block_size - size of the blocks to use
3518: Level: intermediate
3520: Notes:
3521: If the nnz parameter is given then the nz parameter is ignored
3523: You can call MatGetInfo() to get information on how effective the preallocation was;
3524: for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3525: You can also run with the option -info and look for messages with the string
3526: malloc in them to see if additional memory allocation was needed.
3528: The block AIJ format is fully compatible with standard Fortran 77
3529: storage. That is, the stored row and column indices can begin at
3530: either one (as in Fortran) or zero. See the users' manual for details.
3532: Specify the preallocated storage with either nz or nnz (not both).
3533: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3534: allocation. For additional details, see the users manual chapter on
3535: matrices.
3537: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3538: @*/
3539: PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3540: {
3541: PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3544: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
3545: if (f) {
3546: (*f)(B,bs,nz,nnz);
3547: }
3548: return(0);
3549: }
3553: /*@C
3554: MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3555: (the default sequential PETSc format).
3557: Collective on MPI_Comm
3559: Input Parameters:
3560: + A - the matrix
3561: . i - the indices into j for the start of each local row (starts with zero)
3562: . j - the column indices for each local row (starts with zero) these must be sorted for each row
3563: - v - optional values in the matrix
3565: Level: developer
3567: .keywords: matrix, aij, compressed row, sparse
3569: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3570: @*/
3571: PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3572: {
3573: PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3576: PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);
3577: if (f) {
3578: (*f)(B,bs,i,j,v);
3579: }
3580: return(0);
3581: }
3586: /*@
3587: MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3588: (upper triangular entries in CSR format) provided by the user.
3590: Collective on MPI_Comm
3592: Input Parameters:
3593: + comm - must be an MPI communicator of size 1
3594: . bs - size of block
3595: . m - number of rows
3596: . n - number of columns
3597: . i - row indices
3598: . j - column indices
3599: - a - matrix values
3601: Output Parameter:
3602: . mat - the matrix
3604: Level: intermediate
3606: Notes:
3607: The i, j, and a arrays are not copied by this routine, the user must free these arrays
3608: once the matrix is destroyed
3610: You cannot set new nonzero locations into this matrix, that will generate an error.
3612: The i and j indices are 0 based
3614: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3616: @*/
3617: PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3618: {
3620: PetscInt ii;
3621: Mat_SeqBAIJ *baij;
3624: if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3625: if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3626:
3627: MatCreate(comm,mat);
3628: MatSetSizes(*mat,m,n,m,n);
3629: MatSetType(*mat,MATSEQBAIJ);
3630: MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3631: baij = (Mat_SeqBAIJ*)(*mat)->data;
3632: PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
3633: PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));
3635: baij->i = i;
3636: baij->j = j;
3637: baij->a = a;
3638: baij->singlemalloc = PETSC_FALSE;
3639: baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3640: baij->free_a = PETSC_FALSE;
3641: baij->free_ij = PETSC_FALSE;
3643: for (ii=0; ii<m; ii++) {
3644: baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3645: #if defined(PETSC_USE_DEBUG)
3646: if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3647: #endif
3648: }
3649: #if defined(PETSC_USE_DEBUG)
3650: for (ii=0; ii<baij->i[m]; ii++) {
3651: if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3652: if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3653: }
3654: #endif
3656: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3657: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3658: return(0);
3659: }