Actual source code: baij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include ../src/mat/impls/baij/seq/baij.h
 8:  #include petscblaslapack.h
 9:  #include ../src/mat/blockinvert.h

 13: /*@
 14:   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.

 16:   Collective on Mat

 18:   Input Parameters:
 19: . mat - the matrix

 21:   Level: advanced
 22: @*/
 23: PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
 24: {
 25:   PetscErrorCode ierr,(*f)(Mat);

 29:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
 30:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");

 32:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);
 33:   if (f) {
 34:     (*f)(mat);
 35:   } else {
 36:     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
 37:   }
 38:   return(0);
 39: }

 44: PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
 45: {
 46:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
 48:   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5];
 49:   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25];
 50:   PetscReal      shift = 0.0;

 53:   if (a->idiagvalid) return(0);
 54:   MatMarkDiagonal_SeqBAIJ(A);
 55:   diag_offset = a->diag;
 56:   if (!a->idiag) {
 57:     PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);
 58:     PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));
 59:   }
 60:   diag  = a->idiag;
 61:   mdiag = a->idiag+bs*bs*mbs;
 62:   /* factor and invert each block */
 63:   switch (bs){
 64:     case 1:
 65:       for (i=0; i<mbs; i++) {
 66:         odiag = v + 1*diag_offset[i];
 67:         diag[0]  = odiag[0];
 68:         mdiag[0] = odiag[0];
 69:         diag[0]  = 1.0 / (diag[0] + shift);
 70:         diag    += 1;
 71:         mdiag   += 1;
 72:       }
 73:       break;
 74:     case 2:
 75:       for (i=0; i<mbs; i++) {
 76:         odiag   = v + 4*diag_offset[i];
 77:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 78:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 79:         Kernel_A_gets_inverse_A_2(diag,shift);
 80:         diag    += 4;
 81:         mdiag   += 4;
 82:       }
 83:       break;
 84:     case 3:
 85:       for (i=0; i<mbs; i++) {
 86:         odiag    = v + 9*diag_offset[i];
 87:         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
 88:         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
 89:         diag[8]  = odiag[8];
 90:         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
 91:         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
 92:         mdiag[8] = odiag[8];
 93:         Kernel_A_gets_inverse_A_3(diag,shift);
 94:         diag    += 9;
 95:         mdiag   += 9;
 96:       }
 97:       break;
 98:     case 4:
 99:       for (i=0; i<mbs; i++) {
100:         odiag  = v + 16*diag_offset[i];
101:         PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));
102:         PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));
103:         Kernel_A_gets_inverse_A_4(diag,shift);
104:         diag  += 16;
105:         mdiag += 16;
106:       }
107:       break;
108:     case 5:
109:       for (i=0; i<mbs; i++) {
110:         odiag = v + 25*diag_offset[i];
111:         PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));
112:         PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));
113:         Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);
114:         diag  += 25;
115:         mdiag += 25;
116:       }
117:       break;
118:     case 6:
119:       for (i=0; i<mbs; i++) {
120:         odiag = v + 36*diag_offset[i];
121:         PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));
122:         PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));
123:         Kernel_A_gets_inverse_A_6(diag,shift);
124:         diag  += 36;
125:         mdiag += 36;
126:       }
127:       break;
128:     default:
129:       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
130:   }
131:   a->idiagvalid = PETSC_TRUE;
132:   return(0);
133: }

138: PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
139: {
140:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
141:   PetscScalar        *x,x1,s1;
142:   const PetscScalar  *b;
143:   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
144:   PetscErrorCode     ierr;
145:   PetscInt           m = a->mbs,i,i2,nz,j;
146:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

149:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
150:   its = its*lits;
151:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
152:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
153:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
154:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
155:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

157:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

159:   diag  = a->diag;
160:   idiag = a->idiag;
161:   VecGetArray(xx,&x);
162:   VecGetArray(bb,(PetscScalar**)&b);

164:   if (flag & SOR_ZERO_INITIAL_GUESS) {
165:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
166:       x[0] = b[0]*idiag[0];
167:       i2     = 1;
168:       idiag += 1;
169:       for (i=1; i<m; i++) {
170:         v     = aa + ai[i];
171:         vi    = aj + ai[i];
172:         nz    = diag[i] - ai[i];
173:         s1    = b[i2];
174:         for (j=0; j<nz; j++) {
175:           s1 -= v[j]*x[vi[j]];
176:         }
177:         x[i2]   = idiag[0]*s1;
178:         idiag   += 1;
179:         i2      += 1;
180:       }
181:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
182:       PetscLogFlops(a->nz);
183:     }
184:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
185:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
186:       i2    = 0;
187:       mdiag = a->idiag+a->mbs;
188:       for (i=0; i<m; i++) {
189:         x1      = x[i2];
190:         x[i2]   = mdiag[0]*x1;
191:         mdiag  += 1;
192:         i2     += 1;
193:       }
194:       PetscLogFlops(m);
195:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
196:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
197:     }
198:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
199:       idiag   = a->idiag+a->mbs - 1;
200:       i2      = m - 1;
201:       x1      = x[i2];
202:       x[i2]   = idiag[0]*x1;
203:       idiag -= 1;
204:       i2    -= 1;
205:       for (i=m-2; i>=0; i--) {
206:         v     = aa + (diag[i]+1);
207:         vi    = aj + diag[i] + 1;
208:         nz    = ai[i+1] - diag[i] - 1;
209:         s1    = x[i2];
210:         for (j=0; j<nz; j++) {
211:           s1 -= v[j]*x[vi[j]];
212:         }
213:         x[i2]   = idiag[0]*s1;
214:         idiag   -= 1;
215:         i2      -= 1;
216:       }
217:       PetscLogFlops(a->nz);
218:     }
219:   } else {
220:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
221:   }
222:   VecRestoreArray(xx,&x);
223:   VecRestoreArray(bb,(PetscScalar**)&b);
224:   return(0);
225: }

229: PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
230: {
231:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
232:   PetscScalar        *x,x1,x2,s1,s2;
233:   const PetscScalar  *b;
234:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
235:   PetscErrorCode     ierr;
236:   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
237:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

240:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
241:   its = its*lits;
242:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
243:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
244:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
245:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
246:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

248:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

250:   diag  = a->diag;
251:   idiag = a->idiag;
252:   VecGetArray(xx,&x);
253:   VecGetArray(bb,(PetscScalar**)&b);

255:   if (flag & SOR_ZERO_INITIAL_GUESS) {
256:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
257:       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
258:       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
259:       i2     = 2;
260:       idiag += 4;
261:       for (i=1; i<m; i++) {
262:         v     = aa + 4*ai[i];
263:         vi    = aj + ai[i];
264:         nz    = diag[i] - ai[i];
265:         s1    = b[i2]; s2 = b[i2+1];
266:         for (j=0; j<nz; j++) {
267:           idx  = 2*vi[j];
268:           it   = 4*j;
269:           x1   = x[idx]; x2 = x[1+idx];
270:           s1  -= v[it]*x1 + v[it+2]*x2;
271:           s2  -= v[it+1]*x1 + v[it+3]*x2;
272:         }
273:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
274:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
275:         idiag   += 4;
276:         i2      += 2;
277:       }
278:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
279:       PetscLogFlops(4.0*(a->nz));
280:     }
281:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
282:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
283:       i2    = 0;
284:       mdiag = a->idiag+4*a->mbs;
285:       for (i=0; i<m; i++) {
286:         x1      = x[i2]; x2 = x[i2+1];
287:         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
288:         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
289:         mdiag  += 4;
290:         i2     += 2;
291:       }
292:       PetscLogFlops(6.0*m);
293:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
294:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
295:     }
296:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
297:       idiag   = a->idiag+4*a->mbs - 4;
298:       i2      = 2*m - 2;
299:       x1      = x[i2]; x2 = x[i2+1];
300:       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
301:       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
302:       idiag -= 4;
303:       i2    -= 2;
304:       for (i=m-2; i>=0; i--) {
305:         v     = aa + 4*(diag[i]+1);
306:         vi    = aj + diag[i] + 1;
307:         nz    = ai[i+1] - diag[i] - 1;
308:         s1    = x[i2]; s2 = x[i2+1];
309:         for (j=0; j<nz; j++) {
310:            idx  = 2*vi[j];
311:           it   = 4*j;
312:           x1   = x[idx]; x2 = x[1+idx];
313:           s1  -= v[it]*x1 + v[it+2]*x2;
314:           s2  -= v[it+1]*x1 + v[it+3]*x2;
315:         }
316:         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
317:         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
318:         idiag   -= 4;
319:         i2      -= 2;
320:       }
321:       PetscLogFlops(4.0*(a->nz));
322:     }
323:   } else {
324:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
325:   }
326:   VecRestoreArray(xx,&x);
327:   VecRestoreArray(bb,(PetscScalar**)&b);
328:   return(0);
329: }

333: PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
334: {
335:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
336:   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
337:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
338:   const PetscScalar  *b;
339:   PetscErrorCode     ierr;
340:   PetscInt           m = a->mbs,i,i2,nz,idx;
341:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

344:   its = its*lits;
345:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
346:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
347:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
348:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
349:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
350:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

352:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

354:   diag  = a->diag;
355:   idiag = a->idiag;
356:   VecGetArray(xx,&x);
357:   VecGetArray(bb,(PetscScalar**)&b);

359:   if (flag & SOR_ZERO_INITIAL_GUESS) {
360:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
361:       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
362:       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
363:       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
364:       i2     = 3;
365:       idiag += 9;
366:       for (i=1; i<m; i++) {
367:         v     = aa + 9*ai[i];
368:         vi    = aj + ai[i];
369:         nz    = diag[i] - ai[i];
370:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
371:         while (nz--) {
372:           idx  = 3*(*vi++);
373:           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
374:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
375:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
376:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
377:           v   += 9;
378:         }
379:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
380:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
381:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
382:         idiag   += 9;
383:         i2      += 3;
384:       }
385:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
386:       PetscLogFlops(9.0*(a->nz));
387:     }
388:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
389:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
390:       i2    = 0;
391:       mdiag = a->idiag+9*a->mbs;
392:       for (i=0; i<m; i++) {
393:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
394:         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
395:         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
396:         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
397:         mdiag  += 9;
398:         i2     += 3;
399:       }
400:       PetscLogFlops(15.0*m);
401:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
402:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
403:     }
404:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
405:       idiag   = a->idiag+9*a->mbs - 9;
406:       i2      = 3*m - 3;
407:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
408:       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
409:       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
410:       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
411:       idiag -= 9;
412:       i2    -= 3;
413:       for (i=m-2; i>=0; i--) {
414:         v     = aa + 9*(diag[i]+1);
415:         vi    = aj + diag[i] + 1;
416:         nz    = ai[i+1] - diag[i] - 1;
417:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
418:         while (nz--) {
419:           idx  = 3*(*vi++);
420:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
421:           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
422:           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
423:           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
424:           v   += 9;
425:         }
426:         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
427:         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
428:         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
429:         idiag   -= 9;
430:         i2      -= 3;
431:       }
432:       PetscLogFlops(9.0*(a->nz));
433:     }
434:   } else {
435:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
436:   }
437:   VecRestoreArray(xx,&x);
438:   VecRestoreArray(bb,(PetscScalar**)&b);
439:   return(0);
440: }

444: PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
445: {
446:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
447:   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
448:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
449:   const PetscScalar  *b;
450:   PetscErrorCode     ierr;
451:   PetscInt           m = a->mbs,i,i2,nz,idx;
452:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

455:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
456:   its = its*lits;
457:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
461:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

463:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

465:   diag  = a->diag;
466:   idiag = a->idiag;
467:   VecGetArray(xx,&x);
468:   VecGetArray(bb,(PetscScalar**)&b);

470:   if (flag & SOR_ZERO_INITIAL_GUESS) {
471:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472:       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
473:       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
474:       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
475:       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
476:       i2     = 4;
477:       idiag += 16;
478:       for (i=1; i<m; i++) {
479:         v     = aa + 16*ai[i];
480:         vi    = aj + ai[i];
481:         nz    = diag[i] - ai[i];
482:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
483:         while (nz--) {
484:           idx  = 4*(*vi++);
485:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
486:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
487:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
488:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
489:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
490:           v   += 16;
491:         }
492:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
493:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
494:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
495:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
496:         idiag   += 16;
497:         i2      += 4;
498:       }
499:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
500:       PetscLogFlops(16.0*(a->nz));
501:     }
502:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
503:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
504:       i2    = 0;
505:       mdiag = a->idiag+16*a->mbs;
506:       for (i=0; i<m; i++) {
507:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
508:         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
509:         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
510:         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
511:         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
512:         mdiag  += 16;
513:         i2     += 4;
514:       }
515:       PetscLogFlops(28.0*m);
516:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
517:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
518:     }
519:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
520:       idiag   = a->idiag+16*a->mbs - 16;
521:       i2      = 4*m - 4;
522:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
523:       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
524:       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
525:       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
526:       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
527:       idiag -= 16;
528:       i2    -= 4;
529:       for (i=m-2; i>=0; i--) {
530:         v     = aa + 16*(diag[i]+1);
531:         vi    = aj + diag[i] + 1;
532:         nz    = ai[i+1] - diag[i] - 1;
533:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
534:         while (nz--) {
535:           idx  = 4*(*vi++);
536:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
537:           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
538:           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
539:           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
540:           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
541:           v   += 16;
542:         }
543:         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
544:         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
545:         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
546:         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
547:         idiag   -= 16;
548:         i2      -= 4;
549:       }
550:       PetscLogFlops(16.0*(a->nz));
551:     }
552:   } else {
553:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
554:   }
555:   VecRestoreArray(xx,&x);
556:   VecRestoreArray(bb,(PetscScalar**)&b);
557:   return(0);
558: }

562: PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
563: {
564:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
565:   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
566:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
567:   const PetscScalar  *b;
568:   PetscErrorCode     ierr;
569:   PetscInt           m = a->mbs,i,i2,nz,idx;
570:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

573:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
574:   its = its*lits;
575:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
576:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
577:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
578:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
579:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

581:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

583:   diag  = a->diag;
584:   idiag = a->idiag;
585:   VecGetArray(xx,&x);
586:   VecGetArray(bb,(PetscScalar**)&b);

588:   if (flag & SOR_ZERO_INITIAL_GUESS) {
589:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
590:       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
591:       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
592:       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
593:       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
594:       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
595:       i2     = 5;
596:       idiag += 25;
597:       for (i=1; i<m; i++) {
598:         v     = aa + 25*ai[i];
599:         vi    = aj + ai[i];
600:         nz    = diag[i] - ai[i];
601:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
602:         while (nz--) {
603:           idx  = 5*(*vi++);
604:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
605:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
606:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
607:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
608:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
609:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
610:           v   += 25;
611:         }
612:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
613:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
614:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
615:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
616:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
617:         idiag   += 25;
618:         i2      += 5;
619:       }
620:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
621:       PetscLogFlops(25.0*(a->nz));
622:     }
623:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
624:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
625:       i2    = 0;
626:       mdiag = a->idiag+25*a->mbs;
627:       for (i=0; i<m; i++) {
628:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
629:         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
630:         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
631:         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
632:         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
633:         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
634:         mdiag  += 25;
635:         i2     += 5;
636:       }
637:       PetscLogFlops(45.0*m);
638:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
639:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
640:     }
641:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
642:       idiag   = a->idiag+25*a->mbs - 25;
643:       i2      = 5*m - 5;
644:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
645:       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
646:       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
647:       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
648:       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
649:       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
650:       idiag -= 25;
651:       i2    -= 5;
652:       for (i=m-2; i>=0; i--) {
653:         v     = aa + 25*(diag[i]+1);
654:         vi    = aj + diag[i] + 1;
655:         nz    = ai[i+1] - diag[i] - 1;
656:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
657:         while (nz--) {
658:           idx  = 5*(*vi++);
659:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
660:           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
661:           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
662:           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
663:           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
664:           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
665:           v   += 25;
666:         }
667:         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
668:         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
669:         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
670:         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
671:         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
672:         idiag   -= 25;
673:         i2      -= 5;
674:       }
675:       PetscLogFlops(25.0*(a->nz));
676:     }
677:   } else {
678:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
679:   }
680:   VecRestoreArray(xx,&x);
681:   VecRestoreArray(bb,(PetscScalar**)&b);
682:   return(0);
683: }

687: PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
688: {
689:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
690:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
691:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
692:   const PetscScalar  *b;
693:   PetscErrorCode     ierr;
694:   PetscInt           m = a->mbs,i,i2,nz,idx;
695:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

698:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
699:   its = its*lits;
700:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
701:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
702:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
703:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
704:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

706:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

708:   diag  = a->diag;
709:   idiag = a->idiag;
710:   VecGetArray(xx,&x);
711:   VecGetArray(bb,(PetscScalar**)&b);

713:   if (flag & SOR_ZERO_INITIAL_GUESS) {
714:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
715:       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
716:       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
717:       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
718:       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
719:       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
720:       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
721:       i2     = 6;
722:       idiag += 36;
723:       for (i=1; i<m; i++) {
724:         v     = aa + 36*ai[i];
725:         vi    = aj + ai[i];
726:         nz    = diag[i] - ai[i];
727:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
728:         while (nz--) {
729:           idx  = 6*(*vi++);
730:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
731:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
732:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
733:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
734:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
735:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
736:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
737:           v   += 36;
738:         }
739:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
740:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
741:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
742:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
743:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
744:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
745:         idiag   += 36;
746:         i2      += 6;
747:       }
748:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
749:       PetscLogFlops(36.0*(a->nz));
750:     }
751:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
752:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
753:       i2    = 0;
754:       mdiag = a->idiag+36*a->mbs;
755:       for (i=0; i<m; i++) {
756:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
757:         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
758:         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
759:         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
760:         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
761:         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
762:         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
763:         mdiag  += 36;
764:         i2     += 6;
765:       }
766:       PetscLogFlops(60.0*m);
767:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
768:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
769:     }
770:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
771:       idiag   = a->idiag+36*a->mbs - 36;
772:       i2      = 6*m - 6;
773:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
774:       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
775:       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
776:       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
777:       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
778:       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
779:       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
780:       idiag -= 36;
781:       i2    -= 6;
782:       for (i=m-2; i>=0; i--) {
783:         v     = aa + 36*(diag[i]+1);
784:         vi    = aj + diag[i] + 1;
785:         nz    = ai[i+1] - diag[i] - 1;
786:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
787:         while (nz--) {
788:           idx  = 6*(*vi++);
789:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
790:           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
791:           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
792:           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
793:           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
794:           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
795:           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
796:           v   += 36;
797:         }
798:         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
799:         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
800:         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
801:         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
802:         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
803:         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
804:         idiag   -= 36;
805:         i2      -= 6;
806:       }
807:       PetscLogFlops(36.0*(a->nz));
808:     }
809:   } else {
810:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
811:   }
812:   VecRestoreArray(xx,&x);
813:   VecRestoreArray(bb,(PetscScalar**)&b);
814:   return(0);
815: }

819: PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
820: {
821:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
822:   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
823:   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
824:   const PetscScalar  *b;
825:   PetscErrorCode     ierr;
826:   PetscInt           m = a->mbs,i,i2,nz,idx;
827:   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;

830:   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
831:   its = its*lits;
832:   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
833:   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
834:   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
835:   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
836:   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");

838:   if (!a->idiagvalid){MatInvertBlockDiagonal_SeqBAIJ(A);}

840:   diag  = a->diag;
841:   idiag = a->idiag;
842:   VecGetArray(xx,&x);
843:   VecGetArray(bb,(PetscScalar**)&b);

845:   if (flag & SOR_ZERO_INITIAL_GUESS) {
846:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
847:       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
848:       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
849:       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
850:       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
851:       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
852:       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
853:       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
854:       i2     = 7;
855:       idiag += 49;
856:       for (i=1; i<m; i++) {
857:         v     = aa + 49*ai[i];
858:         vi    = aj + ai[i];
859:         nz    = diag[i] - ai[i];
860:         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
861:         while (nz--) {
862:           idx  = 7*(*vi++);
863:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
864:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
865:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
866:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
867:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
868:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
869:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
870:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
871:           v   += 49;
872:         }
873:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
874:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
875:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
876:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
877:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
878:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
879:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
880:         idiag   += 49;
881:         i2      += 7;
882:       }
883:       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
884:       PetscLogFlops(49.0*(a->nz));
885:     }
886:     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
887:         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
888:       i2    = 0;
889:       mdiag = a->idiag+49*a->mbs;
890:       for (i=0; i<m; i++) {
891:         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
892:         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
893:         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
894:         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
895:         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
896:         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
897:         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
898:         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
899:         mdiag  += 36;
900:         i2     += 6;
901:       }
902:       PetscLogFlops(93.0*m);
903:     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
904:       PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));
905:     }
906:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
907:       idiag   = a->idiag+49*a->mbs - 49;
908:       i2      = 7*m - 7;
909:       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
910:       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
911:       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
912:       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
913:       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
914:       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
915:       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
916:       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
917:       idiag -= 49;
918:       i2    -= 7;
919:       for (i=m-2; i>=0; i--) {
920:         v     = aa + 49*(diag[i]+1);
921:         vi    = aj + diag[i] + 1;
922:         nz    = ai[i+1] - diag[i] - 1;
923:         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
924:         while (nz--) {
925:           idx  = 7*(*vi++);
926:           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
927:           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
928:           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
929:           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
930:           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
931:           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
932:           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
933:           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
934:           v   += 49;
935:         }
936:         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
937:         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
938:         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
939:         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
940:         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
941:         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
942:         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
943:         idiag   -= 49;
944:         i2      -= 7;
945:       }
946:       PetscLogFlops(49.0*(a->nz));
947:     }
948:   } else {
949:     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
950:   }
951:   VecRestoreArray(xx,&x);
952:   VecRestoreArray(bb,(PetscScalar**)&b);
953:   return(0);
954: }

956: /*
957:     Special version for direct calls from Fortran (Used in PETSc-fun3d)
958: */
959: #if defined(PETSC_HAVE_FORTRAN_CAPS)
960: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
961: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
962: #define matsetvaluesblocked4_ matsetvaluesblocked4
963: #endif

968: void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
969: {
970:   Mat               A = *AA;
971:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
972:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
973:   PetscInt          *ai=a->i,*ailen=a->ilen;
974:   PetscInt          *aj=a->j,stepval,lastcol = -1;
975:   const PetscScalar *value = v;
976:   MatScalar         *ap,*aa = a->a,*bap;

979:   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
980:   stepval = (n-1)*4;
981:   for (k=0; k<m; k++) { /* loop over added rows */
982:     row  = im[k];
983:     rp   = aj + ai[row];
984:     ap   = aa + 16*ai[row];
985:     nrow = ailen[row];
986:     low  = 0;
987:     high = nrow;
988:     for (l=0; l<n; l++) { /* loop over added columns */
989:       col = in[l];
990:       if (col <= lastcol) low = 0; else high = nrow;
991:       lastcol = col;
992:       value = v + k*(stepval+4 + l)*4;
993:       while (high-low > 7) {
994:         t = (low+high)/2;
995:         if (rp[t] > col) high = t;
996:         else             low  = t;
997:       }
998:       for (i=low; i<high; i++) {
999:         if (rp[i] > col) break;
1000:         if (rp[i] == col) {
1001:           bap  = ap +  16*i;
1002:           for (ii=0; ii<4; ii++,value+=stepval) {
1003:             for (jj=ii; jj<16; jj+=4) {
1004:               bap[jj] += *value++;
1005:             }
1006:           }
1007:           goto noinsert2;
1008:         }
1009:       }
1010:       N = nrow++ - 1;
1011:       high++; /* added new column index thus must search to one higher than before */
1012:       /* shift up all the later entries in this row */
1013:       for (ii=N; ii>=i; ii--) {
1014:         rp[ii+1] = rp[ii];
1015:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1016:       }
1017:       if (N >= i) {
1018:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1019:       }
1020:       rp[i] = col;
1021:       bap   = ap +  16*i;
1022:       for (ii=0; ii<4; ii++,value+=stepval) {
1023:         for (jj=ii; jj<16; jj+=4) {
1024:           bap[jj] = *value++;
1025:         }
1026:       }
1027:       noinsert2:;
1028:       low = i;
1029:     }
1030:     ailen[row] = nrow;
1031:   }
1032:   PetscFunctionReturnVoid();
1033: }

1036: #if defined(PETSC_HAVE_FORTRAN_CAPS)
1037: #define matsetvalues4_ MATSETVALUES4
1038: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1039: #define matsetvalues4_ matsetvalues4
1040: #endif

1045: void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1046: {
1047:   Mat         A = *AA;
1048:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1049:   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1050:   PetscInt    *ai=a->i,*ailen=a->ilen;
1051:   PetscInt    *aj=a->j,brow,bcol;
1052:   PetscInt    ridx,cidx,lastcol = -1;
1053:   MatScalar   *ap,value,*aa=a->a,*bap;
1054: 
1056:   for (k=0; k<m; k++) { /* loop over added rows */
1057:     row  = im[k]; brow = row/4;
1058:     rp   = aj + ai[brow];
1059:     ap   = aa + 16*ai[brow];
1060:     nrow = ailen[brow];
1061:     low  = 0;
1062:     high = nrow;
1063:     for (l=0; l<n; l++) { /* loop over added columns */
1064:       col = in[l]; bcol = col/4;
1065:       ridx = row % 4; cidx = col % 4;
1066:       value = v[l + k*n];
1067:       if (col <= lastcol) low = 0; else high = nrow;
1068:       lastcol = col;
1069:       while (high-low > 7) {
1070:         t = (low+high)/2;
1071:         if (rp[t] > bcol) high = t;
1072:         else              low  = t;
1073:       }
1074:       for (i=low; i<high; i++) {
1075:         if (rp[i] > bcol) break;
1076:         if (rp[i] == bcol) {
1077:           bap  = ap +  16*i + 4*cidx + ridx;
1078:           *bap += value;
1079:           goto noinsert1;
1080:         }
1081:       }
1082:       N = nrow++ - 1;
1083:       high++; /* added new column thus must search to one higher than before */
1084:       /* shift up all the later entries in this row */
1085:       for (ii=N; ii>=i; ii--) {
1086:         rp[ii+1] = rp[ii];
1087:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1088:       }
1089:       if (N>=i) {
1090:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1091:       }
1092:       rp[i]                    = bcol;
1093:       ap[16*i + 4*cidx + ridx] = value;
1094:       noinsert1:;
1095:       low = i;
1096:     }
1097:     ailen[brow] = nrow;
1098:   }
1099:   PetscFunctionReturnVoid();
1100: }

1103: /*
1104:      Checks for missing diagonals
1105: */
1108: PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1109: {
1110:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1112:   PetscInt       *diag,*jj = a->j,i;

1115:   MatMarkDiagonal_SeqBAIJ(A);
1116:   *missing = PETSC_FALSE;
1117:   if (A->rmap->n > 0 && !jj) {
1118:     *missing  = PETSC_TRUE;
1119:     if (d) *d = 0;
1120:     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1121:   } else {
1122:     diag     = a->diag;
1123:     for (i=0; i<a->mbs; i++) {
1124:       if (jj[diag[i]] != i) {
1125:         *missing  = PETSC_TRUE;
1126:         if (d) *d = i;
1127:         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1128:       }
1129:     }
1130:   }
1131:   return(0);
1132: }

1136: PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1137: {
1138:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1140:   PetscInt       i,j,m = a->mbs;

1143:   if (!a->diag) {
1144:     PetscMalloc(m*sizeof(PetscInt),&a->diag);
1145:     PetscLogObjectMemory(A,m*sizeof(PetscInt));
1146:     a->free_diag = PETSC_TRUE;
1147:   }
1148:   for (i=0; i<m; i++) {
1149:     a->diag[i] = a->i[i+1];
1150:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1151:       if (a->j[j] == i) {
1152:         a->diag[i] = j;
1153:         break;
1154:       }
1155:     }
1156:   }
1157:   return(0);
1158: }


1161: EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);

1165: static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1166: {
1167:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1169:   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1170:   PetscInt       *tia, *tja;

1173:   *nn = n;
1174:   if (!ia) return(0);
1175:   if (symmetric) {
1176:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);
1177:     nz   = tia[n];
1178:   } else {
1179:     tia = a->i; tja = a->j;
1180:   }
1181: 
1182:   if (!blockcompressed && bs > 1) {
1183:     (*nn) *= bs;
1184:     nbs    = bs;
1185:     /* malloc & create the natural set of indices */
1186:     PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);
1187:     if (n) {
1188:       (*ia)[0] = 0;
1189:       for (j=1; j<bs; j++) {
1190:         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1191:       }
1192:     }

1194:     for (i=1; i<n; i++) {
1195:       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1196:       for (j=1; j<bs; j++) {
1197:         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1198:       }
1199:     }
1200:     if (n) {
1201:       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1202:     }

1204:     if (ja) {
1205:       PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);
1206:       cnt = 0;
1207:       for (i=0; i<n; i++) {
1208:         for (j=0; j<bs; j++) {
1209:           for (k=tia[i]; k<tia[i+1]; k++) {
1210:             for (l=0; l<bs; l++) {
1211:               (*ja)[cnt++] = bs*tja[k] + l;
1212:             }
1213:           }
1214:         }
1215:       }
1216:     }

1218:     n     *= bs;
1219:     nz *= bs*bs;
1220:     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1221:       PetscFree(tia);
1222:       PetscFree(tja);
1223:     }
1224:   } else if (oshift == 1) {
1225:     if (symmetric) {
1226:       PetscInt nz = tia[A->rmap->n/bs];
1227:       /*  add 1 to i and j indices */
1228:       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1229:       *ia = tia;
1230:       if (ja) {
1231:         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1232:         *ja = tja;
1233:       }
1234:     } else {
1235:       PetscInt nz = a->i[A->rmap->n/bs];
1236:       /* malloc space and  add 1 to i and j indices */
1237:       PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);
1238:       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1239:       if (ja) {
1240:         PetscMalloc(nz*sizeof(PetscInt),ja);
1241:         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1242:       }
1243:     }
1244:   } else {
1245:     *ia = tia;
1246:     if (ja) *ja = tja;
1247:   }
1248: 
1249:   return(0);
1250: }

1254: static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1255: {

1259:   if (!ia) return(0);
1260:   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1261:     PetscFree(*ia);
1262:     if (ja) {PetscFree(*ja);}
1263:   }
1264:   return(0);
1265: }

1269: PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1270: {
1271:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1275: #if defined(PETSC_USE_LOG)
1276:   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1277: #endif
1278:   MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);
1279:   if (a->row) {
1280:     ISDestroy(a->row);
1281:   }
1282:   if (a->col) {
1283:     ISDestroy(a->col);
1284:   }
1285:   if (a->free_diag) {PetscFree(a->diag);}
1286:   PetscFree(a->idiag);
1287:   if (a->free_imax_ilen) {PetscFree2(a->imax,a->ilen);}
1288:   PetscFree(a->solve_work);
1289:   PetscFree(a->mult_work);
1290:   if (a->icol) {ISDestroy(a->icol);}
1291:   PetscFree(a->saved_values);
1292:   PetscFree(a->xtoy);
1293:   if (a->compressedrow.use){PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}

1295:   if (a->sbaijMat) {MatDestroy(a->sbaijMat);}
1296:   if (a->parent) {MatDestroy(a->parent);}
1297:   PetscFree(a);

1299:   PetscObjectChangeTypeName((PetscObject)A,0);
1300:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);
1301:   PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);
1302:   PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);
1303:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);
1304:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);
1305:   PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);
1306:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);
1307:   PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);
1308:   return(0);
1309: }

1313: PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1314: {
1315:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;

1319:   switch (op) {
1320:   case MAT_ROW_ORIENTED:
1321:     a->roworiented    = flg;
1322:     break;
1323:   case MAT_KEEP_NONZERO_PATTERN:
1324:     a->keepnonzeropattern = flg;
1325:     break;
1326:   case MAT_NEW_NONZERO_LOCATIONS:
1327:     a->nonew          = (flg ? 0 : 1);
1328:     break;
1329:   case MAT_NEW_NONZERO_LOCATION_ERR:
1330:     a->nonew          = (flg ? -1 : 0);
1331:     break;
1332:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1333:     a->nonew          = (flg ? -2 : 0);
1334:     break;
1335:   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1336:     a->nounused       = (flg ? -1 : 0);
1337:     break;
1338:   case MAT_NEW_DIAGONALS:
1339:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1340:   case MAT_USE_HASH_TABLE:
1341:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1342:     break;
1343:   case MAT_SYMMETRIC:
1344:   case MAT_STRUCTURALLY_SYMMETRIC:
1345:   case MAT_HERMITIAN:
1346:   case MAT_SYMMETRY_ETERNAL:
1347:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1348:     break;
1349:   default:
1350:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1351:   }
1352:   return(0);
1353: }

1357: PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1358: {
1359:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1361:   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1362:   MatScalar      *aa,*aa_i;
1363:   PetscScalar    *v_i;

1366:   bs  = A->rmap->bs;
1367:   ai  = a->i;
1368:   aj  = a->j;
1369:   aa  = a->a;
1370:   bs2 = a->bs2;
1371: 
1372:   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1373: 
1374:   bn  = row/bs;   /* Block number */
1375:   bp  = row % bs; /* Block Position */
1376:   M   = ai[bn+1] - ai[bn];
1377:   *nz = bs*M;
1378: 
1379:   if (v) {
1380:     *v = 0;
1381:     if (*nz) {
1382:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
1383:       for (i=0; i<M; i++) { /* for each block in the block row */
1384:         v_i  = *v + i*bs;
1385:         aa_i = aa + bs2*(ai[bn] + i);
1386:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1387:       }
1388:     }
1389:   }

1391:   if (idx) {
1392:     *idx = 0;
1393:     if (*nz) {
1394:       PetscMalloc((*nz)*sizeof(PetscInt),idx);
1395:       for (i=0; i<M; i++) { /* for each block in the block row */
1396:         idx_i = *idx + i*bs;
1397:         itmp  = bs*aj[ai[bn] + i];
1398:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1399:       }
1400:     }
1401:   }
1402:   return(0);
1403: }

1407: PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1408: {

1412:   if (idx) {PetscFree(*idx);}
1413:   if (v)   {PetscFree(*v);}
1414:   return(0);
1415: }


1421: PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1422: {
1423:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1424:   Mat            C;
1426:   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1427:   PetscInt       *rows,*cols,bs2=a->bs2;
1428:   MatScalar      *array;

1431:   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1432:   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1433:     PetscMalloc((1+nbs)*sizeof(PetscInt),&col);
1434:     PetscMemzero(col,(1+nbs)*sizeof(PetscInt));

1436:     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1437:     MatCreate(((PetscObject)A)->comm,&C);
1438:     MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);
1439:     MatSetType(C,((PetscObject)A)->type_name);
1440:     MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);
1441:     PetscFree(col);
1442:   } else {
1443:     C = *B;
1444:   }

1446:   array = a->a;
1447:   PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);
1448:   for (i=0; i<mbs; i++) {
1449:     cols[0] = i*bs;
1450:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1451:     len = ai[i+1] - ai[i];
1452:     for (j=0; j<len; j++) {
1453:       rows[0] = (*aj++)*bs;
1454:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1455:       MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);
1456:       array += bs2;
1457:     }
1458:   }
1459:   PetscFree2(rows,cols);
1460: 
1461:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1462:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1463: 
1464:   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1465:     *B = C;
1466:   } else {
1467:     MatHeaderCopy(A,C);
1468:   }
1469:   return(0);
1470: }

1474: static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1475: {
1476:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1478:   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1479:   int            fd;
1480:   PetscScalar    *aa;
1481:   FILE           *file;

1484:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1485:   PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);
1486:   col_lens[0] = MAT_FILE_COOKIE;

1488:   col_lens[1] = A->rmap->N;
1489:   col_lens[2] = A->cmap->n;
1490:   col_lens[3] = a->nz*bs2;

1492:   /* store lengths of each row and write (including header) to file */
1493:   count = 0;
1494:   for (i=0; i<a->mbs; i++) {
1495:     for (j=0; j<bs; j++) {
1496:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1497:     }
1498:   }
1499:   PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);
1500:   PetscFree(col_lens);

1502:   /* store column indices (zero start index) */
1503:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);
1504:   count = 0;
1505:   for (i=0; i<a->mbs; i++) {
1506:     for (j=0; j<bs; j++) {
1507:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1508:         for (l=0; l<bs; l++) {
1509:           jj[count++] = bs*a->j[k] + l;
1510:         }
1511:       }
1512:     }
1513:   }
1514:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);
1515:   PetscFree(jj);

1517:   /* store nonzero values */
1518:   PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
1519:   count = 0;
1520:   for (i=0; i<a->mbs; i++) {
1521:     for (j=0; j<bs; j++) {
1522:       for (k=a->i[i]; k<a->i[i+1]; k++) {
1523:         for (l=0; l<bs; l++) {
1524:           aa[count++] = a->a[bs2*k + l*bs + j];
1525:         }
1526:       }
1527:     }
1528:   }
1529:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);
1530:   PetscFree(aa);

1532:   PetscViewerBinaryGetInfoPointer(viewer,&file);
1533:   if (file) {
1534:     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1535:   }
1536:   return(0);
1537: }

1541: static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1542: {
1543:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1544:   PetscErrorCode    ierr;
1545:   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1546:   PetscViewerFormat format;

1549:   PetscViewerGetFormat(viewer,&format);
1550:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1551:     PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);
1552:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1553:     Mat aij;
1554:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);
1555:     MatView(aij,viewer);
1556:     MatDestroy(aij);
1557:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1558:      return(0);
1559:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1560:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1561:     for (i=0; i<a->mbs; i++) {
1562:       for (j=0; j<bs; j++) {
1563:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1564:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1565:           for (l=0; l<bs; l++) {
1566: #if defined(PETSC_USE_COMPLEX)
1567:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1568:               PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1569:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1570:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1571:               PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1572:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1573:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1574:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1575:             }
1576: #else
1577:             if (a->a[bs2*k + l*bs + j] != 0.0) {
1578:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1579:             }
1580: #endif
1581:           }
1582:         }
1583:         PetscViewerASCIIPrintf(viewer,"\n");
1584:       }
1585:     }
1586:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1587:   } else {
1588:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
1589:     for (i=0; i<a->mbs; i++) {
1590:       for (j=0; j<bs; j++) {
1591:         PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);
1592:         for (k=a->i[i]; k<a->i[i+1]; k++) {
1593:           for (l=0; l<bs; l++) {
1594: #if defined(PETSC_USE_COMPLEX)
1595:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1596:               PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1597:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1598:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1599:               PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1600:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
1601:             } else {
1602:               PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
1603:             }
1604: #else
1605:             PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
1606: #endif
1607:           }
1608:         }
1609:         PetscViewerASCIIPrintf(viewer,"\n");
1610:       }
1611:     }
1612:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
1613:   }
1614:   PetscViewerFlush(viewer);
1615:   return(0);
1616: }

1620: static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1621: {
1622:   Mat            A = (Mat) Aa;
1623:   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1625:   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1626:   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1627:   MatScalar      *aa;
1628:   PetscViewer    viewer;


1632:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1633:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

1635:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

1637:   /* loop over matrix elements drawing boxes */
1638:   color = PETSC_DRAW_BLUE;
1639:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1640:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1641:       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643:       aa = a->a + j*bs2;
1644:       for (k=0; k<bs; k++) {
1645:         for (l=0; l<bs; l++) {
1646:           if (PetscRealPart(*aa++) >=  0.) continue;
1647:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1648:         }
1649:       }
1650:     }
1651:   }
1652:   color = PETSC_DRAW_CYAN;
1653:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1654:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1655:       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1656:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1657:       aa = a->a + j*bs2;
1658:       for (k=0; k<bs; k++) {
1659:         for (l=0; l<bs; l++) {
1660:           if (PetscRealPart(*aa++) != 0.) continue;
1661:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1662:         }
1663:       }
1664:     }
1665:   }

1667:   color = PETSC_DRAW_RED;
1668:   for (i=0,row=0; i<mbs; i++,row+=bs) {
1669:     for (j=a->i[i]; j<a->i[i+1]; j++) {
1670:       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1671:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1672:       aa = a->a + j*bs2;
1673:       for (k=0; k<bs; k++) {
1674:         for (l=0; l<bs; l++) {
1675:           if (PetscRealPart(*aa++) <= 0.) continue;
1676:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
1677:         }
1678:       }
1679:     }
1680:   }
1681:   return(0);
1682: }

1686: static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1687: {
1689:   PetscReal      xl,yl,xr,yr,w,h;
1690:   PetscDraw      draw;
1691:   PetscTruth     isnull;


1695:   PetscViewerDrawGetDraw(viewer,0,&draw);
1696:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

1698:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
1699:   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1700:   xr += w;    yr += h;  xl = -w;     yl = -h;
1701:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
1702:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
1703:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
1704:   return(0);
1705: }

1709: PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1710: {
1712:   PetscTruth     iascii,isbinary,isdraw;

1715:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1716:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1717:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1718:   if (iascii){
1719:     MatView_SeqBAIJ_ASCII(A,viewer);
1720:   } else if (isbinary) {
1721:     MatView_SeqBAIJ_Binary(A,viewer);
1722:   } else if (isdraw) {
1723:     MatView_SeqBAIJ_Draw(A,viewer);
1724:   } else {
1725:     Mat B;
1726:     MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
1727:     MatView(B,viewer);
1728:     MatDestroy(B);
1729:   }
1730:   return(0);
1731: }


1736: PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1737: {
1738:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1739:   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1740:   PetscInt    *ai = a->i,*ailen = a->ilen;
1741:   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1742:   MatScalar   *ap,*aa = a->a;

1745:   for (k=0; k<m; k++) { /* loop over rows */
1746:     row  = im[k]; brow = row/bs;
1747:     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1748:     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1749:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1750:     nrow = ailen[brow];
1751:     for (l=0; l<n; l++) { /* loop over columns */
1752:       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1753:       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1754:       col  = in[l] ;
1755:       bcol = col/bs;
1756:       cidx = col%bs;
1757:       ridx = row%bs;
1758:       high = nrow;
1759:       low  = 0; /* assume unsorted */
1760:       while (high-low > 5) {
1761:         t = (low+high)/2;
1762:         if (rp[t] > bcol) high = t;
1763:         else             low  = t;
1764:       }
1765:       for (i=low; i<high; i++) {
1766:         if (rp[i] > bcol) break;
1767:         if (rp[i] == bcol) {
1768:           *v++ = ap[bs2*i+bs*cidx+ridx];
1769:           goto finished;
1770:         }
1771:       }
1772:       *v++ = 0.0;
1773:       finished:;
1774:     }
1775:   }
1776:   return(0);
1777: }

1779: #define CHUNKSIZE 10
1782: PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1783: {
1784:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1785:   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1786:   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1787:   PetscErrorCode    ierr;
1788:   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1789:   PetscTruth        roworiented=a->roworiented;
1790:   const PetscScalar *value = v;
1791:   MatScalar         *ap,*aa = a->a,*bap;

1794:   if (roworiented) {
1795:     stepval = (n-1)*bs;
1796:   } else {
1797:     stepval = (m-1)*bs;
1798:   }
1799:   for (k=0; k<m; k++) { /* loop over added rows */
1800:     row  = im[k];
1801:     if (row < 0) continue;
1802: #if defined(PETSC_USE_DEBUG)  
1803:     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1804: #endif
1805:     rp   = aj + ai[row];
1806:     ap   = aa + bs2*ai[row];
1807:     rmax = imax[row];
1808:     nrow = ailen[row];
1809:     low  = 0;
1810:     high = nrow;
1811:     for (l=0; l<n; l++) { /* loop over added columns */
1812:       if (in[l] < 0) continue;
1813: #if defined(PETSC_USE_DEBUG)  
1814:       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1815: #endif
1816:       col = in[l];
1817:       if (roworiented) {
1818:         value = v + k*(stepval+bs)*bs + l*bs;
1819:       } else {
1820:         value = v + l*(stepval+bs)*bs + k*bs;
1821:       }
1822:       if (col <= lastcol) low = 0; else high = nrow;
1823:       lastcol = col;
1824:       while (high-low > 7) {
1825:         t = (low+high)/2;
1826:         if (rp[t] > col) high = t;
1827:         else             low  = t;
1828:       }
1829:       for (i=low; i<high; i++) {
1830:         if (rp[i] > col) break;
1831:         if (rp[i] == col) {
1832:           bap  = ap +  bs2*i;
1833:           if (roworiented) {
1834:             if (is == ADD_VALUES) {
1835:               for (ii=0; ii<bs; ii++,value+=stepval) {
1836:                 for (jj=ii; jj<bs2; jj+=bs) {
1837:                   bap[jj] += *value++;
1838:                 }
1839:               }
1840:             } else {
1841:               for (ii=0; ii<bs; ii++,value+=stepval) {
1842:                 for (jj=ii; jj<bs2; jj+=bs) {
1843:                   bap[jj] = *value++;
1844:                 }
1845:               }
1846:             }
1847:           } else {
1848:             if (is == ADD_VALUES) {
1849:               for (ii=0; ii<bs; ii++,value+=stepval) {
1850:                 for (jj=0; jj<bs; jj++) {
1851:                   *bap++ += *value++;
1852:                 }
1853:               }
1854:             } else {
1855:               for (ii=0; ii<bs; ii++,value+=stepval) {
1856:                 for (jj=0; jj<bs; jj++) {
1857:                   *bap++  = *value++;
1858:                 }
1859:               }
1860:             }
1861:           }
1862:           goto noinsert2;
1863:         }
1864:       }
1865:       if (nonew == 1) goto noinsert2;
1866:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1867:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1868:       N = nrow++ - 1; high++;
1869:       /* shift up all the later entries in this row */
1870:       for (ii=N; ii>=i; ii--) {
1871:         rp[ii+1] = rp[ii];
1872:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1873:       }
1874:       if (N >= i) {
1875:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1876:       }
1877:       rp[i] = col;
1878:       bap   = ap +  bs2*i;
1879:       if (roworiented) {
1880:         for (ii=0; ii<bs; ii++,value+=stepval) {
1881:           for (jj=ii; jj<bs2; jj+=bs) {
1882:             bap[jj] = *value++;
1883:           }
1884:         }
1885:       } else {
1886:         for (ii=0; ii<bs; ii++,value+=stepval) {
1887:           for (jj=0; jj<bs; jj++) {
1888:             *bap++  = *value++;
1889:           }
1890:         }
1891:       }
1892:       noinsert2:;
1893:       low = i;
1894:     }
1895:     ailen[row] = nrow;
1896:   }
1897:   return(0);
1898: }

1902: PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1903: {
1904:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1905:   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1906:   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1908:   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1909:   MatScalar      *aa = a->a,*ap;
1910:   PetscReal      ratio=0.6;

1913:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

1915:   if (m) rmax = ailen[0];
1916:   for (i=1; i<mbs; i++) {
1917:     /* move each row back by the amount of empty slots (fshift) before it*/
1918:     fshift += imax[i-1] - ailen[i-1];
1919:     rmax   = PetscMax(rmax,ailen[i]);
1920:     if (fshift) {
1921:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1922:       N = ailen[i];
1923:       for (j=0; j<N; j++) {
1924:         ip[j-fshift] = ip[j];
1925:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
1926:       }
1927:     }
1928:     ai[i] = ai[i-1] + ailen[i-1];
1929:   }
1930:   if (mbs) {
1931:     fshift += imax[mbs-1] - ailen[mbs-1];
1932:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1933:   }
1934:   /* reset ilen and imax for each row */
1935:   for (i=0; i<mbs; i++) {
1936:     ailen[i] = imax[i] = ai[i+1] - ai[i];
1937:   }
1938:   a->nz = ai[mbs];

1940:   /* diagonals may have moved, so kill the diagonal pointers */
1941:   a->idiagvalid = PETSC_FALSE;
1942:   if (fshift && a->diag) {
1943:     PetscFree(a->diag);
1944:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));
1945:     a->diag = 0;
1946:   }
1947:   if (fshift && a->nounused == -1) {
1948:     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1949:   }
1950:   PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);
1951:   PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);
1952:   PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);
1953:   a->reallocs          = 0;
1954:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1956:   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1957:   if (a->compressedrow.use){
1958:     Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);
1959:   }

1961:   A->same_nonzero = PETSC_TRUE;
1962:   return(0);
1963: }

1965: /* 
1966:    This function returns an array of flags which indicate the locations of contiguous
1967:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1968:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1969:    Assume: sizes should be long enough to hold all the values.
1970: */
1973: static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1974: {
1975:   PetscInt   i,j,k,row;
1976:   PetscTruth flg;

1979:   for (i=0,j=0; i<n; j++) {
1980:     row = idx[i];
1981:     if (row%bs!=0) { /* Not the begining of a block */
1982:       sizes[j] = 1;
1983:       i++;
1984:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1985:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1986:       i++;
1987:     } else { /* Begining of the block, so check if the complete block exists */
1988:       flg = PETSC_TRUE;
1989:       for (k=1; k<bs; k++) {
1990:         if (row+k != idx[i+k]) { /* break in the block */
1991:           flg = PETSC_FALSE;
1992:           break;
1993:         }
1994:       }
1995:       if (flg) { /* No break in the bs */
1996:         sizes[j] = bs;
1997:         i+= bs;
1998:       } else {
1999:         sizes[j] = 1;
2000:         i++;
2001:       }
2002:     }
2003:   }
2004:   *bs_max = j;
2005:   return(0);
2006: }
2007: 
2010: PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2011: {
2012:   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2014:   PetscInt       i,j,k,count,*rows;
2015:   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2016:   PetscScalar    zero = 0.0;
2017:   MatScalar      *aa;

2020:   /* Make a copy of the IS and  sort it */
2021:   /* allocate memory for rows,sizes */
2022:   PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);

2024:   /* copy IS values to rows, and sort them */
2025:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2026:   PetscSortInt(is_n,rows);
2027:   if (baij->keepnonzeropattern) {
2028:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2029:     bs_max = is_n;
2030:     A->same_nonzero = PETSC_TRUE;
2031:   } else {
2032:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
2033:     A->same_nonzero = PETSC_FALSE;
2034:   }

2036:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2037:     row   = rows[j];
2038:     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2039:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2040:     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2041:     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2042:       if (diag != 0.0) {
2043:         if (baij->ilen[row/bs] > 0) {
2044:           baij->ilen[row/bs]       = 1;
2045:           baij->j[baij->i[row/bs]] = row/bs;
2046:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
2047:         }
2048:         /* Now insert all the diagonal values for this bs */
2049:         for (k=0; k<bs; k++) {
2050:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);
2051:         }
2052:       } else { /* (diag == 0.0) */
2053:         baij->ilen[row/bs] = 0;
2054:       } /* end (diag == 0.0) */
2055:     } else { /* (sizes[i] != bs) */
2056: #if defined (PETSC_USE_DEBUG)
2057:       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2058: #endif
2059:       for (k=0; k<count; k++) {
2060:         aa[0] =  zero;
2061:         aa    += bs;
2062:       }
2063:       if (diag != 0.0) {
2064:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);
2065:       }
2066:     }
2067:   }

2069:   PetscFree2(rows,sizes);
2070:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
2071:   return(0);
2072: }

2076: PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2077: {
2078:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2079:   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2080:   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2081:   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2083:   PetscInt       ridx,cidx,bs2=a->bs2;
2084:   PetscTruth     roworiented=a->roworiented;
2085:   MatScalar      *ap,value,*aa=a->a,*bap;

2089:   for (k=0; k<m; k++) { /* loop over added rows */
2090:     row  = im[k];
2091:     brow = row/bs;
2092:     if (row < 0) continue;
2093: #if defined(PETSC_USE_DEBUG)  
2094:     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2095: #endif
2096:     rp   = aj + ai[brow];
2097:     ap   = aa + bs2*ai[brow];
2098:     rmax = imax[brow];
2099:     nrow = ailen[brow];
2100:     low  = 0;
2101:     high = nrow;
2102:     for (l=0; l<n; l++) { /* loop over added columns */
2103:       if (in[l] < 0) continue;
2104: #if defined(PETSC_USE_DEBUG)  
2105:       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2106: #endif
2107:       col = in[l]; bcol = col/bs;
2108:       ridx = row % bs; cidx = col % bs;
2109:       if (roworiented) {
2110:         value = v[l + k*n];
2111:       } else {
2112:         value = v[k + l*m];
2113:       }
2114:       if (col <= lastcol) low = 0; else high = nrow;
2115:       lastcol = col;
2116:       while (high-low > 7) {
2117:         t = (low+high)/2;
2118:         if (rp[t] > bcol) high = t;
2119:         else              low  = t;
2120:       }
2121:       for (i=low; i<high; i++) {
2122:         if (rp[i] > bcol) break;
2123:         if (rp[i] == bcol) {
2124:           bap  = ap +  bs2*i + bs*cidx + ridx;
2125:           if (is == ADD_VALUES) *bap += value;
2126:           else                  *bap  = value;
2127:           goto noinsert1;
2128:         }
2129:       }
2130:       if (nonew == 1) goto noinsert1;
2131:       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2132:       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2133:       N = nrow++ - 1; high++;
2134:       /* shift up all the later entries in this row */
2135:       for (ii=N; ii>=i; ii--) {
2136:         rp[ii+1] = rp[ii];
2137:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
2138:       }
2139:       if (N>=i) {
2140:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
2141:       }
2142:       rp[i]                      = bcol;
2143:       ap[bs2*i + bs*cidx + ridx] = value;
2144:       a->nz++;
2145:       noinsert1:;
2146:       low = i;
2147:     }
2148:     ailen[brow] = nrow;
2149:   }
2150:   A->same_nonzero = PETSC_FALSE;
2151:   return(0);
2152: }

2156: PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2157: {
2158:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2159:   Mat            outA;
2161:   PetscTruth     row_identity,col_identity;

2164:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2165:   ISIdentity(row,&row_identity);
2166:   ISIdentity(col,&col_identity);
2167:   if (!row_identity || !col_identity) {
2168:     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2169:   }

2171:   outA          = inA;
2172:   inA->factor   = MAT_FACTOR_LU;

2174:   MatMarkDiagonal_SeqBAIJ(inA);

2176:   PetscObjectReference((PetscObject)row);
2177:   if (a->row) { ISDestroy(a->row); }
2178:   a->row = row;
2179:   PetscObjectReference((PetscObject)col);
2180:   if (a->col) { ISDestroy(a->col); }
2181:   a->col = col;
2182: 
2183:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2184:   if (a->icol) {
2185:     ISDestroy(a->icol);
2186:   }
2187:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
2188:   PetscLogObjectParent(inA,a->icol);
2189: 
2190:   MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));
2191:   if (!a->solve_work) {
2192:     PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);
2193:     PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));
2194:   }
2195:   MatLUFactorNumeric(outA,inA,info);

2197:   return(0);
2198: }

2203: PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2204: {
2205:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2206:   PetscInt    i,nz,mbs;

2209:   nz  = baij->maxnz/baij->bs2;
2210:   mbs = baij->mbs;
2211:   for (i=0; i<nz; i++) {
2212:     baij->j[i] = indices[i];
2213:   }
2214:   baij->nz = nz;
2215:   for (i=0; i<mbs; i++) {
2216:     baij->ilen[i] = baij->imax[i];
2217:   }
2218:   return(0);
2219: }

2224: /*@
2225:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2226:        in the matrix.

2228:   Input Parameters:
2229: +  mat - the SeqBAIJ matrix
2230: -  indices - the column indices

2232:   Level: advanced

2234:   Notes:
2235:     This can be called if you have precomputed the nonzero structure of the 
2236:   matrix and want to provide it to the matrix object to improve the performance
2237:   of the MatSetValues() operation.

2239:     You MUST have set the correct numbers of nonzeros per row in the call to 
2240:   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.

2242:     MUST be called before any calls to MatSetValues();

2244: @*/
2245: PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2246: {
2247:   PetscErrorCode ierr,(*f)(Mat,PetscInt *);

2252:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
2253:   if (f) {
2254:     (*f)(mat,indices);
2255:   } else {
2256:     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2257:   }
2258:   return(0);
2259: }

2263: PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2264: {
2265:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2267:   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2268:   PetscReal      atmp;
2269:   PetscScalar    *x,zero = 0.0;
2270:   MatScalar      *aa;
2271:   PetscInt       ncols,brow,krow,kcol;

2274:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2275:   bs   = A->rmap->bs;
2276:   aa   = a->a;
2277:   ai   = a->i;
2278:   aj   = a->j;
2279:   mbs  = a->mbs;

2281:   VecSet(v,zero);
2282:   VecGetArray(v,&x);
2283:   VecGetLocalSize(v,&n);
2284:   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2285:   for (i=0; i<mbs; i++) {
2286:     ncols = ai[1] - ai[0]; ai++;
2287:     brow  = bs*i;
2288:     for (j=0; j<ncols; j++){
2289:       for (kcol=0; kcol<bs; kcol++){
2290:         for (krow=0; krow<bs; krow++){
2291:           atmp = PetscAbsScalar(*aa);aa++;
2292:           row = brow + krow;    /* row index */
2293:           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2294:           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2295:         }
2296:       }
2297:       aj++;
2298:     }
2299:   }
2300:   VecRestoreArray(v,&x);
2301:   return(0);
2302: }

2306: PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2307: {

2311:   /* If the two matrices have the same copy implementation, use fast copy. */
2312:   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2313:     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2314:     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;

2316:     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2317:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2318:     }
2319:     PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));
2320:   } else {
2321:     MatCopy_Basic(A,B,str);
2322:   }
2323:   return(0);
2324: }

2328: PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2329: {

2333:    MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);
2334:   return(0);
2335: }

2339: PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2340: {
2341:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2343:   *array = a->a;
2344:   return(0);
2345: }

2349: PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2350: {
2352:   return(0);
2353: }

2357: PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2358: {
2359:   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2361:   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2362:   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);

2365:   if (str == SAME_NONZERO_PATTERN) {
2366:     PetscScalar alpha = a;
2367:     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2368:   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2369:     if (y->xtoy && y->XtoY != X) {
2370:       PetscFree(y->xtoy);
2371:       MatDestroy(y->XtoY);
2372:     }
2373:     if (!y->xtoy) { /* get xtoy */
2374:       MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);
2375:       y->XtoY = X;
2376:       PetscObjectReference((PetscObject)X);
2377:     }
2378:     bs2 = bs*bs;
2379:     for (i=0; i<x->nz; i++) {
2380:       j = 0;
2381:       while (j < bs2){
2382:         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2383:         j++;
2384:       }
2385:     }
2386:     PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2387:   } else {
2388:     MatAXPY_Basic(Y,a,X,str);
2389:   }
2390:   return(0);
2391: }

2395: PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2396: {
2397:   PetscInt rbs,cbs;

2401:   PetscLayoutGetBlockSize(A->rmap,&rbs);
2402:   PetscLayoutGetBlockSize(A->cmap,&cbs);
2403:   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2404:   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2405:   return(0);
2406: }

2410: PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2411: {
2412:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2413:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2414:   MatScalar      *aa = a->a;

2417:   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2418:   return(0);
2419: }

2423: PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2424: {
2425:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2426:   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2427:   MatScalar      *aa = a->a;

2430:   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2431:   return(0);
2432: }


2438: /*
2439:     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2440: */
2441: PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2442: {
2443:   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2445:   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2446:   PetscInt       nz = a->i[m],row,*jj,mr,col;

2449:   *nn = n;
2450:   if (!ia) return(0);
2451:   if (symmetric) {
2452:     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2453:   } else {
2454:     PetscMalloc((n+1)*sizeof(PetscInt),&collengths);
2455:     PetscMemzero(collengths,n*sizeof(PetscInt));
2456:     PetscMalloc((n+1)*sizeof(PetscInt),&cia);
2457:     PetscMalloc((nz+1)*sizeof(PetscInt),&cja);
2458:     jj = a->j;
2459:     for (i=0; i<nz; i++) {
2460:       collengths[jj[i]]++;
2461:     }
2462:     cia[0] = oshift;
2463:     for (i=0; i<n; i++) {
2464:       cia[i+1] = cia[i] + collengths[i];
2465:     }
2466:     PetscMemzero(collengths,n*sizeof(PetscInt));
2467:     jj   = a->j;
2468:     for (row=0; row<m; row++) {
2469:       mr = a->i[row+1] - a->i[row];
2470:       for (i=0; i<mr; i++) {
2471:         col = *jj++;
2472:         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2473:       }
2474:     }
2475:     PetscFree(collengths);
2476:     *ia = cia; *ja = cja;
2477:   }
2478:   return(0);
2479: }

2483: PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2484: {

2488:   if (!ia) return(0);
2489:   PetscFree(*ia);
2490:   PetscFree(*ja);
2491:   return(0);
2492: }

2496: PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2497: {
2498:   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2500:   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2501:   PetscScalar    dx,*y,*xx,*w3_array;
2502:   PetscScalar    *vscale_array;
2503:   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2504:   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2505:   void           *fctx = coloring->fctx;
2506:   PetscTruth     flg = PETSC_FALSE;
2507:   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2508:   Vec            x1_tmp;

2514:   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");

2516:   PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);
2517:   MatSetUnfactored(J);
2518:   PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);
2519:   if (flg) {
2520:     PetscInfo(coloring,"Not calling MatZeroEntries()\n");
2521:   } else {
2522:     PetscTruth assembled;
2523:     MatAssembled(J,&assembled);
2524:     if (assembled) {
2525:       MatZeroEntries(J);
2526:     }
2527:   }

2529:   x1_tmp = x1;
2530:   if (!coloring->vscale){
2531:     VecDuplicate(x1_tmp,&coloring->vscale);
2532:   }
2533: 
2534:   /*
2535:     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2536:     coloring->F for the coarser grids from the finest
2537:   */
2538:   if (coloring->F) {
2539:     VecGetLocalSize(coloring->F,&m1);
2540:     VecGetLocalSize(w1,&m2);
2541:     if (m1 != m2) {
2542:       coloring->F = 0;
2543:       }
2544:     }

2546:   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2547:     VecNorm(x1_tmp,NORM_2,&unorm);
2548:   }
2549:   VecGetOwnershipRange(w1,&start,&end); /* OwnershipRange is used by ghosted x! */
2550: 
2551:   /* Set w1 = F(x1) */
2552:   if (coloring->F) {
2553:     w1          = coloring->F; /* use already computed value of function */
2554:     coloring->F = 0;
2555:   } else {
2556:     PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2557:     (*f)(sctx,x1_tmp,w1,fctx);
2558:     PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2559:   }
2560: 
2561:   if (!coloring->w3) {
2562:     VecDuplicate(x1_tmp,&coloring->w3);
2563:     PetscLogObjectParent(coloring,coloring->w3);
2564:   }
2565:   w3 = coloring->w3;

2567:     CHKMEMQ;
2568:     /* Compute all the local scale factors, including ghost points */
2569:   VecGetLocalSize(x1_tmp,&N);
2570:   VecGetArray(x1_tmp,&xx);
2571:   VecGetArray(coloring->vscale,&vscale_array);
2572:   if (ctype == IS_COLORING_GHOSTED){
2573:     col_start = 0; col_end = N;
2574:   } else if (ctype == IS_COLORING_GLOBAL){
2575:     xx = xx - start;
2576:     vscale_array = vscale_array - start;
2577:     col_start = start; col_end = N + start;
2578:   }    CHKMEMQ;
2579:   for (col=col_start; col<col_end; col++){
2580:     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2581:     if (coloring->htype[0] == 'w') {
2582:       dx = 1.0 + unorm;
2583:     } else {
2584:       dx  = xx[col];
2585:     }
2586:     if (dx == 0.0) dx = 1.0;
2587: #if !defined(PETSC_USE_COMPLEX)
2588:     if (dx < umin && dx >= 0.0)      dx = umin;
2589:     else if (dx < 0.0 && dx > -umin) dx = -umin;
2590: #else
2591:     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2592:     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2593: #endif
2594:     dx               *= epsilon;
2595:     vscale_array[col] = 1.0/dx;
2596:   }     CHKMEMQ;
2597:   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2598:   VecRestoreArray(coloring->vscale,&vscale_array);
2599:   if (ctype == IS_COLORING_GLOBAL){
2600:     VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2601:     VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);
2602:   }
2603:         CHKMEMQ;
2604:   if (coloring->vscaleforrow) {
2605:     vscaleforrow = coloring->vscaleforrow;
2606:   } else {
2607:     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2608:   }


2611:   PetscMalloc(bs*sizeof(PetscInt),&srows);
2612:   /*
2613:     Loop over each color
2614:   */
2615:   VecGetArray(coloring->vscale,&vscale_array);
2616:   for (k=0; k<coloring->ncolors; k++) {
2617:     coloring->currentcolor = k;
2618:     for (i=0; i<bs; i++) {
2619:       VecCopy(x1_tmp,w3);
2620:       VecGetArray(w3,&w3_array);
2621:       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2622:       /*
2623:         Loop over each column associated with color 
2624:         adding the perturbation to the vector w3.
2625:       */
2626:       for (l=0; l<coloring->ncolumns[k]; l++) {
2627:         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2628:         if (coloring->htype[0] == 'w') {
2629:           dx = 1.0 + unorm;
2630:         } else {
2631:           dx  = xx[col];
2632:         }
2633:         if (dx == 0.0) dx = 1.0;
2634: #if !defined(PETSC_USE_COMPLEX)
2635:         if (dx < umin && dx >= 0.0)      dx = umin;
2636:         else if (dx < 0.0 && dx > -umin) dx = -umin;
2637: #else
2638:         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2639:         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2640: #endif
2641:         dx            *= epsilon;
2642:         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2643:         w3_array[col] += dx;
2644:       }
2645:       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2646:       VecRestoreArray(w3,&w3_array);

2648:       /*
2649:         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2650:         w2 = F(x1 + dx) - F(x1)
2651:       */
2652:       PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);
2653:       (*f)(sctx,w3,w2,fctx);
2654:       PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);
2655:       VecAXPY(w2,-1.0,w1);
2656: 
2657:       /*
2658:         Loop over rows of vector, putting results into Jacobian matrix
2659:       */
2660:       VecGetArray(w2,&y);
2661:       for (l=0; l<coloring->nrows[k]; l++) {
2662:         row    = bs*coloring->rows[k][l];             /* local row index */
2663:         col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2664:         for (j=0; j<bs; j++) {
2665:             y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2666:           srows[j]  = row + start + j;
2667:         }
2668:         MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);
2669:       }
2670:       VecRestoreArray(w2,&y);
2671:     }
2672:   } /* endof for each color */
2673:   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2674:   VecRestoreArray(coloring->vscale,&vscale_array);
2675:   VecRestoreArray(x1_tmp,&xx);
2676:   PetscFree(srows);
2677: 
2678:   coloring->currentcolor = -1;
2679:   MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2680:   MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2681:   PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);
2682:   return(0);
2683: }

2685: /* -------------------------------------------------------------------*/
2686: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2687:        MatGetRow_SeqBAIJ,
2688:        MatRestoreRow_SeqBAIJ,
2689:        MatMult_SeqBAIJ_N,
2690: /* 4*/ MatMultAdd_SeqBAIJ_N,
2691:        MatMultTranspose_SeqBAIJ,
2692:        MatMultTransposeAdd_SeqBAIJ,
2693:        0,
2694:        0,
2695:        0,
2696: /*10*/ 0,
2697:        MatLUFactor_SeqBAIJ,
2698:        0,
2699:        0,
2700:        MatTranspose_SeqBAIJ,
2701: /*15*/ MatGetInfo_SeqBAIJ,
2702:        MatEqual_SeqBAIJ,
2703:        MatGetDiagonal_SeqBAIJ,
2704:        MatDiagonalScale_SeqBAIJ,
2705:        MatNorm_SeqBAIJ,
2706: /*20*/ 0,
2707:        MatAssemblyEnd_SeqBAIJ,
2708:        MatSetOption_SeqBAIJ,
2709:        MatZeroEntries_SeqBAIJ,
2710: /*24*/ MatZeroRows_SeqBAIJ,
2711:        0,
2712:        0,
2713:        0,
2714:        0,
2715: /*29*/ MatSetUpPreallocation_SeqBAIJ,
2716:        0,
2717:        0,
2718:        MatGetArray_SeqBAIJ,
2719:        MatRestoreArray_SeqBAIJ,
2720: /*34*/ MatDuplicate_SeqBAIJ,
2721:        0,
2722:        0,
2723:        MatILUFactor_SeqBAIJ,
2724:        0,
2725: /*39*/ MatAXPY_SeqBAIJ,
2726:        MatGetSubMatrices_SeqBAIJ,
2727:        MatIncreaseOverlap_SeqBAIJ,
2728:        MatGetValues_SeqBAIJ,
2729:        MatCopy_SeqBAIJ,
2730: /*44*/ 0,
2731:        MatScale_SeqBAIJ,
2732:        0,
2733:        0,
2734:        0,
2735: /*49*/ MatSetBlockSize_SeqBAIJ,
2736:        MatGetRowIJ_SeqBAIJ,
2737:        MatRestoreRowIJ_SeqBAIJ,
2738:        MatGetColumnIJ_SeqBAIJ,
2739:        MatRestoreColumnIJ_SeqBAIJ,
2740: /*54*/ MatFDColoringCreate_SeqAIJ,
2741:        0,
2742:        0,
2743:        0,
2744:        MatSetValuesBlocked_SeqBAIJ,
2745: /*59*/ MatGetSubMatrix_SeqBAIJ,
2746:        MatDestroy_SeqBAIJ,
2747:        MatView_SeqBAIJ,
2748:        0,
2749:        0,
2750: /*64*/ 0,
2751:        0,
2752:        0,
2753:        0,
2754:        0,
2755: /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2756:        0,
2757:        MatConvert_Basic,
2758:        0,
2759:        0,
2760: /*74*/ 0,
2761:        MatFDColoringApply_BAIJ,
2762:        0,
2763:        0,
2764:        0,
2765: /*79*/ 0,
2766:        0,
2767:        0,
2768:        0,
2769:        MatLoad_SeqBAIJ,
2770: /*84*/ 0,
2771:        0,
2772:        0,
2773:        0,
2774:        0,
2775: /*89*/ 0,
2776:        0,
2777:        0,
2778:        0,
2779:        0,
2780: /*94*/ 0,
2781:        0,
2782:        0,
2783:        0,
2784:        0,
2785: /*99*/0,
2786:        0,
2787:        0,
2788:        0,
2789:        0,
2790: /*104*/0,
2791:        MatRealPart_SeqBAIJ,
2792:        MatImaginaryPart_SeqBAIJ,
2793:        0,
2794:        0,
2795: /*109*/0,
2796:        0,
2797:        0,
2798:        0,
2799:        MatMissingDiagonal_SeqBAIJ,
2800: /*114*/0,
2801:        0,
2802:        0,
2803:        0,
2804:        0,
2805: /*119*/0,
2806:        0,
2807:        MatMultHermitianTranspose_SeqBAIJ,
2808:        MatMultHermitianTransposeAdd_SeqBAIJ
2809: };

2814: PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2815: {
2816:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2817:   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;

2821:   if (aij->nonew != 1) {
2822:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2823:   }

2825:   /* allocate space for values if not already there */
2826:   if (!aij->saved_values) {
2827:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
2828:     PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));
2829:   }

2831:   /* copy values over */
2832:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
2833:   return(0);
2834: }

2840: PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2841: {
2842:   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2844:   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;

2847:   if (aij->nonew != 1) {
2848:     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2849:   }
2850:   if (!aij->saved_values) {
2851:     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2852:   }

2854:   /* copy values over */
2855:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
2856:   return(0);
2857: }


2868: PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2869: {
2870:   Mat_SeqBAIJ    *b;
2872:   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2873:   PetscTruth     flg,skipallocation = PETSC_FALSE;


2877:   if (nz == MAT_SKIP_ALLOCATION) {
2878:     skipallocation = PETSC_TRUE;
2879:     nz             = 0;
2880:   }

2882:   if (bs < 0) {
2883:     PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");
2884:       PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);
2885:     PetscOptionsEnd();
2886:     bs   = PetscAbs(bs);
2887:   }
2888:   if (nnz && newbs != bs) {
2889:     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2890:   }
2891:   bs   = newbs;

2893:   PetscLayoutSetBlockSize(B->rmap,bs);
2894:   PetscLayoutSetBlockSize(B->cmap,bs);
2895:   PetscLayoutSetUp(B->rmap);
2896:   PetscLayoutSetUp(B->cmap);

2898:   B->preallocated = PETSC_TRUE;

2900:   mbs  = B->rmap->n/bs;
2901:   nbs  = B->cmap->n/bs;
2902:   bs2  = bs*bs;

2904:   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2905:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2906:   }

2908:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2909:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2910:   if (nnz) {
2911:     for (i=0; i<mbs; i++) {
2912:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2913:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2914:     }
2915:   }

2917:   b       = (Mat_SeqBAIJ*)B->data;
2918:   PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");
2919:     PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);
2920:   PetscOptionsEnd();

2922:   if (!flg) {
2923:     switch (bs) {
2924:     case 1:
2925:       B->ops->mult            = MatMult_SeqBAIJ_1;
2926:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2927:       B->ops->sor             = MatSOR_SeqBAIJ_1;
2928:       break;
2929:     case 2:
2930:       B->ops->mult            = MatMult_SeqBAIJ_2;
2931:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2932:       B->ops->sor             = MatSOR_SeqBAIJ_2;
2933:       break;
2934:     case 3:
2935:       B->ops->mult            = MatMult_SeqBAIJ_3;
2936:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2937:       B->ops->sor             = MatSOR_SeqBAIJ_3;
2938:       break;
2939:     case 4:
2940:       B->ops->mult            = MatMult_SeqBAIJ_4;
2941:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2942:       B->ops->sor             = MatSOR_SeqBAIJ_4;
2943:       break;
2944:     case 5:
2945:       B->ops->mult            = MatMult_SeqBAIJ_5;
2946:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2947:       B->ops->sor             = MatSOR_SeqBAIJ_5;
2948:       break;
2949:     case 6:
2950:       B->ops->mult            = MatMult_SeqBAIJ_6;
2951:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2952:       B->ops->sor             = MatSOR_SeqBAIJ_6;
2953:       break;
2954:     case 7:
2955:       B->ops->mult            = MatMult_SeqBAIJ_7;
2956:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2957:       B->ops->sor             = MatSOR_SeqBAIJ_7;
2958:       break;
2959:     case 15:
2960:       B->ops->mult = MatMult_SeqBAIJ_15_ver1;
2961:       break;
2962:     default:
2963:       B->ops->mult            = MatMult_SeqBAIJ_N;
2964:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2965:       break;
2966:     }
2967:   }
2968:   B->rmap->bs      = bs;
2969:   b->mbs     = mbs;
2970:   b->nbs     = nbs;
2971:   if (!skipallocation) {
2972:     if (!b->imax) {
2973:       PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);
2974:       PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2975:       b->free_imax_ilen = PETSC_TRUE;
2976:     }
2977:     /* b->ilen will count nonzeros in each block row so far. */
2978:     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2979:     if (!nnz) {
2980:       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2981:       else if (nz <= 0)        nz = 1;
2982:       for (i=0; i<mbs; i++) b->imax[i] = nz;
2983:       nz = nz*mbs;
2984:     } else {
2985:       nz = 0;
2986:       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2987:     }

2989:     /* allocate the matrix space */
2990:     MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);
2991:     PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);
2992:     PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));
2993:     PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2994:     PetscMemzero(b->j,nz*sizeof(PetscInt));
2995:     b->singlemalloc = PETSC_TRUE;
2996:     b->i[0] = 0;
2997:     for (i=1; i<mbs+1; i++) {
2998:       b->i[i] = b->i[i-1] + b->imax[i-1];
2999:     }
3000:     b->free_a     = PETSC_TRUE;
3001:     b->free_ij    = PETSC_TRUE;
3002:   } else {
3003:     b->free_a     = PETSC_FALSE;
3004:     b->free_ij    = PETSC_FALSE;
3005:   }

3007:   B->rmap->bs          = bs;
3008:   b->bs2              = bs2;
3009:   b->mbs              = mbs;
3010:   b->nz               = 0;
3011:   b->maxnz            = nz*bs2;
3012:   B->info.nz_unneeded = (PetscReal)b->maxnz;
3013:   return(0);
3014: }

3020: PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3021: {
3022:   PetscInt       i,m,nz,nz_max=0,*nnz;
3023:   PetscScalar    *values=0;


3028:   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);

3030:   PetscLayoutSetBlockSize(B->rmap,bs);
3031:   PetscLayoutSetBlockSize(B->cmap,bs);
3032:   PetscLayoutSetUp(B->rmap);
3033:   PetscLayoutSetUp(B->cmap);
3034:   m = B->rmap->n/bs;

3036:   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3037:   PetscMalloc((m+1) * sizeof(PetscInt), &nnz);
3038:   for(i=0; i<m; i++) {
3039:     nz = ii[i+1]- ii[i];
3040:     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3041:     nz_max = PetscMax(nz_max, nz);
3042:     nnz[i] = nz;
3043:   }
3044:   MatSeqBAIJSetPreallocation(B,bs,0,nnz);
3045:   PetscFree(nnz);

3047:   values = (PetscScalar*)V;
3048:   if (!values) {
3049:     PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);
3050:     PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));
3051:   }
3052:   for (i=0; i<m; i++) {
3053:     PetscInt          ncols  = ii[i+1] - ii[i];
3054:     const PetscInt    *icols = jj + ii[i];
3055:     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3056:     MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);
3057:   }
3058:   if (!V) { PetscFree(values); }
3059:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3060:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3062:   return(0);
3063: }



3072: /*MC
3073:    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 
3074:    block sparse compressed row format.

3076:    Options Database Keys:
3077: . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()

3079:   Level: beginner

3081: .seealso: MatCreateSeqBAIJ()
3082: M*/


3088: PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3089: {
3091:   PetscMPIInt    size;
3092:   Mat_SeqBAIJ    *b;

3095:   MPI_Comm_size(((PetscObject)B)->comm,&size);
3096:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

3098:   PetscNewLog(B,Mat_SeqBAIJ,&b);
3099:   B->data = (void*)b;
3100:   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
3101:   B->mapping               = 0;
3102:   b->row                   = 0;
3103:   b->col                   = 0;
3104:   b->icol                  = 0;
3105:   b->reallocs              = 0;
3106:   b->saved_values          = 0;

3108:   b->roworiented           = PETSC_TRUE;
3109:   b->nonew                 = 0;
3110:   b->diag                  = 0;
3111:   b->solve_work            = 0;
3112:   b->mult_work             = 0;
3113:   B->spptr                 = 0;
3114:   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3115:   b->keepnonzeropattern    = PETSC_FALSE;
3116:   b->xtoy                  = 0;
3117:   b->XtoY                  = 0;
3118:   b->compressedrow.use     = PETSC_FALSE;
3119:   b->compressedrow.nrows   = 0;
3120:   b->compressedrow.i       = PETSC_NULL;
3121:   b->compressedrow.rindex  = PETSC_NULL;
3122:   b->compressedrow.checked = PETSC_FALSE;
3123:   B->same_nonzero          = PETSC_FALSE;

3125:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3126:                                      "MatGetFactorAvailable_seqbaij_petsc",
3127:                                      MatGetFactorAvailable_seqbaij_petsc);
3128:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3129:                                      "MatGetFactor_seqbaij_petsc",
3130:                                      MatGetFactor_seqbaij_petsc);
3131:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3132:                                      "MatInvertBlockDiagonal_SeqBAIJ",
3133:                                       MatInvertBlockDiagonal_SeqBAIJ);
3134:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3135:                                      "MatStoreValues_SeqBAIJ",
3136:                                       MatStoreValues_SeqBAIJ);
3137:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3138:                                      "MatRetrieveValues_SeqBAIJ",
3139:                                       MatRetrieveValues_SeqBAIJ);
3140:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3141:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3142:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
3143:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3144:                                      "MatConvert_SeqBAIJ_SeqAIJ",
3145:                                       MatConvert_SeqBAIJ_SeqAIJ);
3146:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3147:                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3148:                                       MatConvert_SeqBAIJ_SeqSBAIJ);
3149:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3150:                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3151:                                       MatSeqBAIJSetPreallocation_SeqBAIJ);
3152:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3153:                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3154:                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);
3155:   PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);
3156:   return(0);
3157: }

3162: PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3163: {
3164:   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3166:   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;

3169:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

3171:   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3172:     c->imax = a->imax;
3173:     c->ilen = a->ilen;
3174:     c->free_imax_ilen = PETSC_FALSE;
3175:   } else {
3176:     PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);
3177:     PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));
3178:     for (i=0; i<mbs; i++) {
3179:       c->imax[i] = a->imax[i];
3180:       c->ilen[i] = a->ilen[i];
3181:     }
3182:     c->free_imax_ilen = PETSC_TRUE;
3183:   }

3185:   /* allocate the matrix space */
3186:   if (mallocmatspace){
3187:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3188:       PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);
3189:       PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));
3190:       c->singlemalloc = PETSC_FALSE;
3191:       c->free_ij      = PETSC_FALSE;
3192:       c->i            = a->i;
3193:       c->j            = a->j;
3194:       c->parent       = A;
3195:       PetscObjectReference((PetscObject)A);
3196:       MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3197:       MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
3198:     } else {
3199:       PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);
3200:       PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));
3201:       c->singlemalloc = PETSC_TRUE;
3202:       c->free_ij      = PETSC_TRUE;
3203:       PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));
3204:       if (mbs > 0) {
3205:         PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));
3206:         if (cpvalues == MAT_COPY_VALUES) {
3207:           PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
3208:         } else {
3209:           PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
3210:         }
3211:       }
3212:     }
3213:   }

3215:   c->roworiented = a->roworiented;
3216:   c->nonew       = a->nonew;
3217:   PetscLayoutCopy(A->rmap,&C->rmap);
3218:   PetscLayoutCopy(A->cmap,&C->cmap);
3219:   c->bs2         = a->bs2;
3220:   c->mbs         = a->mbs;
3221:   c->nbs         = a->nbs;

3223:   if (a->diag) {
3224:     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3225:       c->diag      = a->diag;
3226:       c->free_diag = PETSC_FALSE;
3227:     } else {
3228:       PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);
3229:       PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));
3230:       for (i=0; i<mbs; i++) {
3231:         c->diag[i] = a->diag[i];
3232:       }
3233:       c->free_diag = PETSC_TRUE;
3234:     }
3235:   } else c->diag        = 0;
3236:   c->nz                 = a->nz;
3237:   c->maxnz              = a->maxnz;
3238:   c->solve_work         = 0;
3239:   c->mult_work          = 0;
3240:   c->free_a             = PETSC_TRUE;
3241:   c->free_ij            = PETSC_TRUE;
3242:   C->preallocated       = PETSC_TRUE;
3243:   C->assembled          = PETSC_TRUE;

3245:   c->compressedrow.use     = a->compressedrow.use;
3246:   c->compressedrow.nrows   = a->compressedrow.nrows;
3247:   c->compressedrow.checked = a->compressedrow.checked;
3248:   if (a->compressedrow.checked && a->compressedrow.use){
3249:     i = a->compressedrow.nrows;
3250:     PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);
3251:     PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));
3252:     PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));
3253:     PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));
3254:   } else {
3255:     c->compressedrow.use    = PETSC_FALSE;
3256:     c->compressedrow.i      = PETSC_NULL;
3257:     c->compressedrow.rindex = PETSC_NULL;
3258:   }
3259:   C->same_nonzero = A->same_nonzero;
3260:   PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);
3261:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
3262:   return(0);
3263: }

3267: PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3268: {

3272:   MatCreate(((PetscObject)A)->comm,B);
3273:   MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);
3274:   MatSetType(*B,MATSEQBAIJ);
3275:   MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3276:   return(0);
3277: }

3281: PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3282: {
3283:   Mat_SeqBAIJ    *a;
3284:   Mat            B;
3286:   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3287:   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3288:   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3289:   PetscInt       *masked,nmask,tmp,bs2,ishift;
3290:   PetscMPIInt    size;
3291:   int            fd;
3292:   PetscScalar    *aa;
3293:   MPI_Comm       comm = ((PetscObject)viewer)->comm;

3296:   PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");
3297:     PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);
3298:   PetscOptionsEnd();
3299:   bs2  = bs*bs;

3301:   MPI_Comm_size(comm,&size);
3302:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3303:   PetscViewerBinaryGetDescriptor(viewer,&fd);
3304:   PetscBinaryRead(fd,header,4,PETSC_INT);
3305:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3306:   M = header[1]; N = header[2]; nz = header[3];

3308:   if (header[3] < 0) {
3309:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3310:   }

3312:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

3314:   /* 
3315:      This code adds extra rows to make sure the number of rows is 
3316:     divisible by the blocksize
3317:   */
3318:   mbs        = M/bs;
3319:   extra_rows = bs - M + bs*(mbs);
3320:   if (extra_rows == bs) extra_rows = 0;
3321:   else                  mbs++;
3322:   if (extra_rows) {
3323:     PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");
3324:   }

3326:   /* read in row lengths */
3327:   PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);
3328:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
3329:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

3331:   /* read in column indices */
3332:   PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);
3333:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
3334:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

3336:   /* loop over row lengths determining block row lengths */
3337:   PetscMalloc(mbs*sizeof(PetscInt),&browlengths);
3338:   PetscMemzero(browlengths,mbs*sizeof(PetscInt));
3339:   PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);
3340:   PetscMemzero(mask,mbs*sizeof(PetscInt));
3341:   rowcount = 0;
3342:   nzcount = 0;
3343:   for (i=0; i<mbs; i++) {
3344:     nmask = 0;
3345:     for (j=0; j<bs; j++) {
3346:       kmax = rowlengths[rowcount];
3347:       for (k=0; k<kmax; k++) {
3348:         tmp = jj[nzcount++]/bs;
3349:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3350:       }
3351:       rowcount++;
3352:     }
3353:     browlengths[i] += nmask;
3354:     /* zero out the mask elements we set */
3355:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3356:   }

3358:   /* create our matrix */
3359:   MatCreate(comm,&B);
3360:   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3361:   MatSetType(B,type);
3362:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);
3363:   a = (Mat_SeqBAIJ*)B->data;

3365:   /* set matrix "i" values */
3366:   a->i[0] = 0;
3367:   for (i=1; i<= mbs; i++) {
3368:     a->i[i]      = a->i[i-1] + browlengths[i-1];
3369:     a->ilen[i-1] = browlengths[i-1];
3370:   }
3371:   a->nz         = 0;
3372:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

3374:   /* read in nonzero values */
3375:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
3376:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
3377:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

3379:   /* set "a" and "j" values into matrix */
3380:   nzcount = 0; jcount = 0;
3381:   for (i=0; i<mbs; i++) {
3382:     nzcountb = nzcount;
3383:     nmask    = 0;
3384:     for (j=0; j<bs; j++) {
3385:       kmax = rowlengths[i*bs+j];
3386:       for (k=0; k<kmax; k++) {
3387:         tmp = jj[nzcount++]/bs;
3388:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3389:       }
3390:     }
3391:     /* sort the masked values */
3392:     PetscSortInt(nmask,masked);

3394:     /* set "j" values into matrix */
3395:     maskcount = 1;
3396:     for (j=0; j<nmask; j++) {
3397:       a->j[jcount++]  = masked[j];
3398:       mask[masked[j]] = maskcount++;
3399:     }
3400:     /* set "a" values into matrix */
3401:     ishift = bs2*a->i[i];
3402:     for (j=0; j<bs; j++) {
3403:       kmax = rowlengths[i*bs+j];
3404:       for (k=0; k<kmax; k++) {
3405:         tmp       = jj[nzcountb]/bs ;
3406:         block     = mask[tmp] - 1;
3407:         point     = jj[nzcountb] - bs*tmp;
3408:         idx       = ishift + bs2*block + j + bs*point;
3409:         a->a[idx] = (MatScalar)aa[nzcountb++];
3410:       }
3411:     }
3412:     /* zero out the mask elements we set */
3413:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3414:   }
3415:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

3417:   PetscFree(rowlengths);
3418:   PetscFree(browlengths);
3419:   PetscFree(aa);
3420:   PetscFree(jj);
3421:   PetscFree2(mask,masked);

3423:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3424:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3425:   MatView_Private(B);

3427:   *A = B;
3428:   return(0);
3429: }

3433: /*@C
3434:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3435:    compressed row) format.  For good matrix assembly performance the
3436:    user should preallocate the matrix storage by setting the parameter nz
3437:    (or the array nnz).  By setting these parameters accurately, performance
3438:    during matrix assembly can be increased by more than a factor of 50.

3440:    Collective on MPI_Comm

3442:    Input Parameters:
3443: +  comm - MPI communicator, set to PETSC_COMM_SELF
3444: .  bs - size of block
3445: .  m - number of rows
3446: .  n - number of columns
3447: .  nz - number of nonzero blocks  per block row (same for all rows)
3448: -  nnz - array containing the number of nonzero blocks in the various block rows 
3449:          (possibly different for each block row) or PETSC_NULL

3451:    Output Parameter:
3452: .  A - the matrix 

3454:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3455:    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3456:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

3458:    Options Database Keys:
3459: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3460:                      block calculations (much slower)
3461: .    -mat_block_size - size of the blocks to use

3463:    Level: intermediate

3465:    Notes:
3466:    The number of rows and columns must be divisible by blocksize.

3468:    If the nnz parameter is given then the nz parameter is ignored

3470:    A nonzero block is any block that as 1 or more nonzeros in it

3472:    The block AIJ format is fully compatible with standard Fortran 77
3473:    storage.  That is, the stored row and column indices can begin at
3474:    either one (as in Fortran) or zero.  See the users' manual for details.

3476:    Specify the preallocated storage with either nz or nnz (not both).
3477:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3478:    allocation.  For additional details, see the users manual chapter on
3479:    matrices.

3481: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3482: @*/
3483: PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3484: {
3486: 
3488:   MatCreate(comm,A);
3489:   MatSetSizes(*A,m,n,m,n);
3490:   MatSetType(*A,MATSEQBAIJ);
3491:   MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);
3492:   return(0);
3493: }

3497: /*@C
3498:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3499:    per row in the matrix. For good matrix assembly performance the
3500:    user should preallocate the matrix storage by setting the parameter nz
3501:    (or the array nnz).  By setting these parameters accurately, performance
3502:    during matrix assembly can be increased by more than a factor of 50.

3504:    Collective on MPI_Comm

3506:    Input Parameters:
3507: +  A - the matrix
3508: .  bs - size of block
3509: .  nz - number of block nonzeros per block row (same for all rows)
3510: -  nnz - array containing the number of block nonzeros in the various block rows 
3511:          (possibly different for each block row) or PETSC_NULL

3513:    Options Database Keys:
3514: .   -mat_no_unroll - uses code that does not unroll the loops in the 
3515:                      block calculations (much slower)
3516: .    -mat_block_size - size of the blocks to use

3518:    Level: intermediate

3520:    Notes:
3521:    If the nnz parameter is given then the nz parameter is ignored

3523:    You can call MatGetInfo() to get information on how effective the preallocation was;
3524:    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3525:    You can also run with the option -info and look for messages with the string 
3526:    malloc in them to see if additional memory allocation was needed.

3528:    The block AIJ format is fully compatible with standard Fortran 77
3529:    storage.  That is, the stored row and column indices can begin at
3530:    either one (as in Fortran) or zero.  See the users' manual for details.

3532:    Specify the preallocated storage with either nz or nnz (not both).
3533:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
3534:    allocation.  For additional details, see the users manual chapter on
3535:    matrices.

3537: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3538: @*/
3539: PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3540: {
3541:   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);

3544:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);
3545:   if (f) {
3546:     (*f)(B,bs,nz,nnz);
3547:   }
3548:   return(0);
3549: }

3553: /*@C
3554:    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3555:    (the default sequential PETSc format).  

3557:    Collective on MPI_Comm

3559:    Input Parameters:
3560: +  A - the matrix 
3561: .  i - the indices into j for the start of each local row (starts with zero)
3562: .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3563: -  v - optional values in the matrix

3565:    Level: developer

3567: .keywords: matrix, aij, compressed row, sparse

3569: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3570: @*/
3571: PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3572: {
3573:   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

3576:   PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);
3577:   if (f) {
3578:     (*f)(B,bs,i,j,v);
3579:   }
3580:   return(0);
3581: }


3586: /*@
3587:      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 
3588:               (upper triangular entries in CSR format) provided by the user.

3590:      Collective on MPI_Comm

3592:    Input Parameters:
3593: +  comm - must be an MPI communicator of size 1
3594: .  bs - size of block
3595: .  m - number of rows
3596: .  n - number of columns
3597: .  i - row indices
3598: .  j - column indices
3599: -  a - matrix values

3601:    Output Parameter:
3602: .  mat - the matrix

3604:    Level: intermediate

3606:    Notes:
3607:        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3608:     once the matrix is destroyed

3610:        You cannot set new nonzero locations into this matrix, that will generate an error.

3612:        The i and j indices are 0 based

3614: .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()

3616: @*/
3617: PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3618: {
3620:   PetscInt       ii;
3621:   Mat_SeqBAIJ    *baij;

3624:   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3625:   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3626: 
3627:   MatCreate(comm,mat);
3628:   MatSetSizes(*mat,m,n,m,n);
3629:   MatSetType(*mat,MATSEQBAIJ);
3630:   MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);
3631:   baij = (Mat_SeqBAIJ*)(*mat)->data;
3632:   PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);
3633:   PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));

3635:   baij->i = i;
3636:   baij->j = j;
3637:   baij->a = a;
3638:   baij->singlemalloc = PETSC_FALSE;
3639:   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3640:   baij->free_a       = PETSC_FALSE;
3641:   baij->free_ij       = PETSC_FALSE;

3643:   for (ii=0; ii<m; ii++) {
3644:     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3645: #if defined(PETSC_USE_DEBUG)
3646:     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3647: #endif    
3648:   }
3649: #if defined(PETSC_USE_DEBUG)
3650:   for (ii=0; ii<baij->i[m]; ii++) {
3651:     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3652:     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3653:   }
3654: #endif    

3656:   MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
3657:   MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
3658:   return(0);
3659: }