Actual source code: sbaij2.c

  1: #define PETSCMAT_DLL

 3:  #include ../src/mat/impls/baij/seq/baij.h
 4:  #include ../src/mat/blockinvert.h
 5:  #include petscbt.h
 6:  #include ../src/mat/impls/sbaij/seq/sbaij.h
 7:  #include petscblaslapack.h

 11: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
 12: {
 13:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
 15:   PetscInt       brow,i,j,k,l,mbs,n,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
 16:   const PetscInt *idx;
 17:   PetscBT        table,table0;

 20:   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 21:   mbs = a->mbs;
 22:   ai  = a->i;
 23:   aj  = a->j;
 24:   bs  = A->rmap->bs;
 25:   PetscBTCreate(mbs,table);
 26:   PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
 27:   PetscMalloc((A->rmap->N+1)*sizeof(PetscInt),&nidx2);
 28:   PetscBTCreate(mbs,table0);

 30:   for (i=0; i<is_max; i++) { /* for each is */
 31:     isz  = 0;
 32:     PetscBTMemzero(mbs,table);
 33: 
 34:     /* Extract the indices, assume there can be duplicate entries */
 35:     ISGetIndices(is[i],&idx);
 36:     ISGetLocalSize(is[i],&n);

 38:     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
 39:     bcol_max = 0;
 40:     for (j=0; j<n ; ++j){
 41:       brow = idx[j]/bs; /* convert the indices into block indices */
 42:       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
 43:       if(!PetscBTLookupSet(table,brow)) {
 44:         nidx[isz++] = brow;
 45:         if (bcol_max < brow) bcol_max = brow;
 46:       }
 47:     }
 48:     ISRestoreIndices(is[i],&idx);
 49:     ISDestroy(is[i]);
 50: 
 51:     k = 0;
 52:     for (j=0; j<ov; j++){ /* for each overlap */
 53:       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
 54:       PetscBTMemzero(mbs,table0);
 55:       for (l=k; l<isz; l++) { PetscBTSet(table0,nidx[l]); }

 57:       n = isz;  /* length of the updated is[i] */
 58:       for (brow=0; brow<mbs; brow++){
 59:         start = ai[brow]; end   = ai[brow+1];
 60:         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
 61:           for (l = start; l<end ; l++){
 62:             bcol = aj[l];
 63:             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
 64:           }
 65:           k++;
 66:           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
 67:         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
 68:           for (l = start; l<end ; l++){
 69:             bcol = aj[l];
 70:             if (bcol > bcol_max) break;
 71:             if (PetscBTLookup(table0,bcol)){
 72:               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
 73:               break; /* for l = start; l<end ; l++) */
 74:             }
 75:           }
 76:         }
 77:       }
 78:     } /* for each overlap */

 80:     /* expand the Index Set */
 81:     for (j=0; j<isz; j++) {
 82:       for (k=0; k<bs; k++)
 83:         nidx2[j*bs+k] = nidx[j]*bs+k;
 84:     }
 85:     ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
 86:   }
 87:   PetscBTDestroy(table);
 88:   PetscFree(nidx);
 89:   PetscFree(nidx2);
 90:   PetscBTDestroy(table0);
 91:   return(0);
 92: }

 96: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
 97: {
 98:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
100:   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
101:   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
102:   PetscInt       nrows,*ssmap,bs=A->rmap->bs,bs2=a->bs2;
103:   const PetscInt *irow,*aj = a->j,*ai = a->i;
104:   MatScalar      *mat_a;
105:   Mat            C;
106:   PetscTruth     flag,sorted;

109:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
110:   ISSorted(iscol,&sorted);
111:   if (!sorted) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");

113:   ISGetIndices(isrow,&irow);
114:   ISGetSize(isrow,&nrows);
115: 
116:   PetscMalloc(oldcols*sizeof(PetscInt),&smap);
117:   PetscMemzero(smap,oldcols*sizeof(PetscInt));
118:   ssmap = smap;
119:   PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
120:   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
121:   /* determine lens of each row */
122:   for (i=0; i<nrows; i++) {
123:     kstart  = ai[irow[i]];
124:     kend    = kstart + a->ilen[irow[i]];
125:     lens[i] = 0;
126:       for (k=kstart; k<kend; k++) {
127:         if (ssmap[aj[k]]) {
128:           lens[i]++;
129:         }
130:       }
131:     }
132:   /* Create and fill new matrix */
133:   if (scall == MAT_REUSE_MATRIX) {
134:     c = (Mat_SeqSBAIJ *)((*B)->data);

136:     if (c->mbs!=nrows || (*B)->rmap->bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
137:     PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
138:     if (!flag) {
139:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
140:     }
141:     PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
142:     C = *B;
143:   } else {
144:     MatCreate(((PetscObject)A)->comm,&C);
145:     MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
146:     MatSetType(C,((PetscObject)A)->type_name);
147:     MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
148:   }
149:   c = (Mat_SeqSBAIJ *)(C->data);
150:   for (i=0; i<nrows; i++) {
151:     row    = irow[i];
152:     kstart = ai[row];
153:     kend   = kstart + a->ilen[row];
154:     mat_i  = c->i[i];
155:     mat_j  = c->j + mat_i;
156:     mat_a  = c->a + mat_i*bs2;
157:     mat_ilen = c->ilen + i;
158:     for (k=kstart; k<kend; k++) {
159:       if ((tcol=ssmap[a->j[k]])) {
160:         *mat_j++ = tcol - 1;
161:         PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
162:         mat_a   += bs2;
163:         (*mat_ilen)++;
164:       }
165:     }
166:   }
167: 
168:   /* Free work space */
169:   PetscFree(smap);
170:   PetscFree(lens);
171:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
172:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
173: 
174:   ISRestoreIndices(isrow,&irow);
175:   *B = C;
176:   return(0);
177: }

181: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
182: {
183:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
184:   IS             is1;
186:   PetscInt       *vary,*iary,nrows,i,bs=A->rmap->bs,count;
187:   const PetscInt *irow;

190:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
191: 
192:   ISGetIndices(isrow,&irow);
193:   ISGetSize(isrow,&nrows);
194: 
195:   /* Verify if the indices corespond to each element in a block 
196:    and form the IS with compressed IS */
197:   PetscMalloc2(a->mbs,PetscInt,&vary,a->mbs,PetscInt,&iary);
198:   PetscMemzero(vary,a->mbs*sizeof(PetscInt));
199:   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
200: 
201:   count = 0;
202:   for (i=0; i<a->mbs; i++) {
203:     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
204:     if (vary[i]==bs) iary[count++] = i;
205:   }
206:   ISRestoreIndices(isrow,&irow);
207:   ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
208:   PetscFree2(vary,iary);

210:   MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,scall,B);
211:   ISDestroy(is1);
212:   return(0);
213: }

217: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
218: {
220:   PetscInt       i;

223:   if (scall == MAT_INITIAL_MATRIX) {
224:     PetscMalloc((n+1)*sizeof(Mat),B);
225:   }

227:   for (i=0; i<n; i++) {
228:     MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],scall,&(*B)[i]);
229:   }
230:   return(0);
231: }

233: /* -------------------------------------------------------*/
234: /* Should check that shapes of vectors and matrices match */
235: /* -------------------------------------------------------*/

239: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
240: {
241:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
242:   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
243:   MatScalar      *v;
245:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
246:   PetscInt       nonzerorow=0;

249:   VecSet(zz,zero);
250:   VecGetArray(xx,&x);
251:   VecGetArray(zz,&z);
252: 
253:   v     = a->a;
254:   xb = x;

256:   for (i=0; i<mbs; i++) {
257:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
258:     x1 = xb[0]; x2 = xb[1];
259:     ib = aj + *ai;
260:     jmin = 0;
261:     nonzerorow += (n>0);
262:     if (*ib == i){     /* (diag of A)*x */
263:       z[2*i]   += v[0]*x1 + v[2]*x2;
264:       z[2*i+1] += v[2]*x1 + v[3]*x2;
265:       v += 4; jmin++;
266:     }
267:     for (j=jmin; j<n; j++) {
268:       /* (strict lower triangular part of A)*x  */
269:       cval       = ib[j]*2;
270:       z[cval]     += v[0]*x1 + v[1]*x2;
271:       z[cval+1]   += v[2]*x1 + v[3]*x2;
272:       /* (strict upper triangular part of A)*x  */
273:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
274:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
275:       v  += 4;
276:     }
277:     xb +=2; ai++;
278:   }

280:   VecRestoreArray(xx,&x);
281:   VecRestoreArray(zz,&z);
282:   PetscLogFlops(8.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
283:   return(0);
284: }

288: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
289: {
290:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
291:   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
292:   MatScalar      *v;
294:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
295:   PetscInt       nonzerorow=0;

298:   VecSet(zz,zero);
299:   VecGetArray(xx,&x);
300:   VecGetArray(zz,&z);
301: 
302:   v    = a->a;
303:   xb   = x;

305:   for (i=0; i<mbs; i++) {
306:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
307:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
308:     ib = aj + *ai;
309:     jmin = 0;
310:     nonzerorow += (n>0);
311:     if (*ib == i){     /* (diag of A)*x */
312:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
313:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
314:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
315:       v += 9; jmin++;
316:     }
317:     for (j=jmin; j<n; j++) {
318:       /* (strict lower triangular part of A)*x  */
319:       cval       = ib[j]*3;
320:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
321:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
322:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
323:       /* (strict upper triangular part of A)*x  */
324:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
325:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
326:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
327:       v  += 9;
328:     }
329:     xb +=3; ai++;
330:   }

332:   VecRestoreArray(xx,&x);
333:   VecRestoreArray(zz,&z);
334:   PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
335:   return(0);
336: }

340: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
341: {
342:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
343:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
344:   MatScalar      *v;
346:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
347:   PetscInt       nonzerorow=0;

350:   VecSet(zz,zero);
351:   VecGetArray(xx,&x);
352:   VecGetArray(zz,&z);
353: 
354:   v     = a->a;
355:   xb = x;

357:   for (i=0; i<mbs; i++) {
358:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
359:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
360:     ib = aj + *ai;
361:     jmin = 0;
362:     nonzerorow += (n>0);
363:     if (*ib == i){     /* (diag of A)*x */
364:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
365:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
366:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
367:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
368:       v += 16; jmin++;
369:     }
370:     for (j=jmin; j<n; j++) {
371:       /* (strict lower triangular part of A)*x  */
372:       cval       = ib[j]*4;
373:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
374:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
375:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
376:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
377:       /* (strict upper triangular part of A)*x  */
378:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
379:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
380:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
381:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
382:       v  += 16;
383:     }
384:     xb +=4; ai++;
385:   }

387:   VecRestoreArray(xx,&x);
388:   VecRestoreArray(zz,&z);
389:   PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
390:   return(0);
391: }

395: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
396: {
397:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
398:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
399:   MatScalar      *v;
401:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
402:   PetscInt       nonzerorow=0;

405:   VecSet(zz,zero);
406:   VecGetArray(xx,&x);
407:   VecGetArray(zz,&z);
408: 
409:   v     = a->a;
410:   xb = x;

412:   for (i=0; i<mbs; i++) {
413:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
414:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
415:     ib = aj + *ai;
416:     jmin = 0;
417:     nonzerorow += (n>0);
418:     if (*ib == i){      /* (diag of A)*x */
419:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
420:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
421:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
422:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
423:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
424:       v += 25; jmin++;
425:     }
426:     for (j=jmin; j<n; j++) {
427:       /* (strict lower triangular part of A)*x  */
428:       cval       = ib[j]*5;
429:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
430:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
431:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
432:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
433:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
434:       /* (strict upper triangular part of A)*x  */
435:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
436:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
437:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
438:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
439:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
440:       v  += 25;
441:     }
442:     xb +=5; ai++;
443:   }

445:   VecRestoreArray(xx,&x);
446:   VecRestoreArray(zz,&z);
447:   PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
448:   return(0);
449: }


454: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
455: {
456:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
457:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
458:   MatScalar      *v;
460:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
461:   PetscInt       nonzerorow=0;

464:   VecSet(zz,zero);
465:   VecGetArray(xx,&x);
466:   VecGetArray(zz,&z);
467: 
468:   v     = a->a;
469:   xb = x;

471:   for (i=0; i<mbs; i++) {
472:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
473:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
474:     ib = aj + *ai;
475:     jmin = 0;
476:     nonzerorow += (n>0);
477:     if (*ib == i){      /* (diag of A)*x */
478:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
479:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
480:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
481:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
482:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
483:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
484:       v += 36; jmin++;
485:     }
486:     for (j=jmin; j<n; j++) {
487:       /* (strict lower triangular part of A)*x  */
488:       cval       = ib[j]*6;
489:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
490:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
491:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
492:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
493:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
494:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
495:       /* (strict upper triangular part of A)*x  */
496:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
497:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
498:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
499:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
500:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
501:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
502:       v  += 36;
503:     }
504:     xb +=6; ai++;
505:   }

507:   VecRestoreArray(xx,&x);
508:   VecRestoreArray(zz,&z);
509:   PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
510:   return(0);
511: }
514: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
515: {
516:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
517:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
518:   MatScalar      *v;
520:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
521:   PetscInt       nonzerorow=0;

524:   VecSet(zz,zero);
525:   VecGetArray(xx,&x);
526:   VecGetArray(zz,&z);
527: 
528:   v     = a->a;
529:   xb = x;

531:   for (i=0; i<mbs; i++) {
532:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
533:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
534:     ib = aj + *ai;
535:     jmin = 0;
536:     nonzerorow += (n>0);
537:     if (*ib == i){      /* (diag of A)*x */
538:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
539:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
540:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
541:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
542:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
543:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
544:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
545:       v += 49; jmin++;
546:     }
547:     for (j=jmin; j<n; j++) {
548:       /* (strict lower triangular part of A)*x  */
549:       cval       = ib[j]*7;
550:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
551:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
552:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
553:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
554:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
555:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
556:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
557:       /* (strict upper triangular part of A)*x  */
558:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
559:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
560:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
561:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
562:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
563:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
564:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
565:       v  += 49;
566:     }
567:     xb +=7; ai++;
568:   }
569:   VecRestoreArray(xx,&x);
570:   VecRestoreArray(zz,&z);
571:   PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow) - nonzerorow);
572:   return(0);
573: }

575: /*
576:     This will not work with MatScalar == float because it calls the BLAS
577: */
580: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
581: {
582:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
583:   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
584:   MatScalar      *v;
586:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
587:   PetscInt       nonzerorow=0;

590:   VecSet(zz,zero);
591:   VecGetArray(xx,&x); x_ptr=x;
592:   VecGetArray(zz,&z); z_ptr=z;

594:   aj   = a->j;
595:   v    = a->a;
596:   ii   = a->i;

598:   if (!a->mult_work) {
599:     PetscMalloc((A->rmap->N+1)*sizeof(PetscScalar),&a->mult_work);
600:   }
601:   work = a->mult_work;
602: 
603:   for (i=0; i<mbs; i++) {
604:     n     = ii[1] - ii[0]; ncols = n*bs;
605:     workt = work; idx=aj+ii[0];
606:     nonzerorow += (n>0);

608:     /* upper triangular part */
609:     for (j=0; j<n; j++) {
610:       xb = x_ptr + bs*(*idx++);
611:       for (k=0; k<bs; k++) workt[k] = xb[k];
612:       workt += bs;
613:     }
614:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
615:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
616: 
617:     /* strict lower triangular part */
618:     idx = aj+ii[0];
619:     if (*idx == i){
620:       ncols -= bs; v += bs2; idx++; n--;
621:     }
622: 
623:     if (ncols > 0){
624:       workt = work;
625:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
626:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
627:       for (j=0; j<n; j++) {
628:         zb = z_ptr + bs*(*idx++);
629:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
630:         workt += bs;
631:       }
632:     }
633:     x += bs; v += n*bs2; z += bs; ii++;
634:   }
635: 
636:   VecRestoreArray(xx,&x);
637:   VecRestoreArray(zz,&z);
638:   PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow)*bs2 - nonzerorow);
639:   return(0);
640: }

645: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
646: {
647:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
648:   PetscScalar    *x,*z,*xb,x1;
649:   MatScalar      *v;
651:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
652:   PetscInt       nonzerorow=0;

655:   VecCopy_Seq(yy,zz);
656:   VecGetArray(xx,&x);
657:   VecGetArray(zz,&z);
658:   v  = a->a;
659:   xb = x;

661:   for (i=0; i<mbs; i++) {
662:     n  = ai[1] - ai[0];  /* length of i_th row of A */
663:     x1 = xb[0];
664:     ib = aj + *ai;
665:     jmin = 0;
666:     nonzerorow += (n>0);
667:     if (*ib == i) {            /* (diag of A)*x */
668:       z[i] += *v++ * x[*ib++]; jmin++;
669:     }
670:     for (j=jmin; j<n; j++) {
671:       cval    = *ib;
672:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
673:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
674:     }
675:     xb++; ai++;
676:   }

678:   VecRestoreArray(xx,&x);
679:   VecRestoreArray(zz,&z);
680: 
681:   PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));
682:   return(0);
683: }

687: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
688: {
689:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
690:   PetscScalar    *x,*z,*xb,x1,x2;
691:   MatScalar      *v;
693:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
694:   PetscInt       nonzerorow=0;

697:   VecCopy_Seq(yy,zz);
698:   VecGetArray(xx,&x);
699:   VecGetArray(zz,&z);

701:   v  = a->a;
702:   xb = x;

704:   for (i=0; i<mbs; i++) {
705:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
706:     x1 = xb[0]; x2 = xb[1];
707:     ib = aj + *ai;
708:     jmin = 0;
709:     nonzerorow += (n>0);
710:     if (*ib == i){      /* (diag of A)*x */
711:       z[2*i]   += v[0]*x1 + v[2]*x2;
712:       z[2*i+1] += v[2]*x1 + v[3]*x2;
713:       v += 4; jmin++;
714:     }
715:     for (j=jmin; j<n; j++) {
716:       /* (strict lower triangular part of A)*x  */
717:       cval       = ib[j]*2;
718:       z[cval]     += v[0]*x1 + v[1]*x2;
719:       z[cval+1]   += v[2]*x1 + v[3]*x2;
720:       /* (strict upper triangular part of A)*x  */
721:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
722:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
723:       v  += 4;
724:     }
725:     xb +=2; ai++;
726:   }
727:   VecRestoreArray(xx,&x);
728:   VecRestoreArray(zz,&z);

730:   PetscLogFlops(4.0*(a->nz*2.0 - nonzerorow));
731:   return(0);
732: }

736: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
737: {
738:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
739:   PetscScalar    *x,*z,*xb,x1,x2,x3;
740:   MatScalar      *v;
742:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
743:   PetscInt       nonzerorow=0;

746:   VecCopy_Seq(yy,zz);
747:   VecGetArray(xx,&x);
748:   VecGetArray(zz,&z);

750:   v     = a->a;
751:   xb = x;

753:   for (i=0; i<mbs; i++) {
754:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
755:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
756:     ib = aj + *ai;
757:     jmin = 0;
758:     nonzerorow += (n>0);
759:     if (*ib == i){     /* (diag of A)*x */
760:      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
761:      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
762:      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
763:      v += 9; jmin++;
764:     }
765:     for (j=jmin; j<n; j++) {
766:       /* (strict lower triangular part of A)*x  */
767:       cval       = ib[j]*3;
768:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
769:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
770:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
771:       /* (strict upper triangular part of A)*x  */
772:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
773:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
774:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
775:       v  += 9;
776:     }
777:     xb +=3; ai++;
778:   }

780:   VecRestoreArray(xx,&x);
781:   VecRestoreArray(zz,&z);

783:   PetscLogFlops(18.0*(a->nz*2.0 - nonzerorow));
784:   return(0);
785: }

789: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
790: {
791:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
792:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
793:   MatScalar      *v;
795:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
796:   PetscInt       nonzerorow=0;

799:   VecCopy_Seq(yy,zz);
800:   VecGetArray(xx,&x);
801:   VecGetArray(zz,&z);

803:   v     = a->a;
804:   xb = x;

806:   for (i=0; i<mbs; i++) {
807:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
808:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
809:     ib = aj + *ai;
810:     jmin = 0;
811:     nonzerorow += (n>0);
812:     if (*ib == i){      /* (diag of A)*x */
813:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
814:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
815:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
816:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
817:       v += 16; jmin++;
818:     }
819:     for (j=jmin; j<n; j++) {
820:       /* (strict lower triangular part of A)*x  */
821:       cval       = ib[j]*4;
822:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
823:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
824:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
825:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
826:       /* (strict upper triangular part of A)*x  */
827:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
828:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
829:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
830:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
831:       v  += 16;
832:     }
833:     xb +=4; ai++;
834:   }

836:   VecRestoreArray(xx,&x);
837:   VecRestoreArray(zz,&z);

839:   PetscLogFlops(32.0*(a->nz*2.0 - nonzerorow));
840:   return(0);
841: }

845: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
846: {
847:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
848:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
849:   MatScalar      *v;
851:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
852:   PetscInt       nonzerorow=0;

855:   VecCopy_Seq(yy,zz);
856:   VecGetArray(xx,&x);
857:   VecGetArray(zz,&z);

859:   v     = a->a;
860:   xb = x;

862:   for (i=0; i<mbs; i++) {
863:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
864:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
865:     ib = aj + *ai;
866:     jmin = 0;
867:     nonzerorow += (n>0);
868:     if (*ib == i){      /* (diag of A)*x */
869:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
870:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
871:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
872:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
873:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
874:       v += 25; jmin++;
875:     }
876:     for (j=jmin; j<n; j++) {
877:       /* (strict lower triangular part of A)*x  */
878:       cval       = ib[j]*5;
879:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
880:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
881:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
882:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
883:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
884:       /* (strict upper triangular part of A)*x  */
885:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
886:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
887:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
888:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
889:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
890:       v  += 25;
891:     }
892:     xb +=5; ai++;
893:   }

895:   VecRestoreArray(xx,&x);
896:   VecRestoreArray(zz,&z);

898:   PetscLogFlops(50.0*(a->nz*2.0 - nonzerorow));
899:   return(0);
900: }
903: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
904: {
905:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
906:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
907:   MatScalar      *v;
909:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
910:   PetscInt       nonzerorow=0;

913:   VecCopy_Seq(yy,zz);
914:   VecGetArray(xx,&x);
915:   VecGetArray(zz,&z);

917:   v     = a->a;
918:   xb = x;

920:   for (i=0; i<mbs; i++) {
921:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
922:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
923:     ib = aj + *ai;
924:     jmin = 0;
925:     nonzerorow += (n>0);
926:     if (*ib == i){     /* (diag of A)*x */
927:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
928:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
929:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
930:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
931:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
932:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
933:       v += 36; jmin++;
934:     }
935:     for (j=jmin; j<n; j++) {
936:       /* (strict lower triangular part of A)*x  */
937:       cval       = ib[j]*6;
938:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
939:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
940:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
941:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
942:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
943:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
944:       /* (strict upper triangular part of A)*x  */
945:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
946:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
947:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
948:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
949:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
950:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
951:       v  += 36;
952:     }
953:     xb +=6; ai++;
954:   }

956:   VecRestoreArray(xx,&x);
957:   VecRestoreArray(zz,&z);

959:   PetscLogFlops(72.0*(a->nz*2.0 - nonzerorow));
960:   return(0);
961: }

965: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
966: {
967:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
968:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
969:   MatScalar      *v;
971:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;
972:   PetscInt       nonzerorow=0;

975:   VecCopy_Seq(yy,zz);
976:   VecGetArray(xx,&x);
977:   VecGetArray(zz,&z);

979:   v     = a->a;
980:   xb = x;

982:   for (i=0; i<mbs; i++) {
983:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
984:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
985:     ib = aj + *ai;
986:     jmin = 0;
987:     nonzerorow += (n>0);
988:     if (*ib == i){     /* (diag of A)*x */
989:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
990:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
991:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
992:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
993:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
994:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
995:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
996:       v += 49; jmin++;
997:     }
998:     for (j=jmin; j<n; j++) {
999:       /* (strict lower triangular part of A)*x  */
1000:       cval       = ib[j]*7;
1001:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1002:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1003:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1004:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1005:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1006:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1007:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1008:       /* (strict upper triangular part of A)*x  */
1009:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1010:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1011:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1012:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1013:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1014:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1015:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1016:       v  += 49;
1017:     }
1018:     xb +=7; ai++;
1019:   }

1021:   VecRestoreArray(xx,&x);
1022:   VecRestoreArray(zz,&z);

1024:   PetscLogFlops(98.0*(a->nz*2.0 - nonzerorow));
1025:   return(0);
1026: }

1030: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1031: {
1032:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1033:   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1034:   MatScalar      *v;
1036:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap->bs,j,n,bs2=a->bs2,ncols,k;
1037:   PetscInt       nonzerorow=0;

1040:   VecCopy_Seq(yy,zz);
1041:   VecGetArray(xx,&x); x_ptr=x;
1042:   VecGetArray(zz,&z); z_ptr=z;

1044:   aj   = a->j;
1045:   v    = a->a;
1046:   ii   = a->i;

1048:   if (!a->mult_work) {
1049:     PetscMalloc((A->rmap->n+1)*sizeof(PetscScalar),&a->mult_work);
1050:   }
1051:   work = a->mult_work;
1052: 
1053: 
1054:   for (i=0; i<mbs; i++) {
1055:     n     = ii[1] - ii[0]; ncols = n*bs;
1056:     workt = work; idx=aj+ii[0];
1057:     nonzerorow += (n>0);

1059:     /* upper triangular part */
1060:     for (j=0; j<n; j++) {
1061:       xb = x_ptr + bs*(*idx++);
1062:       for (k=0; k<bs; k++) workt[k] = xb[k];
1063:       workt += bs;
1064:     }
1065:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1066:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1068:     /* strict lower triangular part */
1069:     idx = aj+ii[0];
1070:     if (*idx == i){
1071:       ncols -= bs; v += bs2; idx++; n--;
1072:     }
1073:     if (ncols > 0){
1074:       workt = work;
1075:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
1076:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1077:       for (j=0; j<n; j++) {
1078:         zb = z_ptr + bs*(*idx++);
1079:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
1080:         workt += bs;
1081:       }
1082:     }

1084:     x += bs; v += n*bs2; z += bs; ii++;
1085:   }

1087:   VecRestoreArray(xx,&x);
1088:   VecRestoreArray(zz,&z);

1090:   PetscLogFlops(2.0*(a->nz*2.0 - nonzerorow));
1091:   return(0);
1092: }

1096: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1097: {
1098:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inA->data;
1099:   PetscScalar    oalpha = alpha;
1101:   PetscBLASInt   one = 1,totalnz = PetscBLASIntCast(a->bs2*a->nz);

1104:   BLASscal_(&totalnz,&oalpha,a->a,&one);
1105:   PetscLogFlops(totalnz);
1106:   return(0);
1107: }

1111: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1112: {
1113:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1114:   MatScalar      *v = a->a;
1115:   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1116:   PetscInt       i,j,k,bs = A->rmap->bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1118:   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
1119: 
1121:   if (type == NORM_FROBENIUS) {
1122:     for (k=0; k<mbs; k++){
1123:       jmin = a->i[k]; jmax = a->i[k+1];
1124:       col  = aj + jmin;
1125:       if (*col == k){         /* diagonal block */
1126:         for (i=0; i<bs2; i++){
1127: #if defined(PETSC_USE_COMPLEX)
1128:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1129: #else
1130:           sum_diag += (*v)*(*v); v++;
1131: #endif
1132:         }
1133:         jmin++;
1134:       }
1135:       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
1136:         for (i=0; i<bs2; i++){
1137: #if defined(PETSC_USE_COMPLEX)
1138:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1139: #else
1140:           sum_off += (*v)*(*v); v++;
1141: #endif  
1142:         }
1143:       }
1144:     }
1145:     *norm = sqrt(sum_diag + 2*sum_off);
1146:   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1147:     PetscMalloc3(bs,PetscReal,&sum,mbs,PetscInt,&il,mbs,PetscInt,&jl);
1148:     for (i=0; i<mbs; i++) jl[i] = mbs;
1149:     il[0] = 0;

1151:     *norm = 0.0;
1152:     for (k=0; k<mbs; k++) { /* k_th block row */
1153:       for (j=0; j<bs; j++) sum[j]=0.0;
1154:       /*-- col sum --*/
1155:       i = jl[k]; /* first |A(i,k)| to be added */
1156:       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1157:                   at step k */
1158:       while (i<mbs){
1159:         nexti = jl[i];  /* next block row to be added */
1160:         ik    = il[i];  /* block index of A(i,k) in the array a */
1161:         for (j=0; j<bs; j++){
1162:           v = a->a + ik*bs2 + j*bs;
1163:           for (k1=0; k1<bs; k1++) {
1164:             sum[j] += PetscAbsScalar(*v); v++;
1165:           }
1166:         }
1167:         /* update il, jl */
1168:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1169:         jmax = a->i[i+1];
1170:         if (jmin < jmax){
1171:           il[i] = jmin;
1172:           j   = a->j[jmin];
1173:           jl[i] = jl[j]; jl[j]=i;
1174:         }
1175:         i = nexti;
1176:       }
1177:       /*-- row sum --*/
1178:       jmin = a->i[k]; jmax = a->i[k+1];
1179:       for (i=jmin; i<jmax; i++) {
1180:         for (j=0; j<bs; j++){
1181:           v = a->a + i*bs2 + j;
1182:           for (k1=0; k1<bs; k1++){
1183:             sum[j] += PetscAbsScalar(*v); v += bs;
1184:           }
1185:         }
1186:       }
1187:       /* add k_th block row to il, jl */
1188:       col = aj+jmin;
1189:       if (*col == k) jmin++;
1190:       if (jmin < jmax){
1191:         il[k] = jmin;
1192:         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1193:       }
1194:       for (j=0; j<bs; j++){
1195:         if (sum[j] > *norm) *norm = sum[j];
1196:       }
1197:     }
1198:     PetscFree3(sum,il,jl);
1199:   } else {
1200:     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1201:   }
1202:   return(0);
1203: }

1207: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1208: {
1209:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;


1214:   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1215:   if ((A->rmap->N != B->rmap->N) || (A->cmap->n != B->cmap->n) || (A->rmap->bs != B->rmap->bs)|| (a->nz != b->nz)) {
1216:     *flg = PETSC_FALSE;
1217:     return(0);
1218:   }
1219: 
1220:   /* if the a->i are the same */
1221:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1222:   if (!*flg) {
1223:     return(0);
1224:   }
1225: 
1226:   /* if a->j are the same */
1227:   PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1228:   if (!*flg) {
1229:     return(0);
1230:   }
1231:   /* if a->a are the same */
1232:   PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap->bs)*(A->rmap->bs)*sizeof(PetscScalar),flg);
1233:   return(0);
1234: }

1238: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1239: {
1240:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1242:   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1243:   PetscScalar    *x,zero = 0.0;
1244:   MatScalar      *aa,*aa_j;

1247:   bs   = A->rmap->bs;
1248:   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1249: 
1250:   aa   = a->a;
1251:   ai   = a->i;
1252:   aj   = a->j;
1253:   ambs = a->mbs;
1254:   bs2  = a->bs2;

1256:   VecSet(v,zero);
1257:   VecGetArray(v,&x);
1258:   VecGetLocalSize(v,&n);
1259:   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1260:   for (i=0; i<ambs; i++) {
1261:     j=ai[i];
1262:     if (aj[j] == i) {             /* if this is a diagonal element */
1263:       row  = i*bs;
1264:       aa_j = aa + j*bs2;
1265:       if (A->factor && bs==1){
1266:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1267:       } else {
1268:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1269:       }
1270:     }
1271:   }
1272: 
1273:   VecRestoreArray(v,&x);
1274:   return(0);
1275: }

1279: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1280: {
1281:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1282:   PetscScalar    *l,x,*li,*ri;
1283:   MatScalar      *aa,*v;
1285:   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1286:   PetscTruth     flg;

1289:   if (ll != rr){
1290:     VecEqual(ll,rr,&flg);
1291:     if (!flg)
1292:       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1293:   }
1294:   if (!ll) return(0);
1295:   ai  = a->i;
1296:   aj  = a->j;
1297:   aa  = a->a;
1298:   m   = A->rmap->N;
1299:   bs  = A->rmap->bs;
1300:   mbs = a->mbs;
1301:   bs2 = a->bs2;

1303:   VecGetArray(ll,&l);
1304:   VecGetLocalSize(ll,&lm);
1305:   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1306:   for (i=0; i<mbs; i++) { /* for each block row */
1307:     M  = ai[i+1] - ai[i];
1308:     li = l + i*bs;
1309:     v  = aa + bs2*ai[i];
1310:     for (j=0; j<M; j++) { /* for each block */
1311:       ri = l + bs*aj[ai[i]+j];
1312:       for (k=0; k<bs; k++) {
1313:         x = ri[k];
1314:         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1315:       }
1316:     }
1317:   }
1318:   VecRestoreArray(ll,&l);
1319:   PetscLogFlops(2.0*a->nz);
1320:   return(0);
1321: }

1325: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1326: {
1327:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1330:   info->block_size     = a->bs2;
1331:   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
1332:   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1333:   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1334:   info->assemblies   = A->num_ass;
1335:   info->mallocs      = a->reallocs;
1336:   info->memory       = ((PetscObject)A)->mem;
1337:   if (A->factor) {
1338:     info->fill_ratio_given  = A->info.fill_ratio_given;
1339:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1340:     info->factor_mallocs    = A->info.factor_mallocs;
1341:   } else {
1342:     info->fill_ratio_given  = 0;
1343:     info->fill_ratio_needed = 0;
1344:     info->factor_mallocs    = 0;
1345:   }
1346:   return(0);
1347: }


1352: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1353: {
1354:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1358:   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1359:   return(0);
1360: }

1364: /* 
1365:    This code does not work since it only checks the upper triangular part of
1366:   the matrix. Hence it is not listed in the function table.
1367: */
1368: PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1369: {
1370:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1372:   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1373:   PetscReal      atmp;
1374:   MatScalar      *aa;
1375:   PetscScalar    *x;
1376:   PetscInt       ncols,brow,bcol,krow,kcol;

1379:   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1380:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1381:   bs   = A->rmap->bs;
1382:   aa   = a->a;
1383:   ai   = a->i;
1384:   aj   = a->j;
1385:   mbs = a->mbs;

1387:   VecSet(v,0.0);
1388:   VecGetArray(v,&x);
1389:   VecGetLocalSize(v,&n);
1390:   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1391:   for (i=0; i<mbs; i++) {
1392:     ncols = ai[1] - ai[0]; ai++;
1393:     brow  = bs*i;
1394:     for (j=0; j<ncols; j++){
1395:       bcol = bs*(*aj);
1396:       for (kcol=0; kcol<bs; kcol++){
1397:         col = bcol + kcol;      /* col index */
1398:         for (krow=0; krow<bs; krow++){
1399:           atmp = PetscAbsScalar(*aa); aa++;
1400:           row = brow + krow;    /* row index */
1401:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1402:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1403:         }
1404:       }
1405:       aj++;
1406:     }
1407:   }
1408:   VecRestoreArray(v,&x);
1409:   return(0);
1410: }