Actual source code: maij.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Defines the basic matrix operations for the MAIJ  matrix storage format.
  5:   This format is used for restriction and interpolation operations for 
  6:   multicomponent problems. It interpolates each component the same way
  7:   independently.

  9:      We provide:
 10:          MatMult()
 11:          MatMultTranspose()
 12:          MatMultTransposeAdd()
 13:          MatMultAdd()
 14:           and
 15:          MatCreateMAIJ(Mat,dof,Mat*)

 17:      This single directory handles both the sequential and parallel codes
 18: */

 20:  #include ../src/mat/impls/maij/maij.h
 21:  #include ../src/mat/utils/freespace.h
 22:  #include private/vecimpl.h

 26: /*@C
 27:    MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix

 29:    Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel

 31:    Input Parameter:
 32: .  A - the MAIJ matrix

 34:    Output Parameter:
 35: .  B - the AIJ matrix

 37:    Level: advanced

 39:    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

 41: .seealso: MatCreateMAIJ()
 42: @*/
 43: PetscErrorCode  MatMAIJGetAIJ(Mat A,Mat *B)
 44: {
 46:   PetscTruth     ismpimaij,isseqmaij;

 49:   PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
 50:   PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
 51:   if (ismpimaij) {
 52:     Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;

 54:     *B = b->A;
 55:   } else if (isseqmaij) {
 56:     Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;

 58:     *B = b->AIJ;
 59:   } else {
 60:     *B = A;
 61:   }
 62:   return(0);
 63: }

 67: /*@C
 68:    MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size

 70:    Collective

 72:    Input Parameter:
 73: +  A - the MAIJ matrix
 74: -  dof - the block size for the new matrix

 76:    Output Parameter:
 77: .  B - the new MAIJ matrix

 79:    Level: advanced

 81: .seealso: MatCreateMAIJ()
 82: @*/
 83: PetscErrorCode  MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
 84: {
 86:   Mat            Aij = PETSC_NULL;

 89:   MatMAIJGetAIJ(A,&Aij);
 90:   MatCreateMAIJ(Aij,dof,B);
 91:   return(0);
 92: }

 96: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
 97: {
 99:   Mat_SeqMAIJ    *b = (Mat_SeqMAIJ*)A->data;

102:   if (b->AIJ) {
103:     MatDestroy(b->AIJ);
104:   }
105:   PetscFree(b);
106:   return(0);
107: }

111: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
112: {
114:   Mat            B;

117:   MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
118:   MatView(B,viewer);
119:   MatDestroy(B);
120:   return(0);
121: }

125: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
126: {
128:   Mat            B;

131:   MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
132:   MatView(B,viewer);
133:   MatDestroy(B);
134:   return(0);
135: }

139: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
140: {
142:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

145:   if (b->AIJ) {
146:     MatDestroy(b->AIJ);
147:   }
148:   if (b->OAIJ) {
149:     MatDestroy(b->OAIJ);
150:   }
151:   if (b->A) {
152:     MatDestroy(b->A);
153:   }
154:   if (b->ctx) {
155:     VecScatterDestroy(b->ctx);
156:   }
157:   if (b->w) {
158:     VecDestroy(b->w);
159:   }
160:   PetscFree(b);
161:   PetscObjectChangeTypeName((PetscObject)A,0);
162:   return(0);
163: }

165: /*MC
166:   MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 
167:   multicomponent problems, interpolating or restricting each component the same way independently.
168:   The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.

170:   Operations provided:
171: . MatMult
172: . MatMultTranspose
173: . MatMultAdd
174: . MatMultTransposeAdd

176:   Level: advanced

178: .seealso: MatCreateSeqDense
179: M*/

184: PetscErrorCode  MatCreate_MAIJ(Mat A)
185: {
187:   Mat_MPIMAIJ    *b;
188:   PetscMPIInt    size;

191:   PetscNewLog(A,Mat_MPIMAIJ,&b);
192:   A->data  = (void*)b;
193:   PetscMemzero(A->ops,sizeof(struct _MatOps));
194:   A->mapping          = 0;

196:   b->AIJ  = 0;
197:   b->dof  = 0;
198:   b->OAIJ = 0;
199:   b->ctx  = 0;
200:   b->w    = 0;
201:   MPI_Comm_size(((PetscObject)A)->comm,&size);
202:   if (size == 1){
203:     PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
204:   } else {
205:     PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
206:   }
207:   return(0);
208: }

211: /* --------------------------------------------------------------------------------------*/
214: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
215: {
216:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
217:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
218:   const PetscScalar *x,*v;
219:   PetscScalar       *y, sum1, sum2;
220:   PetscErrorCode    ierr;
221:   PetscInt          nonzerorow=0,n,i,jrow,j;
222:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

225:   VecGetArray(xx,(PetscScalar**)&x);
226:   VecGetArray(yy,&y);
227:   idx  = a->j;
228:   v    = a->a;
229:   ii   = a->i;

231:   for (i=0; i<m; i++) {
232:     jrow = ii[i];
233:     n    = ii[i+1] - jrow;
234:     sum1  = 0.0;
235:     sum2  = 0.0;
236:     nonzerorow += (n>0);
237:     for (j=0; j<n; j++) {
238:       sum1 += v[jrow]*x[2*idx[jrow]];
239:       sum2 += v[jrow]*x[2*idx[jrow]+1];
240:       jrow++;
241:      }
242:     y[2*i]   = sum1;
243:     y[2*i+1] = sum2;
244:   }

246:   PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
247:   VecRestoreArray(xx,(PetscScalar**)&x);
248:   VecRestoreArray(yy,&y);
249:   return(0);
250: }

254: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
255: {
256:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
257:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
258:   const PetscScalar *x,*v;
259:   PetscScalar       *y,alpha1,alpha2;
260:   PetscErrorCode    ierr;
261:   PetscInt          n,i;
262:   const PetscInt    m = b->AIJ->rmap->n,*idx;

265:   VecSet(yy,0.0);
266:   VecGetArray(xx,(PetscScalar**)&x);
267:   VecGetArray(yy,&y);
268: 
269:   for (i=0; i<m; i++) {
270:     idx    = a->j + a->i[i] ;
271:     v      = a->a + a->i[i] ;
272:     n      = a->i[i+1] - a->i[i];
273:     alpha1 = x[2*i];
274:     alpha2 = x[2*i+1];
275:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
276:   }
277:   PetscLogFlops(4.0*a->nz);
278:   VecRestoreArray(xx,(PetscScalar**)&x);
279:   VecRestoreArray(yy,&y);
280:   return(0);
281: }

285: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
286: {
287:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
288:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
289:   const PetscScalar *x,*v;
290:   PetscScalar       *y,sum1, sum2;
291:   PetscErrorCode    ierr;
292:   PetscInt          n,i,jrow,j;
293:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

296:   if (yy != zz) {VecCopy(yy,zz);}
297:   VecGetArray(xx,(PetscScalar**)&x);
298:   VecGetArray(zz,&y);
299:   idx  = a->j;
300:   v    = a->a;
301:   ii   = a->i;

303:   for (i=0; i<m; i++) {
304:     jrow = ii[i];
305:     n    = ii[i+1] - jrow;
306:     sum1  = 0.0;
307:     sum2  = 0.0;
308:     for (j=0; j<n; j++) {
309:       sum1 += v[jrow]*x[2*idx[jrow]];
310:       sum2 += v[jrow]*x[2*idx[jrow]+1];
311:       jrow++;
312:      }
313:     y[2*i]   += sum1;
314:     y[2*i+1] += sum2;
315:   }

317:   PetscLogFlops(4.0*a->nz);
318:   VecRestoreArray(xx,(PetscScalar**)&x);
319:   VecRestoreArray(zz,&y);
320:   return(0);
321: }
324: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
325: {
326:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
327:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
328:   const PetscScalar *x,*v;
329:   PetscScalar       *y,alpha1,alpha2;
330:   PetscErrorCode    ierr;
331:   PetscInt          n,i;
332:   const PetscInt    m = b->AIJ->rmap->n,*idx;

335:   if (yy != zz) {VecCopy(yy,zz);}
336:   VecGetArray(xx,(PetscScalar**)&x);
337:   VecGetArray(zz,&y);
338: 
339:   for (i=0; i<m; i++) {
340:     idx   = a->j + a->i[i] ;
341:     v     = a->a + a->i[i] ;
342:     n     = a->i[i+1] - a->i[i];
343:     alpha1 = x[2*i];
344:     alpha2 = x[2*i+1];
345:     while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
346:   }
347:   PetscLogFlops(4.0*a->nz);
348:   VecRestoreArray(xx,(PetscScalar**)&x);
349:   VecRestoreArray(zz,&y);
350:   return(0);
351: }
352: /* --------------------------------------------------------------------------------------*/
355: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
356: {
357:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
358:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
359:   const PetscScalar *x,*v;
360:   PetscScalar       *y,sum1, sum2, sum3;
361:   PetscErrorCode    ierr;
362:   PetscInt          nonzerorow=0,n,i,jrow,j;
363:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

366:   VecGetArray(xx,(PetscScalar**)&x);
367:   VecGetArray(yy,&y);
368:   idx  = a->j;
369:   v    = a->a;
370:   ii   = a->i;

372:   for (i=0; i<m; i++) {
373:     jrow = ii[i];
374:     n    = ii[i+1] - jrow;
375:     sum1  = 0.0;
376:     sum2  = 0.0;
377:     sum3  = 0.0;
378:     nonzerorow += (n>0);
379:     for (j=0; j<n; j++) {
380:       sum1 += v[jrow]*x[3*idx[jrow]];
381:       sum2 += v[jrow]*x[3*idx[jrow]+1];
382:       sum3 += v[jrow]*x[3*idx[jrow]+2];
383:       jrow++;
384:      }
385:     y[3*i]   = sum1;
386:     y[3*i+1] = sum2;
387:     y[3*i+2] = sum3;
388:   }

390:   PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
391:   VecRestoreArray(xx,(PetscScalar**)&x);
392:   VecRestoreArray(yy,&y);
393:   return(0);
394: }

398: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
399: {
400:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
401:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
402:   const PetscScalar *x,*v;
403:   PetscScalar       *y,alpha1,alpha2,alpha3;
404:   PetscErrorCode    ierr;
405:   PetscInt          n,i;
406:   const PetscInt    m = b->AIJ->rmap->n,*idx;

409:   VecSet(yy,0.0);
410:   VecGetArray(xx,(PetscScalar**)&x);
411:   VecGetArray(yy,&y);
412: 
413:   for (i=0; i<m; i++) {
414:     idx    = a->j + a->i[i];
415:     v      = a->a + a->i[i];
416:     n      = a->i[i+1] - a->i[i];
417:     alpha1 = x[3*i];
418:     alpha2 = x[3*i+1];
419:     alpha3 = x[3*i+2];
420:     while (n-->0) {
421:       y[3*(*idx)]   += alpha1*(*v);
422:       y[3*(*idx)+1] += alpha2*(*v);
423:       y[3*(*idx)+2] += alpha3*(*v);
424:       idx++; v++;
425:     }
426:   }
427:   PetscLogFlops(6.0*a->nz);
428:   VecRestoreArray(xx,(PetscScalar**)&x);
429:   VecRestoreArray(yy,&y);
430:   return(0);
431: }

435: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
436: {
437:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
438:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
439:   const PetscScalar *x,*v;
440:   PetscScalar       *y,sum1, sum2, sum3;
441:   PetscErrorCode    ierr;
442:   PetscInt          n,i,jrow,j;
443:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

446:   if (yy != zz) {VecCopy(yy,zz);}
447:   VecGetArray(xx,(PetscScalar**)&x);
448:   VecGetArray(zz,&y);
449:   idx  = a->j;
450:   v    = a->a;
451:   ii   = a->i;

453:   for (i=0; i<m; i++) {
454:     jrow = ii[i];
455:     n    = ii[i+1] - jrow;
456:     sum1  = 0.0;
457:     sum2  = 0.0;
458:     sum3  = 0.0;
459:     for (j=0; j<n; j++) {
460:       sum1 += v[jrow]*x[3*idx[jrow]];
461:       sum2 += v[jrow]*x[3*idx[jrow]+1];
462:       sum3 += v[jrow]*x[3*idx[jrow]+2];
463:       jrow++;
464:      }
465:     y[3*i]   += sum1;
466:     y[3*i+1] += sum2;
467:     y[3*i+2] += sum3;
468:   }

470:   PetscLogFlops(6.0*a->nz);
471:   VecRestoreArray(xx,(PetscScalar**)&x);
472:   VecRestoreArray(zz,&y);
473:   return(0);
474: }
477: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
478: {
479:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
480:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
481:   const PetscScalar *x,*v;
482:   PetscScalar       *y,alpha1,alpha2,alpha3;
483:   PetscErrorCode    ierr;
484:   PetscInt          n,i;
485:   const PetscInt    m = b->AIJ->rmap->n,*idx;

488:   if (yy != zz) {VecCopy(yy,zz);}
489:   VecGetArray(xx,(PetscScalar**)&x);
490:   VecGetArray(zz,&y);
491:   for (i=0; i<m; i++) {
492:     idx    = a->j + a->i[i] ;
493:     v      = a->a + a->i[i] ;
494:     n      = a->i[i+1] - a->i[i];
495:     alpha1 = x[3*i];
496:     alpha2 = x[3*i+1];
497:     alpha3 = x[3*i+2];
498:     while (n-->0) {
499:       y[3*(*idx)]   += alpha1*(*v);
500:       y[3*(*idx)+1] += alpha2*(*v);
501:       y[3*(*idx)+2] += alpha3*(*v);
502:       idx++; v++;
503:     }
504:   }
505:   PetscLogFlops(6.0*a->nz);
506:   VecRestoreArray(xx,(PetscScalar**)&x);
507:   VecRestoreArray(zz,&y);
508:   return(0);
509: }

511: /* ------------------------------------------------------------------------------*/
514: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
515: {
516:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
517:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
518:   const PetscScalar *x,*v;
519:   PetscScalar       *y,sum1, sum2, sum3, sum4;
520:   PetscErrorCode    ierr;
521:   PetscInt          nonzerorow=0,n,i,jrow,j;
522:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

525:   VecGetArray(xx,(PetscScalar**)&x);
526:   VecGetArray(yy,&y);
527:   idx  = a->j;
528:   v    = a->a;
529:   ii   = a->i;

531:   for (i=0; i<m; i++) {
532:     jrow = ii[i];
533:     n    = ii[i+1] - jrow;
534:     sum1  = 0.0;
535:     sum2  = 0.0;
536:     sum3  = 0.0;
537:     sum4  = 0.0;
538:     nonzerorow += (n>0);
539:     for (j=0; j<n; j++) {
540:       sum1 += v[jrow]*x[4*idx[jrow]];
541:       sum2 += v[jrow]*x[4*idx[jrow]+1];
542:       sum3 += v[jrow]*x[4*idx[jrow]+2];
543:       sum4 += v[jrow]*x[4*idx[jrow]+3];
544:       jrow++;
545:      }
546:     y[4*i]   = sum1;
547:     y[4*i+1] = sum2;
548:     y[4*i+2] = sum3;
549:     y[4*i+3] = sum4;
550:   }

552:   PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
553:   VecRestoreArray(xx,(PetscScalar**)&x);
554:   VecRestoreArray(yy,&y);
555:   return(0);
556: }

560: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
561: {
562:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
563:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
564:   const PetscScalar *x,*v;
565:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
566:   PetscErrorCode    ierr;
567:   PetscInt          n,i;
568:   const PetscInt    m = b->AIJ->rmap->n,*idx;

571:   VecSet(yy,0.0);
572:   VecGetArray(xx,(PetscScalar**)&x);
573:   VecGetArray(yy,&y);
574:   for (i=0; i<m; i++) {
575:     idx    = a->j + a->i[i] ;
576:     v      = a->a + a->i[i] ;
577:     n      = a->i[i+1] - a->i[i];
578:     alpha1 = x[4*i];
579:     alpha2 = x[4*i+1];
580:     alpha3 = x[4*i+2];
581:     alpha4 = x[4*i+3];
582:     while (n-->0) {
583:       y[4*(*idx)]   += alpha1*(*v);
584:       y[4*(*idx)+1] += alpha2*(*v);
585:       y[4*(*idx)+2] += alpha3*(*v);
586:       y[4*(*idx)+3] += alpha4*(*v);
587:       idx++; v++;
588:     }
589:   }
590:   PetscLogFlops(8.0*a->nz);
591:   VecRestoreArray(xx,(PetscScalar**)&x);
592:   VecRestoreArray(yy,&y);
593:   return(0);
594: }

598: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
599: {
600:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
601:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
602:   const PetscScalar *x,*v;
603:   PetscScalar       *y,sum1, sum2, sum3, sum4;
604:   PetscErrorCode    ierr;
605:   PetscInt          n,i,jrow,j;
606:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

609:   if (yy != zz) {VecCopy(yy,zz);}
610:   VecGetArray(xx,(PetscScalar**)&x);
611:   VecGetArray(zz,&y);
612:   idx  = a->j;
613:   v    = a->a;
614:   ii   = a->i;

616:   for (i=0; i<m; i++) {
617:     jrow = ii[i];
618:     n    = ii[i+1] - jrow;
619:     sum1  = 0.0;
620:     sum2  = 0.0;
621:     sum3  = 0.0;
622:     sum4  = 0.0;
623:     for (j=0; j<n; j++) {
624:       sum1 += v[jrow]*x[4*idx[jrow]];
625:       sum2 += v[jrow]*x[4*idx[jrow]+1];
626:       sum3 += v[jrow]*x[4*idx[jrow]+2];
627:       sum4 += v[jrow]*x[4*idx[jrow]+3];
628:       jrow++;
629:      }
630:     y[4*i]   += sum1;
631:     y[4*i+1] += sum2;
632:     y[4*i+2] += sum3;
633:     y[4*i+3] += sum4;
634:   }

636:   PetscLogFlops(8.0*a->nz);
637:   VecRestoreArray(xx,(PetscScalar**)&x);
638:   VecRestoreArray(zz,&y);
639:   return(0);
640: }
643: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
644: {
645:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
646:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
647:   const PetscScalar *x,*v;
648:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4;
649:   PetscErrorCode    ierr;
650:   PetscInt          n,i;
651:   const PetscInt    m = b->AIJ->rmap->n,*idx;

654:   if (yy != zz) {VecCopy(yy,zz);}
655:   VecGetArray(xx,(PetscScalar**)&x);
656:   VecGetArray(zz,&y);
657: 
658:   for (i=0; i<m; i++) {
659:     idx    = a->j + a->i[i] ;
660:     v      = a->a + a->i[i] ;
661:     n      = a->i[i+1] - a->i[i];
662:     alpha1 = x[4*i];
663:     alpha2 = x[4*i+1];
664:     alpha3 = x[4*i+2];
665:     alpha4 = x[4*i+3];
666:     while (n-->0) {
667:       y[4*(*idx)]   += alpha1*(*v);
668:       y[4*(*idx)+1] += alpha2*(*v);
669:       y[4*(*idx)+2] += alpha3*(*v);
670:       y[4*(*idx)+3] += alpha4*(*v);
671:       idx++; v++;
672:     }
673:   }
674:   PetscLogFlops(8.0*a->nz);
675:   VecRestoreArray(xx,(PetscScalar**)&x);
676:   VecRestoreArray(zz,&y);
677:   return(0);
678: }
679: /* ------------------------------------------------------------------------------*/

683: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
684: {
685:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
686:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
687:   const PetscScalar *x,*v;
688:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
689:   PetscErrorCode    ierr;
690:   PetscInt          nonzerorow=0,n,i,jrow,j;
691:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

694:   VecGetArray(xx,(PetscScalar**)&x);
695:   VecGetArray(yy,&y);
696:   idx  = a->j;
697:   v    = a->a;
698:   ii   = a->i;

700:   for (i=0; i<m; i++) {
701:     jrow = ii[i];
702:     n    = ii[i+1] - jrow;
703:     sum1  = 0.0;
704:     sum2  = 0.0;
705:     sum3  = 0.0;
706:     sum4  = 0.0;
707:     sum5  = 0.0;
708:     nonzerorow += (n>0);
709:     for (j=0; j<n; j++) {
710:       sum1 += v[jrow]*x[5*idx[jrow]];
711:       sum2 += v[jrow]*x[5*idx[jrow]+1];
712:       sum3 += v[jrow]*x[5*idx[jrow]+2];
713:       sum4 += v[jrow]*x[5*idx[jrow]+3];
714:       sum5 += v[jrow]*x[5*idx[jrow]+4];
715:       jrow++;
716:      }
717:     y[5*i]   = sum1;
718:     y[5*i+1] = sum2;
719:     y[5*i+2] = sum3;
720:     y[5*i+3] = sum4;
721:     y[5*i+4] = sum5;
722:   }

724:   PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
725:   VecRestoreArray(xx,(PetscScalar**)&x);
726:   VecRestoreArray(yy,&y);
727:   return(0);
728: }

732: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
733: {
734:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
735:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
736:   const PetscScalar *x,*v;
737:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
738:   PetscErrorCode    ierr;
739:   PetscInt          n,i;
740:   const PetscInt    m = b->AIJ->rmap->n,*idx;

743:   VecSet(yy,0.0);
744:   VecGetArray(xx,(PetscScalar**)&x);
745:   VecGetArray(yy,&y);
746: 
747:   for (i=0; i<m; i++) {
748:     idx    = a->j + a->i[i] ;
749:     v      = a->a + a->i[i] ;
750:     n      = a->i[i+1] - a->i[i];
751:     alpha1 = x[5*i];
752:     alpha2 = x[5*i+1];
753:     alpha3 = x[5*i+2];
754:     alpha4 = x[5*i+3];
755:     alpha5 = x[5*i+4];
756:     while (n-->0) {
757:       y[5*(*idx)]   += alpha1*(*v);
758:       y[5*(*idx)+1] += alpha2*(*v);
759:       y[5*(*idx)+2] += alpha3*(*v);
760:       y[5*(*idx)+3] += alpha4*(*v);
761:       y[5*(*idx)+4] += alpha5*(*v);
762:       idx++; v++;
763:     }
764:   }
765:   PetscLogFlops(10.0*a->nz);
766:   VecRestoreArray(xx,(PetscScalar**)&x);
767:   VecRestoreArray(yy,&y);
768:   return(0);
769: }

773: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
774: {
775:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
776:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
777:   const PetscScalar *x,*v;
778:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5;
779:   PetscErrorCode    ierr;
780:   PetscInt          n,i,jrow,j;
781:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

784:   if (yy != zz) {VecCopy(yy,zz);}
785:   VecGetArray(xx,(PetscScalar**)&x);
786:   VecGetArray(zz,&y);
787:   idx  = a->j;
788:   v    = a->a;
789:   ii   = a->i;

791:   for (i=0; i<m; i++) {
792:     jrow = ii[i];
793:     n    = ii[i+1] - jrow;
794:     sum1  = 0.0;
795:     sum2  = 0.0;
796:     sum3  = 0.0;
797:     sum4  = 0.0;
798:     sum5  = 0.0;
799:     for (j=0; j<n; j++) {
800:       sum1 += v[jrow]*x[5*idx[jrow]];
801:       sum2 += v[jrow]*x[5*idx[jrow]+1];
802:       sum3 += v[jrow]*x[5*idx[jrow]+2];
803:       sum4 += v[jrow]*x[5*idx[jrow]+3];
804:       sum5 += v[jrow]*x[5*idx[jrow]+4];
805:       jrow++;
806:      }
807:     y[5*i]   += sum1;
808:     y[5*i+1] += sum2;
809:     y[5*i+2] += sum3;
810:     y[5*i+3] += sum4;
811:     y[5*i+4] += sum5;
812:   }

814:   PetscLogFlops(10.0*a->nz);
815:   VecRestoreArray(xx,(PetscScalar**)&x);
816:   VecRestoreArray(zz,&y);
817:   return(0);
818: }

822: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
823: {
824:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
825:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
826:   const PetscScalar *x,*v;
827:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5;
828:   PetscErrorCode    ierr;
829:   PetscInt          n,i;
830:   const PetscInt    m = b->AIJ->rmap->n,*idx;

833:   if (yy != zz) {VecCopy(yy,zz);}
834:   VecGetArray(xx,(PetscScalar**)&x);
835:   VecGetArray(zz,&y);
836: 
837:   for (i=0; i<m; i++) {
838:     idx    = a->j + a->i[i] ;
839:     v      = a->a + a->i[i] ;
840:     n      = a->i[i+1] - a->i[i];
841:     alpha1 = x[5*i];
842:     alpha2 = x[5*i+1];
843:     alpha3 = x[5*i+2];
844:     alpha4 = x[5*i+3];
845:     alpha5 = x[5*i+4];
846:     while (n-->0) {
847:       y[5*(*idx)]   += alpha1*(*v);
848:       y[5*(*idx)+1] += alpha2*(*v);
849:       y[5*(*idx)+2] += alpha3*(*v);
850:       y[5*(*idx)+3] += alpha4*(*v);
851:       y[5*(*idx)+4] += alpha5*(*v);
852:       idx++; v++;
853:     }
854:   }
855:   PetscLogFlops(10.0*a->nz);
856:   VecRestoreArray(xx,(PetscScalar**)&x);
857:   VecRestoreArray(zz,&y);
858:   return(0);
859: }

861: /* ------------------------------------------------------------------------------*/
864: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
865: {
866:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
867:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
868:   const PetscScalar *x,*v;
869:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
870:   PetscErrorCode    ierr;
871:   PetscInt          nonzerorow=0,n,i,jrow,j;
872:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

875:   VecGetArray(xx,(PetscScalar**)&x);
876:   VecGetArray(yy,&y);
877:   idx  = a->j;
878:   v    = a->a;
879:   ii   = a->i;

881:   for (i=0; i<m; i++) {
882:     jrow = ii[i];
883:     n    = ii[i+1] - jrow;
884:     sum1  = 0.0;
885:     sum2  = 0.0;
886:     sum3  = 0.0;
887:     sum4  = 0.0;
888:     sum5  = 0.0;
889:     sum6  = 0.0;
890:     nonzerorow += (n>0);
891:     for (j=0; j<n; j++) {
892:       sum1 += v[jrow]*x[6*idx[jrow]];
893:       sum2 += v[jrow]*x[6*idx[jrow]+1];
894:       sum3 += v[jrow]*x[6*idx[jrow]+2];
895:       sum4 += v[jrow]*x[6*idx[jrow]+3];
896:       sum5 += v[jrow]*x[6*idx[jrow]+4];
897:       sum6 += v[jrow]*x[6*idx[jrow]+5];
898:       jrow++;
899:      }
900:     y[6*i]   = sum1;
901:     y[6*i+1] = sum2;
902:     y[6*i+2] = sum3;
903:     y[6*i+3] = sum4;
904:     y[6*i+4] = sum5;
905:     y[6*i+5] = sum6;
906:   }

908:   PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
909:   VecRestoreArray(xx,(PetscScalar**)&x);
910:   VecRestoreArray(yy,&y);
911:   return(0);
912: }

916: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
917: {
918:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
919:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
920:   const PetscScalar *x,*v;
921:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
922:   PetscErrorCode    ierr;
923:   PetscInt          n,i;
924:   const PetscInt    m = b->AIJ->rmap->n,*idx;

927:   VecSet(yy,0.0);
928:   VecGetArray(xx,(PetscScalar**)&x);
929:   VecGetArray(yy,&y);

931:   for (i=0; i<m; i++) {
932:     idx    = a->j + a->i[i] ;
933:     v      = a->a + a->i[i] ;
934:     n      = a->i[i+1] - a->i[i];
935:     alpha1 = x[6*i];
936:     alpha2 = x[6*i+1];
937:     alpha3 = x[6*i+2];
938:     alpha4 = x[6*i+3];
939:     alpha5 = x[6*i+4];
940:     alpha6 = x[6*i+5];
941:     while (n-->0) {
942:       y[6*(*idx)]   += alpha1*(*v);
943:       y[6*(*idx)+1] += alpha2*(*v);
944:       y[6*(*idx)+2] += alpha3*(*v);
945:       y[6*(*idx)+3] += alpha4*(*v);
946:       y[6*(*idx)+4] += alpha5*(*v);
947:       y[6*(*idx)+5] += alpha6*(*v);
948:       idx++; v++;
949:     }
950:   }
951:   PetscLogFlops(12.0*a->nz);
952:   VecRestoreArray(xx,(PetscScalar**)&x);
953:   VecRestoreArray(yy,&y);
954:   return(0);
955: }

959: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
960: {
961:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
962:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
963:   const PetscScalar *x,*v;
964:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6;
965:   PetscErrorCode    ierr;
966:   PetscInt          n,i,jrow,j;
967:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;

970:   if (yy != zz) {VecCopy(yy,zz);}
971:   VecGetArray(xx,(PetscScalar**)&x);
972:   VecGetArray(zz,&y);
973:   idx  = a->j;
974:   v    = a->a;
975:   ii   = a->i;

977:   for (i=0; i<m; i++) {
978:     jrow = ii[i];
979:     n    = ii[i+1] - jrow;
980:     sum1  = 0.0;
981:     sum2  = 0.0;
982:     sum3  = 0.0;
983:     sum4  = 0.0;
984:     sum5  = 0.0;
985:     sum6  = 0.0;
986:     for (j=0; j<n; j++) {
987:       sum1 += v[jrow]*x[6*idx[jrow]];
988:       sum2 += v[jrow]*x[6*idx[jrow]+1];
989:       sum3 += v[jrow]*x[6*idx[jrow]+2];
990:       sum4 += v[jrow]*x[6*idx[jrow]+3];
991:       sum5 += v[jrow]*x[6*idx[jrow]+4];
992:       sum6 += v[jrow]*x[6*idx[jrow]+5];
993:       jrow++;
994:      }
995:     y[6*i]   += sum1;
996:     y[6*i+1] += sum2;
997:     y[6*i+2] += sum3;
998:     y[6*i+3] += sum4;
999:     y[6*i+4] += sum5;
1000:     y[6*i+5] += sum6;
1001:   }

1003:   PetscLogFlops(12.0*a->nz);
1004:   VecRestoreArray(xx,(PetscScalar**)&x);
1005:   VecRestoreArray(zz,&y);
1006:   return(0);
1007: }

1011: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1012: {
1013:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1014:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1015:   const PetscScalar *x,*v;
1016:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1017:   PetscErrorCode    ierr;
1018:   PetscInt          n,i;
1019:   const PetscInt    m = b->AIJ->rmap->n,*idx;

1022:   if (yy != zz) {VecCopy(yy,zz);}
1023:   VecGetArray(xx,(PetscScalar**)&x);
1024:   VecGetArray(zz,&y);
1025: 
1026:   for (i=0; i<m; i++) {
1027:     idx    = a->j + a->i[i] ;
1028:     v      = a->a + a->i[i] ;
1029:     n      = a->i[i+1] - a->i[i];
1030:     alpha1 = x[6*i];
1031:     alpha2 = x[6*i+1];
1032:     alpha3 = x[6*i+2];
1033:     alpha4 = x[6*i+3];
1034:     alpha5 = x[6*i+4];
1035:     alpha6 = x[6*i+5];
1036:     while (n-->0) {
1037:       y[6*(*idx)]   += alpha1*(*v);
1038:       y[6*(*idx)+1] += alpha2*(*v);
1039:       y[6*(*idx)+2] += alpha3*(*v);
1040:       y[6*(*idx)+3] += alpha4*(*v);
1041:       y[6*(*idx)+4] += alpha5*(*v);
1042:       y[6*(*idx)+5] += alpha6*(*v);
1043:       idx++; v++;
1044:     }
1045:   }
1046:   PetscLogFlops(12.0*a->nz);
1047:   VecRestoreArray(xx,(PetscScalar**)&x);
1048:   VecRestoreArray(zz,&y);
1049:   return(0);
1050: }

1052: /* ------------------------------------------------------------------------------*/
1055: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1056: {
1057:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1058:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1059:   const PetscScalar *x,*v;
1060:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1061:   PetscErrorCode    ierr;
1062:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1063:   PetscInt          nonzerorow=0,n,i,jrow,j;

1066:   VecGetArray(xx,(PetscScalar**)&x);
1067:   VecGetArray(yy,&y);
1068:   idx  = a->j;
1069:   v    = a->a;
1070:   ii   = a->i;

1072:   for (i=0; i<m; i++) {
1073:     jrow = ii[i];
1074:     n    = ii[i+1] - jrow;
1075:     sum1  = 0.0;
1076:     sum2  = 0.0;
1077:     sum3  = 0.0;
1078:     sum4  = 0.0;
1079:     sum5  = 0.0;
1080:     sum6  = 0.0;
1081:     sum7  = 0.0;
1082:     nonzerorow += (n>0);
1083:     for (j=0; j<n; j++) {
1084:       sum1 += v[jrow]*x[7*idx[jrow]];
1085:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1086:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1087:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1088:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1089:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1090:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1091:       jrow++;
1092:      }
1093:     y[7*i]   = sum1;
1094:     y[7*i+1] = sum2;
1095:     y[7*i+2] = sum3;
1096:     y[7*i+3] = sum4;
1097:     y[7*i+4] = sum5;
1098:     y[7*i+5] = sum6;
1099:     y[7*i+6] = sum7;
1100:   }

1102:   PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1103:   VecRestoreArray(xx,(PetscScalar**)&x);
1104:   VecRestoreArray(yy,&y);
1105:   return(0);
1106: }

1110: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1111: {
1112:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1113:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1114:   const PetscScalar *x,*v;
1115:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1116:   PetscErrorCode    ierr;
1117:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1118:   PetscInt          n,i;

1121:   VecSet(yy,0.0);
1122:   VecGetArray(xx,(PetscScalar**)&x);
1123:   VecGetArray(yy,&y);

1125:   for (i=0; i<m; i++) {
1126:     idx    = a->j + a->i[i] ;
1127:     v      = a->a + a->i[i] ;
1128:     n      = a->i[i+1] - a->i[i];
1129:     alpha1 = x[7*i];
1130:     alpha2 = x[7*i+1];
1131:     alpha3 = x[7*i+2];
1132:     alpha4 = x[7*i+3];
1133:     alpha5 = x[7*i+4];
1134:     alpha6 = x[7*i+5];
1135:     alpha7 = x[7*i+6];
1136:     while (n-->0) {
1137:       y[7*(*idx)]   += alpha1*(*v);
1138:       y[7*(*idx)+1] += alpha2*(*v);
1139:       y[7*(*idx)+2] += alpha3*(*v);
1140:       y[7*(*idx)+3] += alpha4*(*v);
1141:       y[7*(*idx)+4] += alpha5*(*v);
1142:       y[7*(*idx)+5] += alpha6*(*v);
1143:       y[7*(*idx)+6] += alpha7*(*v);
1144:       idx++; v++;
1145:     }
1146:   }
1147:   PetscLogFlops(14.0*a->nz);
1148:   VecRestoreArray(xx,(PetscScalar**)&x);
1149:   VecRestoreArray(yy,&y);
1150:   return(0);
1151: }

1155: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1156: {
1157:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1158:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1159:   const PetscScalar *x,*v;
1160:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1161:   PetscErrorCode    ierr;
1162:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1163:   PetscInt          n,i,jrow,j;

1166:   if (yy != zz) {VecCopy(yy,zz);}
1167:   VecGetArray(xx,(PetscScalar**)&x);
1168:   VecGetArray(zz,&y);
1169:   idx  = a->j;
1170:   v    = a->a;
1171:   ii   = a->i;

1173:   for (i=0; i<m; i++) {
1174:     jrow = ii[i];
1175:     n    = ii[i+1] - jrow;
1176:     sum1  = 0.0;
1177:     sum2  = 0.0;
1178:     sum3  = 0.0;
1179:     sum4  = 0.0;
1180:     sum5  = 0.0;
1181:     sum6  = 0.0;
1182:     sum7  = 0.0;
1183:     for (j=0; j<n; j++) {
1184:       sum1 += v[jrow]*x[7*idx[jrow]];
1185:       sum2 += v[jrow]*x[7*idx[jrow]+1];
1186:       sum3 += v[jrow]*x[7*idx[jrow]+2];
1187:       sum4 += v[jrow]*x[7*idx[jrow]+3];
1188:       sum5 += v[jrow]*x[7*idx[jrow]+4];
1189:       sum6 += v[jrow]*x[7*idx[jrow]+5];
1190:       sum7 += v[jrow]*x[7*idx[jrow]+6];
1191:       jrow++;
1192:      }
1193:     y[7*i]   += sum1;
1194:     y[7*i+1] += sum2;
1195:     y[7*i+2] += sum3;
1196:     y[7*i+3] += sum4;
1197:     y[7*i+4] += sum5;
1198:     y[7*i+5] += sum6;
1199:     y[7*i+6] += sum7;
1200:   }

1202:   PetscLogFlops(14.0*a->nz);
1203:   VecRestoreArray(xx,(PetscScalar**)&x);
1204:   VecRestoreArray(zz,&y);
1205:   return(0);
1206: }

1210: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1211: {
1212:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1213:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1214:   const PetscScalar *x,*v;
1215:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1216:   PetscErrorCode    ierr;
1217:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1218:   PetscInt          n,i;

1221:   if (yy != zz) {VecCopy(yy,zz);}
1222:   VecGetArray(xx,(PetscScalar**)&x);
1223:   VecGetArray(zz,&y);
1224:   for (i=0; i<m; i++) {
1225:     idx    = a->j + a->i[i] ;
1226:     v      = a->a + a->i[i] ;
1227:     n      = a->i[i+1] - a->i[i];
1228:     alpha1 = x[7*i];
1229:     alpha2 = x[7*i+1];
1230:     alpha3 = x[7*i+2];
1231:     alpha4 = x[7*i+3];
1232:     alpha5 = x[7*i+4];
1233:     alpha6 = x[7*i+5];
1234:     alpha7 = x[7*i+6];
1235:     while (n-->0) {
1236:       y[7*(*idx)]   += alpha1*(*v);
1237:       y[7*(*idx)+1] += alpha2*(*v);
1238:       y[7*(*idx)+2] += alpha3*(*v);
1239:       y[7*(*idx)+3] += alpha4*(*v);
1240:       y[7*(*idx)+4] += alpha5*(*v);
1241:       y[7*(*idx)+5] += alpha6*(*v);
1242:       y[7*(*idx)+6] += alpha7*(*v);
1243:       idx++; v++;
1244:     }
1245:   }
1246:   PetscLogFlops(14.0*a->nz);
1247:   VecRestoreArray(xx,(PetscScalar**)&x);
1248:   VecRestoreArray(zz,&y);
1249:   return(0);
1250: }

1254: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1255: {
1256:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1257:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1258:   const PetscScalar *x,*v;
1259:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1260:   PetscErrorCode    ierr;
1261:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1262:   PetscInt          nonzerorow=0,n,i,jrow,j;

1265:   VecGetArray(xx,(PetscScalar**)&x);
1266:   VecGetArray(yy,&y);
1267:   idx  = a->j;
1268:   v    = a->a;
1269:   ii   = a->i;

1271:   for (i=0; i<m; i++) {
1272:     jrow = ii[i];
1273:     n    = ii[i+1] - jrow;
1274:     sum1  = 0.0;
1275:     sum2  = 0.0;
1276:     sum3  = 0.0;
1277:     sum4  = 0.0;
1278:     sum5  = 0.0;
1279:     sum6  = 0.0;
1280:     sum7  = 0.0;
1281:     sum8  = 0.0;
1282:     nonzerorow += (n>0);
1283:     for (j=0; j<n; j++) {
1284:       sum1 += v[jrow]*x[8*idx[jrow]];
1285:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1286:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1287:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1288:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1289:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1290:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1291:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1292:       jrow++;
1293:      }
1294:     y[8*i]   = sum1;
1295:     y[8*i+1] = sum2;
1296:     y[8*i+2] = sum3;
1297:     y[8*i+3] = sum4;
1298:     y[8*i+4] = sum5;
1299:     y[8*i+5] = sum6;
1300:     y[8*i+6] = sum7;
1301:     y[8*i+7] = sum8;
1302:   }

1304:   PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1305:   VecRestoreArray(xx,(PetscScalar**)&x);
1306:   VecRestoreArray(yy,&y);
1307:   return(0);
1308: }

1312: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1313: {
1314:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1315:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1316:   const PetscScalar *x,*v;
1317:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1318:   PetscErrorCode    ierr;
1319:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1320:   PetscInt          n,i;

1323:   VecSet(yy,0.0);
1324:   VecGetArray(xx,(PetscScalar**)&x);
1325:   VecGetArray(yy,&y);

1327:   for (i=0; i<m; i++) {
1328:     idx    = a->j + a->i[i] ;
1329:     v      = a->a + a->i[i] ;
1330:     n      = a->i[i+1] - a->i[i];
1331:     alpha1 = x[8*i];
1332:     alpha2 = x[8*i+1];
1333:     alpha3 = x[8*i+2];
1334:     alpha4 = x[8*i+3];
1335:     alpha5 = x[8*i+4];
1336:     alpha6 = x[8*i+5];
1337:     alpha7 = x[8*i+6];
1338:     alpha8 = x[8*i+7];
1339:     while (n-->0) {
1340:       y[8*(*idx)]   += alpha1*(*v);
1341:       y[8*(*idx)+1] += alpha2*(*v);
1342:       y[8*(*idx)+2] += alpha3*(*v);
1343:       y[8*(*idx)+3] += alpha4*(*v);
1344:       y[8*(*idx)+4] += alpha5*(*v);
1345:       y[8*(*idx)+5] += alpha6*(*v);
1346:       y[8*(*idx)+6] += alpha7*(*v);
1347:       y[8*(*idx)+7] += alpha8*(*v);
1348:       idx++; v++;
1349:     }
1350:   }
1351:   PetscLogFlops(16.0*a->nz);
1352:   VecRestoreArray(xx,(PetscScalar**)&x);
1353:   VecRestoreArray(yy,&y);
1354:   return(0);
1355: }

1359: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1360: {
1361:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1362:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1363:   const PetscScalar *x,*v;
1364:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1365:   PetscErrorCode    ierr;
1366:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1367:   PetscInt          n,i,jrow,j;

1370:   if (yy != zz) {VecCopy(yy,zz);}
1371:   VecGetArray(xx,(PetscScalar**)&x);
1372:   VecGetArray(zz,&y);
1373:   idx  = a->j;
1374:   v    = a->a;
1375:   ii   = a->i;

1377:   for (i=0; i<m; i++) {
1378:     jrow = ii[i];
1379:     n    = ii[i+1] - jrow;
1380:     sum1  = 0.0;
1381:     sum2  = 0.0;
1382:     sum3  = 0.0;
1383:     sum4  = 0.0;
1384:     sum5  = 0.0;
1385:     sum6  = 0.0;
1386:     sum7  = 0.0;
1387:     sum8  = 0.0;
1388:     for (j=0; j<n; j++) {
1389:       sum1 += v[jrow]*x[8*idx[jrow]];
1390:       sum2 += v[jrow]*x[8*idx[jrow]+1];
1391:       sum3 += v[jrow]*x[8*idx[jrow]+2];
1392:       sum4 += v[jrow]*x[8*idx[jrow]+3];
1393:       sum5 += v[jrow]*x[8*idx[jrow]+4];
1394:       sum6 += v[jrow]*x[8*idx[jrow]+5];
1395:       sum7 += v[jrow]*x[8*idx[jrow]+6];
1396:       sum8 += v[jrow]*x[8*idx[jrow]+7];
1397:       jrow++;
1398:      }
1399:     y[8*i]   += sum1;
1400:     y[8*i+1] += sum2;
1401:     y[8*i+2] += sum3;
1402:     y[8*i+3] += sum4;
1403:     y[8*i+4] += sum5;
1404:     y[8*i+5] += sum6;
1405:     y[8*i+6] += sum7;
1406:     y[8*i+7] += sum8;
1407:   }

1409:   PetscLogFlops(16.0*a->nz);
1410:   VecRestoreArray(xx,(PetscScalar**)&x);
1411:   VecRestoreArray(zz,&y);
1412:   return(0);
1413: }

1417: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1418: {
1419:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1420:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1421:   const PetscScalar *x,*v;
1422:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1423:   PetscErrorCode    ierr;
1424:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1425:   PetscInt          n,i;

1428:   if (yy != zz) {VecCopy(yy,zz);}
1429:   VecGetArray(xx,(PetscScalar**)&x);
1430:   VecGetArray(zz,&y);
1431:   for (i=0; i<m; i++) {
1432:     idx    = a->j + a->i[i] ;
1433:     v      = a->a + a->i[i] ;
1434:     n      = a->i[i+1] - a->i[i];
1435:     alpha1 = x[8*i];
1436:     alpha2 = x[8*i+1];
1437:     alpha3 = x[8*i+2];
1438:     alpha4 = x[8*i+3];
1439:     alpha5 = x[8*i+4];
1440:     alpha6 = x[8*i+5];
1441:     alpha7 = x[8*i+6];
1442:     alpha8 = x[8*i+7];
1443:     while (n-->0) {
1444:       y[8*(*idx)]   += alpha1*(*v);
1445:       y[8*(*idx)+1] += alpha2*(*v);
1446:       y[8*(*idx)+2] += alpha3*(*v);
1447:       y[8*(*idx)+3] += alpha4*(*v);
1448:       y[8*(*idx)+4] += alpha5*(*v);
1449:       y[8*(*idx)+5] += alpha6*(*v);
1450:       y[8*(*idx)+6] += alpha7*(*v);
1451:       y[8*(*idx)+7] += alpha8*(*v);
1452:       idx++; v++;
1453:     }
1454:   }
1455:   PetscLogFlops(16.0*a->nz);
1456:   VecRestoreArray(xx,(PetscScalar**)&x);
1457:   VecRestoreArray(zz,&y);
1458:   return(0);
1459: }

1461: /* ------------------------------------------------------------------------------*/
1464: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1465: {
1466:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1467:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1468:   const PetscScalar *x,*v;
1469:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1470:   PetscErrorCode    ierr;
1471:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1472:   PetscInt          nonzerorow=0,n,i,jrow,j;

1475:   VecGetArray(xx,(PetscScalar**)&x);
1476:   VecGetArray(yy,&y);
1477:   idx  = a->j;
1478:   v    = a->a;
1479:   ii   = a->i;

1481:   for (i=0; i<m; i++) {
1482:     jrow = ii[i];
1483:     n    = ii[i+1] - jrow;
1484:     sum1  = 0.0;
1485:     sum2  = 0.0;
1486:     sum3  = 0.0;
1487:     sum4  = 0.0;
1488:     sum5  = 0.0;
1489:     sum6  = 0.0;
1490:     sum7  = 0.0;
1491:     sum8  = 0.0;
1492:     sum9  = 0.0;
1493:     nonzerorow += (n>0);
1494:     for (j=0; j<n; j++) {
1495:       sum1 += v[jrow]*x[9*idx[jrow]];
1496:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1497:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1498:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1499:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1500:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1501:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1502:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1503:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1504:       jrow++;
1505:      }
1506:     y[9*i]   = sum1;
1507:     y[9*i+1] = sum2;
1508:     y[9*i+2] = sum3;
1509:     y[9*i+3] = sum4;
1510:     y[9*i+4] = sum5;
1511:     y[9*i+5] = sum6;
1512:     y[9*i+6] = sum7;
1513:     y[9*i+7] = sum8;
1514:     y[9*i+8] = sum9;
1515:   }

1517:   PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1518:   VecRestoreArray(xx,(PetscScalar**)&x);
1519:   VecRestoreArray(yy,&y);
1520:   return(0);
1521: }

1523: /* ------------------------------------------------------------------------------*/

1527: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1528: {
1529:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1530:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1531:   const PetscScalar *x,*v;
1532:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1533:   PetscErrorCode    ierr;
1534:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1535:   PetscInt          n,i;

1538:   VecSet(yy,0.0);
1539:   VecGetArray(xx,(PetscScalar**)&x);
1540:   VecGetArray(yy,&y);

1542:   for (i=0; i<m; i++) {
1543:     idx    = a->j + a->i[i] ;
1544:     v      = a->a + a->i[i] ;
1545:     n      = a->i[i+1] - a->i[i];
1546:     alpha1 = x[9*i];
1547:     alpha2 = x[9*i+1];
1548:     alpha3 = x[9*i+2];
1549:     alpha4 = x[9*i+3];
1550:     alpha5 = x[9*i+4];
1551:     alpha6 = x[9*i+5];
1552:     alpha7 = x[9*i+6];
1553:     alpha8 = x[9*i+7];
1554:     alpha9 = x[9*i+8];
1555:     while (n-->0) {
1556:       y[9*(*idx)]   += alpha1*(*v);
1557:       y[9*(*idx)+1] += alpha2*(*v);
1558:       y[9*(*idx)+2] += alpha3*(*v);
1559:       y[9*(*idx)+3] += alpha4*(*v);
1560:       y[9*(*idx)+4] += alpha5*(*v);
1561:       y[9*(*idx)+5] += alpha6*(*v);
1562:       y[9*(*idx)+6] += alpha7*(*v);
1563:       y[9*(*idx)+7] += alpha8*(*v);
1564:       y[9*(*idx)+8] += alpha9*(*v);
1565:       idx++; v++;
1566:     }
1567:   }
1568:   PetscLogFlops(18.0*a->nz);
1569:   VecRestoreArray(xx,(PetscScalar**)&x);
1570:   VecRestoreArray(yy,&y);
1571:   return(0);
1572: }

1576: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1577: {
1578:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1579:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1580:   const PetscScalar *x,*v;
1581:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1582:   PetscErrorCode    ierr;
1583:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1584:   PetscInt          n,i,jrow,j;

1587:   if (yy != zz) {VecCopy(yy,zz);}
1588:   VecGetArray(xx,(PetscScalar**)&x);
1589:   VecGetArray(zz,&y);
1590:   idx  = a->j;
1591:   v    = a->a;
1592:   ii   = a->i;

1594:   for (i=0; i<m; i++) {
1595:     jrow = ii[i];
1596:     n    = ii[i+1] - jrow;
1597:     sum1  = 0.0;
1598:     sum2  = 0.0;
1599:     sum3  = 0.0;
1600:     sum4  = 0.0;
1601:     sum5  = 0.0;
1602:     sum6  = 0.0;
1603:     sum7  = 0.0;
1604:     sum8  = 0.0;
1605:     sum9  = 0.0;
1606:     for (j=0; j<n; j++) {
1607:       sum1 += v[jrow]*x[9*idx[jrow]];
1608:       sum2 += v[jrow]*x[9*idx[jrow]+1];
1609:       sum3 += v[jrow]*x[9*idx[jrow]+2];
1610:       sum4 += v[jrow]*x[9*idx[jrow]+3];
1611:       sum5 += v[jrow]*x[9*idx[jrow]+4];
1612:       sum6 += v[jrow]*x[9*idx[jrow]+5];
1613:       sum7 += v[jrow]*x[9*idx[jrow]+6];
1614:       sum8 += v[jrow]*x[9*idx[jrow]+7];
1615:       sum9 += v[jrow]*x[9*idx[jrow]+8];
1616:       jrow++;
1617:      }
1618:     y[9*i]   += sum1;
1619:     y[9*i+1] += sum2;
1620:     y[9*i+2] += sum3;
1621:     y[9*i+3] += sum4;
1622:     y[9*i+4] += sum5;
1623:     y[9*i+5] += sum6;
1624:     y[9*i+6] += sum7;
1625:     y[9*i+7] += sum8;
1626:     y[9*i+8] += sum9;
1627:   }

1629:   PetscLogFlops(18*a->nz);
1630:   VecRestoreArray(xx,(PetscScalar**)&x);
1631:   VecRestoreArray(zz,&y);
1632:   return(0);
1633: }

1637: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1638: {
1639:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1640:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1641:   const PetscScalar *x,*v;
1642:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1643:   PetscErrorCode    ierr;
1644:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1645:   PetscInt          n,i;

1648:   if (yy != zz) {VecCopy(yy,zz);}
1649:   VecGetArray(xx,(PetscScalar**)&x);
1650:   VecGetArray(zz,&y);
1651:   for (i=0; i<m; i++) {
1652:     idx    = a->j + a->i[i] ;
1653:     v      = a->a + a->i[i] ;
1654:     n      = a->i[i+1] - a->i[i];
1655:     alpha1 = x[9*i];
1656:     alpha2 = x[9*i+1];
1657:     alpha3 = x[9*i+2];
1658:     alpha4 = x[9*i+3];
1659:     alpha5 = x[9*i+4];
1660:     alpha6 = x[9*i+5];
1661:     alpha7 = x[9*i+6];
1662:     alpha8 = x[9*i+7];
1663:     alpha9 = x[9*i+8];
1664:     while (n-->0) {
1665:       y[9*(*idx)]   += alpha1*(*v);
1666:       y[9*(*idx)+1] += alpha2*(*v);
1667:       y[9*(*idx)+2] += alpha3*(*v);
1668:       y[9*(*idx)+3] += alpha4*(*v);
1669:       y[9*(*idx)+4] += alpha5*(*v);
1670:       y[9*(*idx)+5] += alpha6*(*v);
1671:       y[9*(*idx)+6] += alpha7*(*v);
1672:       y[9*(*idx)+7] += alpha8*(*v);
1673:       y[9*(*idx)+8] += alpha9*(*v);
1674:       idx++; v++;
1675:     }
1676:   }
1677:   PetscLogFlops(18.0*a->nz);
1678:   VecRestoreArray(xx,(PetscScalar**)&x);
1679:   VecRestoreArray(zz,&y);
1680:   return(0);
1681: }
1684: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1685: {
1686:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1687:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1688:   const PetscScalar *x,*v;
1689:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1690:   PetscErrorCode    ierr;
1691:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1692:   PetscInt          nonzerorow=0,n,i,jrow,j;

1695:   VecGetArray(xx,(PetscScalar**)&x);
1696:   VecGetArray(yy,&y);
1697:   idx  = a->j;
1698:   v    = a->a;
1699:   ii   = a->i;

1701:   for (i=0; i<m; i++) {
1702:     jrow = ii[i];
1703:     n    = ii[i+1] - jrow;
1704:     sum1  = 0.0;
1705:     sum2  = 0.0;
1706:     sum3  = 0.0;
1707:     sum4  = 0.0;
1708:     sum5  = 0.0;
1709:     sum6  = 0.0;
1710:     sum7  = 0.0;
1711:     sum8  = 0.0;
1712:     sum9  = 0.0;
1713:     sum10 = 0.0;
1714:     nonzerorow += (n>0);
1715:     for (j=0; j<n; j++) {
1716:       sum1  += v[jrow]*x[10*idx[jrow]];
1717:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1718:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1719:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1720:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1721:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1722:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1723:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1724:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1725:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1726:       jrow++;
1727:      }
1728:     y[10*i]   = sum1;
1729:     y[10*i+1] = sum2;
1730:     y[10*i+2] = sum3;
1731:     y[10*i+3] = sum4;
1732:     y[10*i+4] = sum5;
1733:     y[10*i+5] = sum6;
1734:     y[10*i+6] = sum7;
1735:     y[10*i+7] = sum8;
1736:     y[10*i+8] = sum9;
1737:     y[10*i+9] = sum10;
1738:   }

1740:   PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1741:   VecRestoreArray(xx,(PetscScalar**)&x);
1742:   VecRestoreArray(yy,&y);
1743:   return(0);
1744: }

1748: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1749: {
1750:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1751:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1752:   const PetscScalar *x,*v;
1753:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1754:   PetscErrorCode    ierr;
1755:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1756:   PetscInt          n,i,jrow,j;

1759:   if (yy != zz) {VecCopy(yy,zz);}
1760:   VecGetArray(xx,(PetscScalar**)&x);
1761:   VecGetArray(zz,&y);
1762:   idx  = a->j;
1763:   v    = a->a;
1764:   ii   = a->i;

1766:   for (i=0; i<m; i++) {
1767:     jrow = ii[i];
1768:     n    = ii[i+1] - jrow;
1769:     sum1  = 0.0;
1770:     sum2  = 0.0;
1771:     sum3  = 0.0;
1772:     sum4  = 0.0;
1773:     sum5  = 0.0;
1774:     sum6  = 0.0;
1775:     sum7  = 0.0;
1776:     sum8  = 0.0;
1777:     sum9  = 0.0;
1778:     sum10 = 0.0;
1779:     for (j=0; j<n; j++) {
1780:       sum1  += v[jrow]*x[10*idx[jrow]];
1781:       sum2  += v[jrow]*x[10*idx[jrow]+1];
1782:       sum3  += v[jrow]*x[10*idx[jrow]+2];
1783:       sum4  += v[jrow]*x[10*idx[jrow]+3];
1784:       sum5  += v[jrow]*x[10*idx[jrow]+4];
1785:       sum6  += v[jrow]*x[10*idx[jrow]+5];
1786:       sum7  += v[jrow]*x[10*idx[jrow]+6];
1787:       sum8  += v[jrow]*x[10*idx[jrow]+7];
1788:       sum9  += v[jrow]*x[10*idx[jrow]+8];
1789:       sum10 += v[jrow]*x[10*idx[jrow]+9];
1790:       jrow++;
1791:      }
1792:     y[10*i]   += sum1;
1793:     y[10*i+1] += sum2;
1794:     y[10*i+2] += sum3;
1795:     y[10*i+3] += sum4;
1796:     y[10*i+4] += sum5;
1797:     y[10*i+5] += sum6;
1798:     y[10*i+6] += sum7;
1799:     y[10*i+7] += sum8;
1800:     y[10*i+8] += sum9;
1801:     y[10*i+9] += sum10;
1802:   }

1804:   PetscLogFlops(20.0*a->nz);
1805:   VecRestoreArray(xx,(PetscScalar**)&x);
1806:   VecRestoreArray(yy,&y);
1807:   return(0);
1808: }

1812: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1813: {
1814:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1815:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1816:   const PetscScalar *x,*v;
1817:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1818:   PetscErrorCode    ierr;
1819:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1820:   PetscInt          n,i;

1823:   VecSet(yy,0.0);
1824:   VecGetArray(xx,(PetscScalar**)&x);
1825:   VecGetArray(yy,&y);

1827:   for (i=0; i<m; i++) {
1828:     idx    = a->j + a->i[i] ;
1829:     v      = a->a + a->i[i] ;
1830:     n      = a->i[i+1] - a->i[i];
1831:     alpha1 = x[10*i];
1832:     alpha2 = x[10*i+1];
1833:     alpha3 = x[10*i+2];
1834:     alpha4 = x[10*i+3];
1835:     alpha5 = x[10*i+4];
1836:     alpha6 = x[10*i+5];
1837:     alpha7 = x[10*i+6];
1838:     alpha8 = x[10*i+7];
1839:     alpha9 = x[10*i+8];
1840:     alpha10 = x[10*i+9];
1841:     while (n-->0) {
1842:       y[10*(*idx)]   += alpha1*(*v);
1843:       y[10*(*idx)+1] += alpha2*(*v);
1844:       y[10*(*idx)+2] += alpha3*(*v);
1845:       y[10*(*idx)+3] += alpha4*(*v);
1846:       y[10*(*idx)+4] += alpha5*(*v);
1847:       y[10*(*idx)+5] += alpha6*(*v);
1848:       y[10*(*idx)+6] += alpha7*(*v);
1849:       y[10*(*idx)+7] += alpha8*(*v);
1850:       y[10*(*idx)+8] += alpha9*(*v);
1851:       y[10*(*idx)+9] += alpha10*(*v);
1852:       idx++; v++;
1853:     }
1854:   }
1855:   PetscLogFlops(20.0*a->nz);
1856:   VecRestoreArray(xx,(PetscScalar**)&x);
1857:   VecRestoreArray(yy,&y);
1858:   return(0);
1859: }

1863: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1864: {
1865:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1866:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1867:   const PetscScalar *x,*v;
1868:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1869:   PetscErrorCode    ierr;
1870:   const PetscInt    m = b->AIJ->rmap->n,*idx;
1871:   PetscInt          n,i;

1874:   if (yy != zz) {VecCopy(yy,zz);}
1875:   VecGetArray(xx,(PetscScalar**)&x);
1876:   VecGetArray(zz,&y);
1877:   for (i=0; i<m; i++) {
1878:     idx    = a->j + a->i[i] ;
1879:     v      = a->a + a->i[i] ;
1880:     n      = a->i[i+1] - a->i[i];
1881:     alpha1 = x[10*i];
1882:     alpha2 = x[10*i+1];
1883:     alpha3 = x[10*i+2];
1884:     alpha4 = x[10*i+3];
1885:     alpha5 = x[10*i+4];
1886:     alpha6 = x[10*i+5];
1887:     alpha7 = x[10*i+6];
1888:     alpha8 = x[10*i+7];
1889:     alpha9 = x[10*i+8];
1890:     alpha10 = x[10*i+9];
1891:     while (n-->0) {
1892:       y[10*(*idx)]   += alpha1*(*v);
1893:       y[10*(*idx)+1] += alpha2*(*v);
1894:       y[10*(*idx)+2] += alpha3*(*v);
1895:       y[10*(*idx)+3] += alpha4*(*v);
1896:       y[10*(*idx)+4] += alpha5*(*v);
1897:       y[10*(*idx)+5] += alpha6*(*v);
1898:       y[10*(*idx)+6] += alpha7*(*v);
1899:       y[10*(*idx)+7] += alpha8*(*v);
1900:       y[10*(*idx)+8] += alpha9*(*v);
1901:       y[10*(*idx)+9] += alpha10*(*v);
1902:       idx++; v++;
1903:     }
1904:   }
1905:   PetscLogFlops(20.0*a->nz);
1906:   VecRestoreArray(xx,(PetscScalar**)&x);
1907:   VecRestoreArray(zz,&y);
1908:   return(0);
1909: }


1912: /*--------------------------------------------------------------------------------------------*/
1915: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1916: {
1917:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1918:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1919:   const PetscScalar *x,*v;
1920:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1921:   PetscErrorCode    ierr;
1922:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1923:   PetscInt          nonzerorow=0,n,i,jrow,j;

1926:   VecGetArray(xx,(PetscScalar**)&x);
1927:   VecGetArray(yy,&y);
1928:   idx  = a->j;
1929:   v    = a->a;
1930:   ii   = a->i;

1932:   for (i=0; i<m; i++) {
1933:     jrow = ii[i];
1934:     n    = ii[i+1] - jrow;
1935:     sum1  = 0.0;
1936:     sum2  = 0.0;
1937:     sum3  = 0.0;
1938:     sum4  = 0.0;
1939:     sum5  = 0.0;
1940:     sum6  = 0.0;
1941:     sum7  = 0.0;
1942:     sum8  = 0.0;
1943:     sum9  = 0.0;
1944:     sum10 = 0.0;
1945:     sum11 = 0.0;
1946:     nonzerorow += (n>0);
1947:     for (j=0; j<n; j++) {
1948:       sum1  += v[jrow]*x[11*idx[jrow]];
1949:       sum2  += v[jrow]*x[11*idx[jrow]+1];
1950:       sum3  += v[jrow]*x[11*idx[jrow]+2];
1951:       sum4  += v[jrow]*x[11*idx[jrow]+3];
1952:       sum5  += v[jrow]*x[11*idx[jrow]+4];
1953:       sum6  += v[jrow]*x[11*idx[jrow]+5];
1954:       sum7  += v[jrow]*x[11*idx[jrow]+6];
1955:       sum8  += v[jrow]*x[11*idx[jrow]+7];
1956:       sum9  += v[jrow]*x[11*idx[jrow]+8];
1957:       sum10 += v[jrow]*x[11*idx[jrow]+9];
1958:       sum11 += v[jrow]*x[11*idx[jrow]+10];
1959:       jrow++;
1960:      }
1961:     y[11*i]   = sum1;
1962:     y[11*i+1] = sum2;
1963:     y[11*i+2] = sum3;
1964:     y[11*i+3] = sum4;
1965:     y[11*i+4] = sum5;
1966:     y[11*i+5] = sum6;
1967:     y[11*i+6] = sum7;
1968:     y[11*i+7] = sum8;
1969:     y[11*i+8] = sum9;
1970:     y[11*i+9] = sum10;
1971:     y[11*i+10] = sum11;
1972:   }

1974:   PetscLogFlops(22*a->nz - 11*nonzerorow);
1975:   VecRestoreArray(xx,(PetscScalar**)&x);
1976:   VecRestoreArray(yy,&y);
1977:   return(0);
1978: }

1982: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1983: {
1984:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
1985:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
1986:   const PetscScalar *x,*v;
1987:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1988:   PetscErrorCode    ierr;
1989:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
1990:   PetscInt          n,i,jrow,j;

1993:   if (yy != zz) {VecCopy(yy,zz);}
1994:   VecGetArray(xx,(PetscScalar**)&x);
1995:   VecGetArray(zz,&y);
1996:   idx  = a->j;
1997:   v    = a->a;
1998:   ii   = a->i;

2000:   for (i=0; i<m; i++) {
2001:     jrow = ii[i];
2002:     n    = ii[i+1] - jrow;
2003:     sum1  = 0.0;
2004:     sum2  = 0.0;
2005:     sum3  = 0.0;
2006:     sum4  = 0.0;
2007:     sum5  = 0.0;
2008:     sum6  = 0.0;
2009:     sum7  = 0.0;
2010:     sum8  = 0.0;
2011:     sum9  = 0.0;
2012:     sum10 = 0.0;
2013:     sum11 = 0.0;
2014:     for (j=0; j<n; j++) {
2015:       sum1  += v[jrow]*x[11*idx[jrow]];
2016:       sum2  += v[jrow]*x[11*idx[jrow]+1];
2017:       sum3  += v[jrow]*x[11*idx[jrow]+2];
2018:       sum4  += v[jrow]*x[11*idx[jrow]+3];
2019:       sum5  += v[jrow]*x[11*idx[jrow]+4];
2020:       sum6  += v[jrow]*x[11*idx[jrow]+5];
2021:       sum7  += v[jrow]*x[11*idx[jrow]+6];
2022:       sum8  += v[jrow]*x[11*idx[jrow]+7];
2023:       sum9  += v[jrow]*x[11*idx[jrow]+8];
2024:       sum10 += v[jrow]*x[11*idx[jrow]+9];
2025:       sum11 += v[jrow]*x[11*idx[jrow]+10];
2026:       jrow++;
2027:      }
2028:     y[11*i]   += sum1;
2029:     y[11*i+1] += sum2;
2030:     y[11*i+2] += sum3;
2031:     y[11*i+3] += sum4;
2032:     y[11*i+4] += sum5;
2033:     y[11*i+5] += sum6;
2034:     y[11*i+6] += sum7;
2035:     y[11*i+7] += sum8;
2036:     y[11*i+8] += sum9;
2037:     y[11*i+9] += sum10;
2038:     y[11*i+10] += sum11;
2039:   }

2041:   PetscLogFlops(22*a->nz);
2042:   VecRestoreArray(xx,(PetscScalar**)&x);
2043:   VecRestoreArray(yy,&y);
2044:   return(0);
2045: }

2049: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2050: {
2051:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2052:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2053:   const PetscScalar *x,*v;
2054:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2055:   PetscErrorCode    ierr;
2056:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2057:   PetscInt          n,i;

2060:   VecSet(yy,0.0);
2061:   VecGetArray(xx,(PetscScalar**)&x);
2062:   VecGetArray(yy,&y);

2064:   for (i=0; i<m; i++) {
2065:     idx    = a->j + a->i[i] ;
2066:     v      = a->a + a->i[i] ;
2067:     n      = a->i[i+1] - a->i[i];
2068:     alpha1 = x[11*i];
2069:     alpha2 = x[11*i+1];
2070:     alpha3 = x[11*i+2];
2071:     alpha4 = x[11*i+3];
2072:     alpha5 = x[11*i+4];
2073:     alpha6 = x[11*i+5];
2074:     alpha7 = x[11*i+6];
2075:     alpha8 = x[11*i+7];
2076:     alpha9 = x[11*i+8];
2077:     alpha10 = x[11*i+9];
2078:     alpha11 = x[11*i+10];
2079:     while (n-->0) {
2080:       y[11*(*idx)]   += alpha1*(*v);
2081:       y[11*(*idx)+1] += alpha2*(*v);
2082:       y[11*(*idx)+2] += alpha3*(*v);
2083:       y[11*(*idx)+3] += alpha4*(*v);
2084:       y[11*(*idx)+4] += alpha5*(*v);
2085:       y[11*(*idx)+5] += alpha6*(*v);
2086:       y[11*(*idx)+6] += alpha7*(*v);
2087:       y[11*(*idx)+7] += alpha8*(*v);
2088:       y[11*(*idx)+8] += alpha9*(*v);
2089:       y[11*(*idx)+9] += alpha10*(*v);
2090:       y[11*(*idx)+10] += alpha11*(*v);
2091:       idx++; v++;
2092:     }
2093:   }
2094:   PetscLogFlops(22*a->nz);
2095:   VecRestoreArray(xx,(PetscScalar**)&x);
2096:   VecRestoreArray(yy,&y);
2097:   return(0);
2098: }

2102: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2103: {
2104:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2105:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2106:   const PetscScalar *x,*v;
2107:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2108:   PetscErrorCode    ierr;
2109:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2110:   PetscInt          n,i;

2113:   if (yy != zz) {VecCopy(yy,zz);}
2114:   VecGetArray(xx,(PetscScalar**)&x);
2115:   VecGetArray(zz,&y);
2116:   for (i=0; i<m; i++) {
2117:     idx    = a->j + a->i[i] ;
2118:     v      = a->a + a->i[i] ;
2119:     n      = a->i[i+1] - a->i[i];
2120:     alpha1 = x[11*i];
2121:     alpha2 = x[11*i+1];
2122:     alpha3 = x[11*i+2];
2123:     alpha4 = x[11*i+3];
2124:     alpha5 = x[11*i+4];
2125:     alpha6 = x[11*i+5];
2126:     alpha7 = x[11*i+6];
2127:     alpha8 = x[11*i+7];
2128:     alpha9 = x[11*i+8];
2129:     alpha10 = x[11*i+9];
2130:     alpha11 = x[11*i+10];
2131:     while (n-->0) {
2132:       y[11*(*idx)]   += alpha1*(*v);
2133:       y[11*(*idx)+1] += alpha2*(*v);
2134:       y[11*(*idx)+2] += alpha3*(*v);
2135:       y[11*(*idx)+3] += alpha4*(*v);
2136:       y[11*(*idx)+4] += alpha5*(*v);
2137:       y[11*(*idx)+5] += alpha6*(*v);
2138:       y[11*(*idx)+6] += alpha7*(*v);
2139:       y[11*(*idx)+7] += alpha8*(*v);
2140:       y[11*(*idx)+8] += alpha9*(*v);
2141:       y[11*(*idx)+9] += alpha10*(*v);
2142:       y[11*(*idx)+10] += alpha11*(*v);
2143:       idx++; v++;
2144:     }
2145:   }
2146:   PetscLogFlops(22*a->nz);
2147:   VecRestoreArray(xx,(PetscScalar**)&x);
2148:   VecRestoreArray(zz,&y);
2149:   return(0);
2150: }


2153: /*--------------------------------------------------------------------------------------------*/
2156: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2157: {
2158:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2159:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2160:   const PetscScalar *x,*v;
2161:   PetscScalar        *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2162:   PetscScalar        sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2163:   PetscErrorCode     ierr;
2164:   const PetscInt     m = b->AIJ->rmap->n,*idx,*ii;
2165:   PetscInt           nonzerorow=0,n,i,jrow,j;

2168:   VecGetArray(xx,(PetscScalar**)&x);
2169:   VecGetArray(yy,&y);
2170:   idx  = a->j;
2171:   v    = a->a;
2172:   ii   = a->i;

2174:   for (i=0; i<m; i++) {
2175:     jrow = ii[i];
2176:     n    = ii[i+1] - jrow;
2177:     sum1  = 0.0;
2178:     sum2  = 0.0;
2179:     sum3  = 0.0;
2180:     sum4  = 0.0;
2181:     sum5  = 0.0;
2182:     sum6  = 0.0;
2183:     sum7  = 0.0;
2184:     sum8  = 0.0;
2185:     sum9  = 0.0;
2186:     sum10 = 0.0;
2187:     sum11 = 0.0;
2188:     sum12 = 0.0;
2189:     sum13 = 0.0;
2190:     sum14 = 0.0;
2191:     sum15 = 0.0;
2192:     sum16 = 0.0;
2193:     nonzerorow += (n>0);
2194:     for (j=0; j<n; j++) {
2195:       sum1  += v[jrow]*x[16*idx[jrow]];
2196:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2197:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2198:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2199:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2200:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2201:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2202:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2203:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2204:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2205:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2206:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2207:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2208:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2209:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2210:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2211:       jrow++;
2212:      }
2213:     y[16*i]    = sum1;
2214:     y[16*i+1]  = sum2;
2215:     y[16*i+2]  = sum3;
2216:     y[16*i+3]  = sum4;
2217:     y[16*i+4]  = sum5;
2218:     y[16*i+5]  = sum6;
2219:     y[16*i+6]  = sum7;
2220:     y[16*i+7]  = sum8;
2221:     y[16*i+8]  = sum9;
2222:     y[16*i+9]  = sum10;
2223:     y[16*i+10] = sum11;
2224:     y[16*i+11] = sum12;
2225:     y[16*i+12] = sum13;
2226:     y[16*i+13] = sum14;
2227:     y[16*i+14] = sum15;
2228:     y[16*i+15] = sum16;
2229:   }

2231:   PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2232:   VecRestoreArray(xx,(PetscScalar**)&x);
2233:   VecRestoreArray(yy,&y);
2234:   return(0);
2235: }

2239: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2240: {
2241:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2242:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2243:   const PetscScalar *x,*v;
2244:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2245:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2246:   PetscErrorCode    ierr;
2247:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2248:   PetscInt          n,i;

2251:   VecSet(yy,0.0);
2252:   VecGetArray(xx,(PetscScalar**)&x);
2253:   VecGetArray(yy,&y);

2255:   for (i=0; i<m; i++) {
2256:     idx    = a->j + a->i[i] ;
2257:     v      = a->a + a->i[i] ;
2258:     n      = a->i[i+1] - a->i[i];
2259:     alpha1  = x[16*i];
2260:     alpha2  = x[16*i+1];
2261:     alpha3  = x[16*i+2];
2262:     alpha4  = x[16*i+3];
2263:     alpha5  = x[16*i+4];
2264:     alpha6  = x[16*i+5];
2265:     alpha7  = x[16*i+6];
2266:     alpha8  = x[16*i+7];
2267:     alpha9  = x[16*i+8];
2268:     alpha10 = x[16*i+9];
2269:     alpha11 = x[16*i+10];
2270:     alpha12 = x[16*i+11];
2271:     alpha13 = x[16*i+12];
2272:     alpha14 = x[16*i+13];
2273:     alpha15 = x[16*i+14];
2274:     alpha16 = x[16*i+15];
2275:     while (n-->0) {
2276:       y[16*(*idx)]    += alpha1*(*v);
2277:       y[16*(*idx)+1]  += alpha2*(*v);
2278:       y[16*(*idx)+2]  += alpha3*(*v);
2279:       y[16*(*idx)+3]  += alpha4*(*v);
2280:       y[16*(*idx)+4]  += alpha5*(*v);
2281:       y[16*(*idx)+5]  += alpha6*(*v);
2282:       y[16*(*idx)+6]  += alpha7*(*v);
2283:       y[16*(*idx)+7]  += alpha8*(*v);
2284:       y[16*(*idx)+8]  += alpha9*(*v);
2285:       y[16*(*idx)+9]  += alpha10*(*v);
2286:       y[16*(*idx)+10] += alpha11*(*v);
2287:       y[16*(*idx)+11] += alpha12*(*v);
2288:       y[16*(*idx)+12] += alpha13*(*v);
2289:       y[16*(*idx)+13] += alpha14*(*v);
2290:       y[16*(*idx)+14] += alpha15*(*v);
2291:       y[16*(*idx)+15] += alpha16*(*v);
2292:       idx++; v++;
2293:     }
2294:   }
2295:   PetscLogFlops(32.0*a->nz);
2296:   VecRestoreArray(xx,(PetscScalar**)&x);
2297:   VecRestoreArray(yy,&y);
2298:   return(0);
2299: }

2303: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2304: {
2305:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2306:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2307:   const PetscScalar *x,*v;
2308:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2309:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2310:   PetscErrorCode    ierr;
2311:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2312:   PetscInt          n,i,jrow,j;

2315:   if (yy != zz) {VecCopy(yy,zz);}
2316:   VecGetArray(xx,(PetscScalar**)&x);
2317:   VecGetArray(zz,&y);
2318:   idx  = a->j;
2319:   v    = a->a;
2320:   ii   = a->i;

2322:   for (i=0; i<m; i++) {
2323:     jrow = ii[i];
2324:     n    = ii[i+1] - jrow;
2325:     sum1  = 0.0;
2326:     sum2  = 0.0;
2327:     sum3  = 0.0;
2328:     sum4  = 0.0;
2329:     sum5  = 0.0;
2330:     sum6  = 0.0;
2331:     sum7  = 0.0;
2332:     sum8  = 0.0;
2333:     sum9  = 0.0;
2334:     sum10 = 0.0;
2335:     sum11 = 0.0;
2336:     sum12 = 0.0;
2337:     sum13 = 0.0;
2338:     sum14 = 0.0;
2339:     sum15 = 0.0;
2340:     sum16 = 0.0;
2341:     for (j=0; j<n; j++) {
2342:       sum1  += v[jrow]*x[16*idx[jrow]];
2343:       sum2  += v[jrow]*x[16*idx[jrow]+1];
2344:       sum3  += v[jrow]*x[16*idx[jrow]+2];
2345:       sum4  += v[jrow]*x[16*idx[jrow]+3];
2346:       sum5  += v[jrow]*x[16*idx[jrow]+4];
2347:       sum6  += v[jrow]*x[16*idx[jrow]+5];
2348:       sum7  += v[jrow]*x[16*idx[jrow]+6];
2349:       sum8  += v[jrow]*x[16*idx[jrow]+7];
2350:       sum9  += v[jrow]*x[16*idx[jrow]+8];
2351:       sum10 += v[jrow]*x[16*idx[jrow]+9];
2352:       sum11 += v[jrow]*x[16*idx[jrow]+10];
2353:       sum12 += v[jrow]*x[16*idx[jrow]+11];
2354:       sum13 += v[jrow]*x[16*idx[jrow]+12];
2355:       sum14 += v[jrow]*x[16*idx[jrow]+13];
2356:       sum15 += v[jrow]*x[16*idx[jrow]+14];
2357:       sum16 += v[jrow]*x[16*idx[jrow]+15];
2358:       jrow++;
2359:      }
2360:     y[16*i]    += sum1;
2361:     y[16*i+1]  += sum2;
2362:     y[16*i+2]  += sum3;
2363:     y[16*i+3]  += sum4;
2364:     y[16*i+4]  += sum5;
2365:     y[16*i+5]  += sum6;
2366:     y[16*i+6]  += sum7;
2367:     y[16*i+7]  += sum8;
2368:     y[16*i+8]  += sum9;
2369:     y[16*i+9]  += sum10;
2370:     y[16*i+10] += sum11;
2371:     y[16*i+11] += sum12;
2372:     y[16*i+12] += sum13;
2373:     y[16*i+13] += sum14;
2374:     y[16*i+14] += sum15;
2375:     y[16*i+15] += sum16;
2376:   }

2378:   PetscLogFlops(32.0*a->nz);
2379:   VecRestoreArray(xx,(PetscScalar**)&x);
2380:   VecRestoreArray(zz,&y);
2381:   return(0);
2382: }

2386: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2387: {
2388:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2389:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2390:   const PetscScalar *x,*v;
2391:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2392:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2393:   PetscErrorCode    ierr;
2394:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2395:   PetscInt          n,i;

2398:   if (yy != zz) {VecCopy(yy,zz);}
2399:   VecGetArray(xx,(PetscScalar**)&x);
2400:   VecGetArray(zz,&y);
2401:   for (i=0; i<m; i++) {
2402:     idx    = a->j + a->i[i] ;
2403:     v      = a->a + a->i[i] ;
2404:     n      = a->i[i+1] - a->i[i];
2405:     alpha1 = x[16*i];
2406:     alpha2 = x[16*i+1];
2407:     alpha3 = x[16*i+2];
2408:     alpha4 = x[16*i+3];
2409:     alpha5 = x[16*i+4];
2410:     alpha6 = x[16*i+5];
2411:     alpha7 = x[16*i+6];
2412:     alpha8 = x[16*i+7];
2413:     alpha9  = x[16*i+8];
2414:     alpha10 = x[16*i+9];
2415:     alpha11 = x[16*i+10];
2416:     alpha12 = x[16*i+11];
2417:     alpha13 = x[16*i+12];
2418:     alpha14 = x[16*i+13];
2419:     alpha15 = x[16*i+14];
2420:     alpha16 = x[16*i+15];
2421:     while (n-->0) {
2422:       y[16*(*idx)]   += alpha1*(*v);
2423:       y[16*(*idx)+1] += alpha2*(*v);
2424:       y[16*(*idx)+2] += alpha3*(*v);
2425:       y[16*(*idx)+3] += alpha4*(*v);
2426:       y[16*(*idx)+4] += alpha5*(*v);
2427:       y[16*(*idx)+5] += alpha6*(*v);
2428:       y[16*(*idx)+6] += alpha7*(*v);
2429:       y[16*(*idx)+7] += alpha8*(*v);
2430:       y[16*(*idx)+8]  += alpha9*(*v);
2431:       y[16*(*idx)+9]  += alpha10*(*v);
2432:       y[16*(*idx)+10] += alpha11*(*v);
2433:       y[16*(*idx)+11] += alpha12*(*v);
2434:       y[16*(*idx)+12] += alpha13*(*v);
2435:       y[16*(*idx)+13] += alpha14*(*v);
2436:       y[16*(*idx)+14] += alpha15*(*v);
2437:       y[16*(*idx)+15] += alpha16*(*v);
2438:       idx++; v++;
2439:     }
2440:   }
2441:   PetscLogFlops(32.0*a->nz);
2442:   VecRestoreArray(xx,(PetscScalar**)&x);
2443:   VecRestoreArray(zz,&y);
2444:   return(0);
2445: }

2447: /*--------------------------------------------------------------------------------------------*/
2450: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2451: {
2452:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2453:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2454:   const PetscScalar *x,*v;
2455:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2456:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2457:   PetscErrorCode    ierr;
2458:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2459:   PetscInt          nonzerorow=0,n,i,jrow,j;

2462:   VecGetArray(xx,(PetscScalar**)&x);
2463:   VecGetArray(yy,&y);
2464:   idx  = a->j;
2465:   v    = a->a;
2466:   ii   = a->i;

2468:   for (i=0; i<m; i++) {
2469:     jrow = ii[i];
2470:     n    = ii[i+1] - jrow;
2471:     sum1  = 0.0;
2472:     sum2  = 0.0;
2473:     sum3  = 0.0;
2474:     sum4  = 0.0;
2475:     sum5  = 0.0;
2476:     sum6  = 0.0;
2477:     sum7  = 0.0;
2478:     sum8  = 0.0;
2479:     sum9  = 0.0;
2480:     sum10 = 0.0;
2481:     sum11 = 0.0;
2482:     sum12 = 0.0;
2483:     sum13 = 0.0;
2484:     sum14 = 0.0;
2485:     sum15 = 0.0;
2486:     sum16 = 0.0;
2487:     sum17 = 0.0;
2488:     sum18 = 0.0;
2489:     nonzerorow += (n>0);
2490:     for (j=0; j<n; j++) {
2491:       sum1  += v[jrow]*x[18*idx[jrow]];
2492:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2493:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2494:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2495:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2496:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2497:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2498:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2499:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2500:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2501:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2502:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2503:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2504:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2505:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2506:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2507:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2508:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2509:       jrow++;
2510:      }
2511:     y[18*i]    = sum1;
2512:     y[18*i+1]  = sum2;
2513:     y[18*i+2]  = sum3;
2514:     y[18*i+3]  = sum4;
2515:     y[18*i+4]  = sum5;
2516:     y[18*i+5]  = sum6;
2517:     y[18*i+6]  = sum7;
2518:     y[18*i+7]  = sum8;
2519:     y[18*i+8]  = sum9;
2520:     y[18*i+9]  = sum10;
2521:     y[18*i+10] = sum11;
2522:     y[18*i+11] = sum12;
2523:     y[18*i+12] = sum13;
2524:     y[18*i+13] = sum14;
2525:     y[18*i+14] = sum15;
2526:     y[18*i+15] = sum16;
2527:     y[18*i+16] = sum17;
2528:     y[18*i+17] = sum18;
2529:   }

2531:   PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2532:   VecRestoreArray(xx,(PetscScalar**)&x);
2533:   VecRestoreArray(yy,&y);
2534:   return(0);
2535: }

2539: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2540: {
2541:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2542:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2543:   const PetscScalar *x,*v;
2544:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2545:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2546:   PetscErrorCode    ierr;
2547:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2548:   PetscInt          n,i;

2551:   VecSet(yy,0.0);
2552:   VecGetArray(xx,(PetscScalar**)&x);
2553:   VecGetArray(yy,&y);

2555:   for (i=0; i<m; i++) {
2556:     idx    = a->j + a->i[i] ;
2557:     v      = a->a + a->i[i] ;
2558:     n      = a->i[i+1] - a->i[i];
2559:     alpha1  = x[18*i];
2560:     alpha2  = x[18*i+1];
2561:     alpha3  = x[18*i+2];
2562:     alpha4  = x[18*i+3];
2563:     alpha5  = x[18*i+4];
2564:     alpha6  = x[18*i+5];
2565:     alpha7  = x[18*i+6];
2566:     alpha8  = x[18*i+7];
2567:     alpha9  = x[18*i+8];
2568:     alpha10 = x[18*i+9];
2569:     alpha11 = x[18*i+10];
2570:     alpha12 = x[18*i+11];
2571:     alpha13 = x[18*i+12];
2572:     alpha14 = x[18*i+13];
2573:     alpha15 = x[18*i+14];
2574:     alpha16 = x[18*i+15];
2575:     alpha17 = x[18*i+16];
2576:     alpha18 = x[18*i+17];
2577:     while (n-->0) {
2578:       y[18*(*idx)]    += alpha1*(*v);
2579:       y[18*(*idx)+1]  += alpha2*(*v);
2580:       y[18*(*idx)+2]  += alpha3*(*v);
2581:       y[18*(*idx)+3]  += alpha4*(*v);
2582:       y[18*(*idx)+4]  += alpha5*(*v);
2583:       y[18*(*idx)+5]  += alpha6*(*v);
2584:       y[18*(*idx)+6]  += alpha7*(*v);
2585:       y[18*(*idx)+7]  += alpha8*(*v);
2586:       y[18*(*idx)+8]  += alpha9*(*v);
2587:       y[18*(*idx)+9]  += alpha10*(*v);
2588:       y[18*(*idx)+10] += alpha11*(*v);
2589:       y[18*(*idx)+11] += alpha12*(*v);
2590:       y[18*(*idx)+12] += alpha13*(*v);
2591:       y[18*(*idx)+13] += alpha14*(*v);
2592:       y[18*(*idx)+14] += alpha15*(*v);
2593:       y[18*(*idx)+15] += alpha16*(*v);
2594:       y[18*(*idx)+16] += alpha17*(*v);
2595:       y[18*(*idx)+17] += alpha18*(*v);
2596:       idx++; v++;
2597:     }
2598:   }
2599:   PetscLogFlops(36.0*a->nz);
2600:   VecRestoreArray(xx,(PetscScalar**)&x);
2601:   VecRestoreArray(yy,&y);
2602:   return(0);
2603: }

2607: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2608: {
2609:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2610:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2611:   const PetscScalar *x,*v;
2612:   PetscScalar       *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2613:   PetscScalar       sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2614:   PetscErrorCode    ierr;
2615:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2616:   PetscInt          n,i,jrow,j;

2619:   if (yy != zz) {VecCopy(yy,zz);}
2620:   VecGetArray(xx,(PetscScalar**)&x);
2621:   VecGetArray(zz,&y);
2622:   idx  = a->j;
2623:   v    = a->a;
2624:   ii   = a->i;

2626:   for (i=0; i<m; i++) {
2627:     jrow = ii[i];
2628:     n    = ii[i+1] - jrow;
2629:     sum1  = 0.0;
2630:     sum2  = 0.0;
2631:     sum3  = 0.0;
2632:     sum4  = 0.0;
2633:     sum5  = 0.0;
2634:     sum6  = 0.0;
2635:     sum7  = 0.0;
2636:     sum8  = 0.0;
2637:     sum9  = 0.0;
2638:     sum10 = 0.0;
2639:     sum11 = 0.0;
2640:     sum12 = 0.0;
2641:     sum13 = 0.0;
2642:     sum14 = 0.0;
2643:     sum15 = 0.0;
2644:     sum16 = 0.0;
2645:     sum17 = 0.0;
2646:     sum18 = 0.0;
2647:     for (j=0; j<n; j++) {
2648:       sum1  += v[jrow]*x[18*idx[jrow]];
2649:       sum2  += v[jrow]*x[18*idx[jrow]+1];
2650:       sum3  += v[jrow]*x[18*idx[jrow]+2];
2651:       sum4  += v[jrow]*x[18*idx[jrow]+3];
2652:       sum5  += v[jrow]*x[18*idx[jrow]+4];
2653:       sum6  += v[jrow]*x[18*idx[jrow]+5];
2654:       sum7  += v[jrow]*x[18*idx[jrow]+6];
2655:       sum8  += v[jrow]*x[18*idx[jrow]+7];
2656:       sum9  += v[jrow]*x[18*idx[jrow]+8];
2657:       sum10 += v[jrow]*x[18*idx[jrow]+9];
2658:       sum11 += v[jrow]*x[18*idx[jrow]+10];
2659:       sum12 += v[jrow]*x[18*idx[jrow]+11];
2660:       sum13 += v[jrow]*x[18*idx[jrow]+12];
2661:       sum14 += v[jrow]*x[18*idx[jrow]+13];
2662:       sum15 += v[jrow]*x[18*idx[jrow]+14];
2663:       sum16 += v[jrow]*x[18*idx[jrow]+15];
2664:       sum17 += v[jrow]*x[18*idx[jrow]+16];
2665:       sum18 += v[jrow]*x[18*idx[jrow]+17];
2666:       jrow++;
2667:      }
2668:     y[18*i]    += sum1;
2669:     y[18*i+1]  += sum2;
2670:     y[18*i+2]  += sum3;
2671:     y[18*i+3]  += sum4;
2672:     y[18*i+4]  += sum5;
2673:     y[18*i+5]  += sum6;
2674:     y[18*i+6]  += sum7;
2675:     y[18*i+7]  += sum8;
2676:     y[18*i+8]  += sum9;
2677:     y[18*i+9]  += sum10;
2678:     y[18*i+10] += sum11;
2679:     y[18*i+11] += sum12;
2680:     y[18*i+12] += sum13;
2681:     y[18*i+13] += sum14;
2682:     y[18*i+14] += sum15;
2683:     y[18*i+15] += sum16;
2684:     y[18*i+16] += sum17;
2685:     y[18*i+17] += sum18;
2686:   }

2688:   PetscLogFlops(36.0*a->nz);
2689:   VecRestoreArray(xx,(PetscScalar**)&x);
2690:   VecRestoreArray(zz,&y);
2691:   return(0);
2692: }

2696: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2697: {
2698:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2699:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2700:   const PetscScalar *x,*v;
2701:   PetscScalar       *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2702:   PetscScalar       alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2703:   PetscErrorCode    ierr;
2704:   const PetscInt    m = b->AIJ->rmap->n,*idx;
2705:   PetscInt          n,i;

2708:   if (yy != zz) {VecCopy(yy,zz);}
2709:   VecGetArray(xx,(PetscScalar**)&x);
2710:   VecGetArray(zz,&y);
2711:   for (i=0; i<m; i++) {
2712:     idx    = a->j + a->i[i] ;
2713:     v      = a->a + a->i[i] ;
2714:     n      = a->i[i+1] - a->i[i];
2715:     alpha1 = x[18*i];
2716:     alpha2 = x[18*i+1];
2717:     alpha3 = x[18*i+2];
2718:     alpha4 = x[18*i+3];
2719:     alpha5 = x[18*i+4];
2720:     alpha6 = x[18*i+5];
2721:     alpha7 = x[18*i+6];
2722:     alpha8 = x[18*i+7];
2723:     alpha9  = x[18*i+8];
2724:     alpha10 = x[18*i+9];
2725:     alpha11 = x[18*i+10];
2726:     alpha12 = x[18*i+11];
2727:     alpha13 = x[18*i+12];
2728:     alpha14 = x[18*i+13];
2729:     alpha15 = x[18*i+14];
2730:     alpha16 = x[18*i+15];
2731:     alpha17 = x[18*i+16];
2732:     alpha18 = x[18*i+17];
2733:     while (n-->0) {
2734:       y[18*(*idx)]   += alpha1*(*v);
2735:       y[18*(*idx)+1] += alpha2*(*v);
2736:       y[18*(*idx)+2] += alpha3*(*v);
2737:       y[18*(*idx)+3] += alpha4*(*v);
2738:       y[18*(*idx)+4] += alpha5*(*v);
2739:       y[18*(*idx)+5] += alpha6*(*v);
2740:       y[18*(*idx)+6] += alpha7*(*v);
2741:       y[18*(*idx)+7] += alpha8*(*v);
2742:       y[18*(*idx)+8]  += alpha9*(*v);
2743:       y[18*(*idx)+9]  += alpha10*(*v);
2744:       y[18*(*idx)+10] += alpha11*(*v);
2745:       y[18*(*idx)+11] += alpha12*(*v);
2746:       y[18*(*idx)+12] += alpha13*(*v);
2747:       y[18*(*idx)+13] += alpha14*(*v);
2748:       y[18*(*idx)+14] += alpha15*(*v);
2749:       y[18*(*idx)+15] += alpha16*(*v);
2750:       y[18*(*idx)+16] += alpha17*(*v);
2751:       y[18*(*idx)+17] += alpha18*(*v);
2752:       idx++; v++;
2753:     }
2754:   }
2755:   PetscLogFlops(36.0*a->nz);
2756:   VecRestoreArray(xx,(PetscScalar**)&x);
2757:   VecRestoreArray(zz,&y);
2758:   return(0);
2759: }

2763: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2764: {
2765:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2766:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2767:   const PetscScalar *x,*v;
2768:   PetscScalar       *y,*sums;
2769:   PetscErrorCode    ierr;
2770:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2771:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2774:   VecGetArray(xx,(PetscScalar**)&x);
2775:   VecSet(yy,0.0);
2776:   VecGetArray(yy,&y);
2777:   idx  = a->j;
2778:   v    = a->a;
2779:   ii   = a->i;

2781:   for (i=0; i<m; i++) {
2782:     jrow = ii[i];
2783:     n    = ii[i+1] - jrow;
2784:     sums = y + dof*i;
2785:     for (j=0; j<n; j++) {
2786:       for (k=0; k<dof; k++) {
2787:         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2788:       }
2789:       jrow++;
2790:     }
2791:   }

2793:   PetscLogFlops(2.0*dof*a->nz);
2794:   VecRestoreArray(xx,(PetscScalar**)&x);
2795:   VecRestoreArray(yy,&y);
2796:   return(0);
2797: }

2801: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2802: {
2803:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2804:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2805:   const PetscScalar *x,*v;
2806:   PetscScalar       *y,*sums;
2807:   PetscErrorCode    ierr;
2808:   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
2809:   PetscInt          n,i,jrow,j,dof = b->dof,k;

2812:   if (yy != zz) {VecCopy(yy,zz);}
2813:   VecGetArray(xx,(PetscScalar**)&x);
2814:   VecGetArray(zz,&y);
2815:   idx  = a->j;
2816:   v    = a->a;
2817:   ii   = a->i;

2819:   for (i=0; i<m; i++) {
2820:     jrow = ii[i];
2821:     n    = ii[i+1] - jrow;
2822:     sums = y + dof*i;
2823:     for (j=0; j<n; j++) {
2824:       for (k=0; k<dof; k++) {
2825:         sums[k]  += v[jrow]*x[dof*idx[jrow]+k];
2826:       }
2827:       jrow++;
2828:     }
2829:   }

2831:   PetscLogFlops(2.0*dof*a->nz);
2832:   VecRestoreArray(xx,(PetscScalar**)&x);
2833:   VecRestoreArray(zz,&y);
2834:   return(0);
2835: }

2839: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2840: {
2841:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2842:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2843:   const PetscScalar *x,*v,*alpha;
2844:   PetscScalar       *y;
2845:   PetscErrorCode    ierr;
2846:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2847:   PetscInt          n,i,k;

2850:   VecGetArray(xx,(PetscScalar**)&x);
2851:   VecSet(yy,0.0);
2852:   VecGetArray(yy,&y);
2853:   for (i=0; i<m; i++) {
2854:     idx    = a->j + a->i[i] ;
2855:     v      = a->a + a->i[i] ;
2856:     n      = a->i[i+1] - a->i[i];
2857:     alpha  = x + dof*i;
2858:     while (n-->0) {
2859:       for (k=0; k<dof; k++) {
2860:         y[dof*(*idx)+k] += alpha[k]*(*v);
2861:       }
2862:       idx++; v++;
2863:     }
2864:   }
2865:   PetscLogFlops(2.0*dof*a->nz);
2866:   VecRestoreArray(xx,(PetscScalar**)&x);
2867:   VecRestoreArray(yy,&y);
2868:   return(0);
2869: }

2873: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2874: {
2875:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
2876:   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
2877:   const PetscScalar *x,*v,*alpha;
2878:   PetscScalar       *y;
2879:   PetscErrorCode    ierr;
2880:   const PetscInt    m = b->AIJ->rmap->n,*idx,dof = b->dof;
2881:   PetscInt          n,i,k;

2884:   if (yy != zz) {VecCopy(yy,zz);}
2885:   VecGetArray(xx,(PetscScalar**)&x);
2886:   VecGetArray(zz,&y);
2887:   for (i=0; i<m; i++) {
2888:     idx    = a->j + a->i[i] ;
2889:     v      = a->a + a->i[i] ;
2890:     n      = a->i[i+1] - a->i[i];
2891:     alpha  = x + dof*i;
2892:     while (n-->0) {
2893:       for (k=0; k<dof; k++) {
2894:         y[dof*(*idx)+k] += alpha[k]*(*v);
2895:       }
2896:       idx++; v++;
2897:     }
2898:   }
2899:   PetscLogFlops(2.0*dof*a->nz);
2900:   VecRestoreArray(xx,(PetscScalar**)&x);
2901:   VecRestoreArray(zz,&y);
2902:   return(0);
2903: }

2905: /*===================================================================================*/
2908: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2909: {
2910:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2914:   /* start the scatter */
2915:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2916:   (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2917:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2918:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2919:   return(0);
2920: }

2924: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2925: {
2926:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2930:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2931:   (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2932:   VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2933:   VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2934:   return(0);
2935: }

2939: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2940: {
2941:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2945:   /* start the scatter */
2946:   VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2947:   (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2948:   VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2949:   (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2950:   return(0);
2951: }

2955: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2956: {
2957:   Mat_MPIMAIJ    *b = (Mat_MPIMAIJ*)A->data;

2961:   (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2962:   VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2963:   (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2964:   VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2965:   return(0);
2966: }

2968: /* ----------------------------------------------------------------*/
2971: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2972: {
2973:   /* This routine requires testing -- but it's getting better. */
2974:   PetscErrorCode     ierr;
2975:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2976:   Mat_SeqMAIJ        *pp=(Mat_SeqMAIJ*)PP->data;
2977:   Mat                P=pp->AIJ;
2978:   Mat_SeqAIJ         *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2979:   PetscInt           *pti,*ptj,*ptJ;
2980:   PetscInt           *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2981:   const PetscInt     an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2982:   PetscInt           i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2983:   MatScalar          *ca;
2984:   const PetscInt     *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;

2987:   /* Start timer */
2988:   PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);

2990:   /* Get ij structure of P^T */
2991:   MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

2993:   cn = pn*ppdof;
2994:   /* Allocate ci array, arrays for fill computation and */
2995:   /* free space for accumulating nonzero column info */
2996:   PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2997:   ci[0] = 0;

2999:   /* Work arrays for rows of P^T*A */
3000:   PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);

3002:   /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3003:   /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3004:   /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3005:   PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
3006:   current_space = free_space;

3008:   /* Determine symbolic info for each row of C: */
3009:   for (i=0;i<pn;i++) {
3010:     ptnzi  = pti[i+1] - pti[i];
3011:     ptJ    = ptj + pti[i];
3012:     for (dof=0;dof<ppdof;dof++) {
3013:       ptanzi = 0;
3014:       /* Determine symbolic row of PtA: */
3015:       for (j=0;j<ptnzi;j++) {
3016:         /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3017:         arow = ptJ[j]*ppdof + dof;
3018:         /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3019:         anzj = ai[arow+1] - ai[arow];
3020:         ajj  = aj + ai[arow];
3021:         for (k=0;k<anzj;k++) {
3022:           if (!ptadenserow[ajj[k]]) {
3023:             ptadenserow[ajj[k]]    = -1;
3024:             ptasparserow[ptanzi++] = ajj[k];
3025:           }
3026:         }
3027:       }
3028:       /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3029:       ptaj = ptasparserow;
3030:       cnzi   = 0;
3031:       for (j=0;j<ptanzi;j++) {
3032:         /* Get offset within block of P */
3033:         pshift = *ptaj%ppdof;
3034:         /* Get block row of P */
3035:         prow = (*ptaj++)/ppdof; /* integer division */
3036:         /* P has same number of nonzeros per row as the compressed form */
3037:         pnzj = pi[prow+1] - pi[prow];
3038:         pjj  = pj + pi[prow];
3039:         for (k=0;k<pnzj;k++) {
3040:           /* Locations in C are shifted by the offset within the block */
3041:           /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3042:           if (!denserow[pjj[k]*ppdof+pshift]) {
3043:             denserow[pjj[k]*ppdof+pshift] = -1;
3044:             sparserow[cnzi++]             = pjj[k]*ppdof+pshift;
3045:           }
3046:         }
3047:       }

3049:       /* sort sparserow */
3050:       PetscSortInt(cnzi,sparserow);
3051: 
3052:       /* If free space is not available, make more free space */
3053:       /* Double the amount of total space in the list */
3054:       if (current_space->local_remaining<cnzi) {
3055:         PetscFreeSpaceGet(cnzi+current_space->total_array_size,&current_space);
3056:       }

3058:       /* Copy data into free space, and zero out denserows */
3059:       PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
3060:       current_space->array           += cnzi;
3061:       current_space->local_used      += cnzi;
3062:       current_space->local_remaining -= cnzi;

3064:       for (j=0;j<ptanzi;j++) {
3065:         ptadenserow[ptasparserow[j]] = 0;
3066:       }
3067:       for (j=0;j<cnzi;j++) {
3068:         denserow[sparserow[j]] = 0;
3069:       }
3070:       /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3071:       /*        For now, we will recompute what is needed. */
3072:       ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3073:     }
3074:   }
3075:   /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3076:   /* Allocate space for cj, initialize cj, and */
3077:   /* destroy list of free space and other temporary array(s) */
3078:   PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
3079:   PetscFreeSpaceContiguous(&free_space,cj);
3080:   PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3081: 
3082:   /* Allocate space for ca */
3083:   PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
3084:   PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
3085: 
3086:   /* put together the new matrix */
3087:   MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);

3089:   /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3090:   /* Since these are PETSc arrays, change flags to free them as necessary. */
3091:   c          = (Mat_SeqAIJ *)((*C)->data);
3092:   c->free_a  = PETSC_TRUE;
3093:   c->free_ij = PETSC_TRUE;
3094:   c->nonew   = 0;

3096:   /* Clean up. */
3097:   MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);

3099:   PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
3100:   return(0);
3101: }

3105: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3106: {
3107:   /* This routine requires testing -- first draft only */
3108:   PetscErrorCode  ierr;
3109:   Mat_SeqMAIJ     *pp=(Mat_SeqMAIJ*)PP->data;
3110:   Mat             P=pp->AIJ;
3111:   Mat_SeqAIJ      *a  = (Mat_SeqAIJ *) A->data;
3112:   Mat_SeqAIJ      *p  = (Mat_SeqAIJ *) P->data;
3113:   Mat_SeqAIJ      *c  = (Mat_SeqAIJ *) C->data;
3114:   const PetscInt  *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3115:   const PetscInt  *ci=c->i,*cj=c->j,*cjj;
3116:   const PetscInt  am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3117:   PetscInt        i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3118:   const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3119:   MatScalar       *ca=c->a,*caj,*apa;

3122:   /* Allocate temporary array for storage of one row of A*P */
3123:   PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);
3124:   PetscMemzero(apa,cn*sizeof(MatScalar));
3125:   PetscMemzero(apj,cn*sizeof(PetscInt));
3126:   PetscMemzero(apjdense,cn*sizeof(PetscInt));

3128:   /* Clear old values in C */
3129:   PetscMemzero(ca,ci[cm]*sizeof(MatScalar));

3131:   for (i=0;i<am;i++) {
3132:     /* Form sparse row of A*P */
3133:     anzi  = ai[i+1] - ai[i];
3134:     apnzj = 0;
3135:     for (j=0;j<anzi;j++) {
3136:       /* Get offset within block of P */
3137:       pshift = *aj%ppdof;
3138:       /* Get block row of P */
3139:       prow   = *aj++/ppdof; /* integer division */
3140:       pnzj = pi[prow+1] - pi[prow];
3141:       pjj  = pj + pi[prow];
3142:       paj  = pa + pi[prow];
3143:       for (k=0;k<pnzj;k++) {
3144:         poffset = pjj[k]*ppdof+pshift;
3145:         if (!apjdense[poffset]) {
3146:           apjdense[poffset] = -1;
3147:           apj[apnzj++]      = poffset;
3148:         }
3149:         apa[poffset] += (*aa)*paj[k];
3150:       }
3151:       PetscLogFlops(2.0*pnzj);
3152:       aa++;
3153:     }

3155:     /* Sort the j index array for quick sparse axpy. */
3156:     /* Note: a array does not need sorting as it is in dense storage locations. */
3157:     PetscSortInt(apnzj,apj);

3159:     /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3160:     prow    = i/ppdof; /* integer division */
3161:     pshift  = i%ppdof;
3162:     poffset = pi[prow];
3163:     pnzi = pi[prow+1] - poffset;
3164:     /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3165:     pJ   = pj+poffset;
3166:     pA   = pa+poffset;
3167:     for (j=0;j<pnzi;j++) {
3168:       crow   = (*pJ)*ppdof+pshift;
3169:       cjj    = cj + ci[crow];
3170:       caj    = ca + ci[crow];
3171:       pJ++;
3172:       /* Perform sparse axpy operation.  Note cjj includes apj. */
3173:       for (k=0,nextap=0;nextap<apnzj;k++) {
3174:         if (cjj[k]==apj[nextap]) {
3175:           caj[k] += (*pA)*apa[apj[nextap++]];
3176:         }
3177:       }
3178:       PetscLogFlops(2.0*apnzj);
3179:       pA++;
3180:     }

3182:     /* Zero the current row info for A*P */
3183:     for (j=0;j<apnzj;j++) {
3184:       apa[apj[j]]      = 0.;
3185:       apjdense[apj[j]] = 0;
3186:     }
3187:   }

3189:   /* Assemble the final matrix and clean up */
3190:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3191:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3192:   PetscFree3(apa,apj,apjdense);
3193:   return(0);
3194: }

3198: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3199: {
3200:   PetscErrorCode    ierr;

3203:   /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3204:   MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
3205:   ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3206:   return(0);
3207: }

3211: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3212: {
3214:   SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3215:   return(0);
3216: }

3221: PetscErrorCode  MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3222: {
3223:   Mat_SeqMAIJ       *b = (Mat_SeqMAIJ*)A->data;
3224:   Mat               a = b->AIJ,B;
3225:   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)a->data;
3226:   PetscErrorCode    ierr;
3227:   PetscInt          m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3228:   PetscInt          *cols;
3229:   PetscScalar       *vals;

3232:   MatGetSize(a,&m,&n);
3233:   PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
3234:   for (i=0; i<m; i++) {
3235:     nmax = PetscMax(nmax,aij->ilen[i]);
3236:     for (j=0; j<dof; j++) {
3237:       ilen[dof*i+j] = aij->ilen[i];
3238:     }
3239:   }
3240:   MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3241:   PetscFree(ilen);
3242:   PetscMalloc(nmax*sizeof(PetscInt),&icols);
3243:   ii   = 0;
3244:   for (i=0; i<m; i++) {
3245:     MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3246:     for (j=0; j<dof; j++) {
3247:       for (k=0; k<ncols; k++) {
3248:         icols[k] = dof*cols[k]+j;
3249:       }
3250:       MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3251:       ii++;
3252:     }
3253:     MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3254:   }
3255:   PetscFree(icols);
3256:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3257:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3259:   if (reuse == MAT_REUSE_MATRIX) {
3260:     MatHeaderReplace(A,B);
3261:   } else {
3262:     *newmat = B;
3263:   }
3264:   return(0);
3265: }

3268:  #include ../src/mat/impls/aij/mpi/mpiaij.h

3273: PetscErrorCode  MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3274: {
3275:   Mat_MPIMAIJ       *maij = (Mat_MPIMAIJ*)A->data;
3276:   Mat               MatAIJ  = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3277:   Mat               MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3278:   Mat_SeqAIJ        *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3279:   Mat_SeqAIJ        *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3280:   Mat_MPIAIJ        *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3281:   PetscInt          dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3282:   PetscInt          *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3283:   PetscInt          rstart,cstart,*garray,ii,k;
3284:   PetscErrorCode    ierr;
3285:   PetscScalar       *vals,*ovals;

3288:   PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
3289:   for (i=0; i<A->rmap->n/dof; i++) {
3290:     nmax  = PetscMax(nmax,AIJ->ilen[i]);
3291:     onmax = PetscMax(onmax,OAIJ->ilen[i]);
3292:     for (j=0; j<dof; j++) {
3293:       dnz[dof*i+j] = AIJ->ilen[i];
3294:       onz[dof*i+j] = OAIJ->ilen[i];
3295:     }
3296:   }
3297:   MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3298:   PetscFree2(dnz,onz);

3300:   PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
3301:   rstart = dof*maij->A->rmap->rstart;
3302:   cstart = dof*maij->A->cmap->rstart;
3303:   garray = mpiaij->garray;

3305:   ii = rstart;
3306:   for (i=0; i<A->rmap->n/dof; i++) {
3307:     MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3308:     MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3309:     for (j=0; j<dof; j++) {
3310:       for (k=0; k<ncols; k++) {
3311:         icols[k] = cstart + dof*cols[k]+j;
3312:       }
3313:       for (k=0; k<oncols; k++) {
3314:         oicols[k] = dof*garray[ocols[k]]+j;
3315:       }
3316:       MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3317:       MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3318:       ii++;
3319:     }
3320:     MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3321:     MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3322:   }
3323:   PetscFree2(icols,oicols);

3325:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3326:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

3328:   if (reuse == MAT_REUSE_MATRIX) {
3329:     PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3330:     ((PetscObject)A)->refct = 1;
3331:     MatHeaderReplace(A,B);
3332:     ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3333:   } else {
3334:     *newmat = B;
3335:   }
3336:   return(0);
3337: }


3341: /* ---------------------------------------------------------------------------------- */
3344: /*@C
3345:   MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3346:   operations for multicomponent problems.  It interpolates each component the same
3347:   way independently.  The matrix type is based on MATSEQAIJ for sequential matrices,
3348:   and MATMPIAIJ for distributed matrices.

3350:   Collective

3352:   Input Parameters:
3353: + A - the AIJ matrix describing the action on blocks
3354: - dof - the block size (number of components per node)

3356:   Output Parameter:
3357: . maij - the new MAIJ matrix

3359:   Operations provided:
3360: + MatMult
3361: . MatMultTranspose
3362: . MatMultAdd
3363: . MatMultTransposeAdd
3364: - MatView

3366:   Level: advanced

3368: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension()
3369: @*/
3370: PetscErrorCode  MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3371: {
3373:   PetscMPIInt    size;
3374:   PetscInt       n;
3375:   Mat            B;

3378:   PetscObjectReference((PetscObject)A);

3380:   if (dof == 1) {
3381:     *maij = A;
3382:   } else {
3383:     MatCreate(((PetscObject)A)->comm,&B);
3384:     MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3385:     PetscLayoutSetBlockSize(B->rmap,dof);
3386:     PetscLayoutSetBlockSize(B->cmap,dof);
3387:     PetscLayoutSetUp(B->rmap);
3388:     PetscLayoutSetUp(B->cmap);
3389:     B->assembled    = PETSC_TRUE;

3391:     MPI_Comm_size(((PetscObject)A)->comm,&size);
3392:     if (size == 1) {
3393:       Mat_SeqMAIJ    *b;

3395:       MatSetType(B,MATSEQMAIJ);
3396:       B->ops->destroy = MatDestroy_SeqMAIJ;
3397:       B->ops->view    = MatView_SeqMAIJ;
3398:       b      = (Mat_SeqMAIJ*)B->data;
3399:       b->dof = dof;
3400:       b->AIJ = A;
3401:       if (dof == 2) {
3402:         B->ops->mult             = MatMult_SeqMAIJ_2;
3403:         B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
3404:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
3405:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3406:       } else if (dof == 3) {
3407:         B->ops->mult             = MatMult_SeqMAIJ_3;
3408:         B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
3409:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
3410:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3411:       } else if (dof == 4) {
3412:         B->ops->mult             = MatMult_SeqMAIJ_4;
3413:         B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
3414:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
3415:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3416:       } else if (dof == 5) {
3417:         B->ops->mult             = MatMult_SeqMAIJ_5;
3418:         B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
3419:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
3420:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3421:       } else if (dof == 6) {
3422:         B->ops->mult             = MatMult_SeqMAIJ_6;
3423:         B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
3424:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
3425:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3426:       } else if (dof == 7) {
3427:         B->ops->mult             = MatMult_SeqMAIJ_7;
3428:         B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
3429:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
3430:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3431:       } else if (dof == 8) {
3432:         B->ops->mult             = MatMult_SeqMAIJ_8;
3433:         B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
3434:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
3435:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3436:       } else if (dof == 9) {
3437:         B->ops->mult             = MatMult_SeqMAIJ_9;
3438:         B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
3439:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
3440:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3441:       } else if (dof == 10) {
3442:         B->ops->mult             = MatMult_SeqMAIJ_10;
3443:         B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
3444:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
3445:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3446:       } else if (dof == 11) {
3447:         B->ops->mult             = MatMult_SeqMAIJ_11;
3448:         B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
3449:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
3450:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3451:       } else if (dof == 16) {
3452:         B->ops->mult             = MatMult_SeqMAIJ_16;
3453:         B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
3454:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
3455:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3456:       } else if (dof == 18) {
3457:         B->ops->mult             = MatMult_SeqMAIJ_18;
3458:         B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
3459:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
3460:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3461:       } else {
3462:         B->ops->mult             = MatMult_SeqMAIJ_N;
3463:         B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
3464:         B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
3465:         B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3466:       }
3467:       B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3468:       B->ops->ptapnumeric_seqaij  = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3469:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
3470:     } else {
3471:       Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
3472:       Mat_MPIMAIJ *b;
3473:       IS          from,to;
3474:       Vec         gvec;
3475:       PetscInt    *garray,i;

3477:       MatSetType(B,MATMPIMAIJ);
3478:       B->ops->destroy = MatDestroy_MPIMAIJ;
3479:       B->ops->view    = MatView_MPIMAIJ;
3480:       b      = (Mat_MPIMAIJ*)B->data;
3481:       b->dof = dof;
3482:       b->A   = A;
3483:       MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3484:       MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);

3486:       VecGetSize(mpiaij->lvec,&n);
3487:       VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
3488:       VecSetBlockSize(b->w,dof);

3490:       /* create two temporary Index sets for build scatter gather */
3491:       PetscMalloc((n+1)*sizeof(PetscInt),&garray);
3492:       for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
3493:       ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);
3494:       PetscFree(garray);
3495:       ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);

3497:       /* create temporary global vector to generate scatter context */
3498:       VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);
3499:       VecSetBlockSize(gvec,dof);

3501:       /* generate the scatter context */
3502:       VecScatterCreate(gvec,from,b->w,to,&b->ctx);

3504:       ISDestroy(from);
3505:       ISDestroy(to);
3506:       VecDestroy(gvec);

3508:       B->ops->mult             = MatMult_MPIMAIJ_dof;
3509:       B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
3510:       B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
3511:       B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3512:       B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3513:       B->ops->ptapnumeric_mpiaij  = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3514:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3515:     }
3516:     *maij = B;
3517:     MatView_Private(B);
3518:   }
3519:   return(0);
3520: }