Actual source code: maij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the MAIJ matrix storage format.
5: This format is used for restriction and interpolation operations for
6: multicomponent problems. It interpolates each component the same way
7: independently.
9: We provide:
10: MatMult()
11: MatMultTranspose()
12: MatMultTransposeAdd()
13: MatMultAdd()
14: and
15: MatCreateMAIJ(Mat,dof,Mat*)
17: This single directory handles both the sequential and parallel codes
18: */
20: #include ../src/mat/impls/maij/maij.h
21: #include ../src/mat/utils/freespace.h
22: #include private/vecimpl.h
26: /*@C
27: MatMAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the MAIJ matrix
29: Not Collective, but if the MAIJ matrix is parallel, the AIJ matrix is also parallel
31: Input Parameter:
32: . A - the MAIJ matrix
34: Output Parameter:
35: . B - the AIJ matrix
37: Level: advanced
39: Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
41: .seealso: MatCreateMAIJ()
42: @*/
43: PetscErrorCode MatMAIJGetAIJ(Mat A,Mat *B)
44: {
46: PetscTruth ismpimaij,isseqmaij;
49: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
50: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
51: if (ismpimaij) {
52: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
54: *B = b->A;
55: } else if (isseqmaij) {
56: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
58: *B = b->AIJ;
59: } else {
60: *B = A;
61: }
62: return(0);
63: }
67: /*@C
68: MatMAIJRedimension - Get an MAIJ matrix with the same action, but for a different block size
70: Collective
72: Input Parameter:
73: + A - the MAIJ matrix
74: - dof - the block size for the new matrix
76: Output Parameter:
77: . B - the new MAIJ matrix
79: Level: advanced
81: .seealso: MatCreateMAIJ()
82: @*/
83: PetscErrorCode MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
84: {
86: Mat Aij = PETSC_NULL;
89: MatMAIJGetAIJ(A,&Aij);
90: MatCreateMAIJ(Aij,dof,B);
91: return(0);
92: }
96: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
97: {
99: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
102: if (b->AIJ) {
103: MatDestroy(b->AIJ);
104: }
105: PetscFree(b);
106: return(0);
107: }
111: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
112: {
114: Mat B;
117: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
118: MatView(B,viewer);
119: MatDestroy(B);
120: return(0);
121: }
125: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
126: {
128: Mat B;
131: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
132: MatView(B,viewer);
133: MatDestroy(B);
134: return(0);
135: }
139: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
140: {
142: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
145: if (b->AIJ) {
146: MatDestroy(b->AIJ);
147: }
148: if (b->OAIJ) {
149: MatDestroy(b->OAIJ);
150: }
151: if (b->A) {
152: MatDestroy(b->A);
153: }
154: if (b->ctx) {
155: VecScatterDestroy(b->ctx);
156: }
157: if (b->w) {
158: VecDestroy(b->w);
159: }
160: PetscFree(b);
161: PetscObjectChangeTypeName((PetscObject)A,0);
162: return(0);
163: }
165: /*MC
166: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
167: multicomponent problems, interpolating or restricting each component the same way independently.
168: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
170: Operations provided:
171: . MatMult
172: . MatMultTranspose
173: . MatMultAdd
174: . MatMultTransposeAdd
176: Level: advanced
178: .seealso: MatCreateSeqDense
179: M*/
184: PetscErrorCode MatCreate_MAIJ(Mat A)
185: {
187: Mat_MPIMAIJ *b;
188: PetscMPIInt size;
191: PetscNewLog(A,Mat_MPIMAIJ,&b);
192: A->data = (void*)b;
193: PetscMemzero(A->ops,sizeof(struct _MatOps));
194: A->mapping = 0;
196: b->AIJ = 0;
197: b->dof = 0;
198: b->OAIJ = 0;
199: b->ctx = 0;
200: b->w = 0;
201: MPI_Comm_size(((PetscObject)A)->comm,&size);
202: if (size == 1){
203: PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);
204: } else {
205: PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);
206: }
207: return(0);
208: }
211: /* --------------------------------------------------------------------------------------*/
214: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
215: {
216: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
217: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
218: const PetscScalar *x,*v;
219: PetscScalar *y, sum1, sum2;
220: PetscErrorCode ierr;
221: PetscInt nonzerorow=0,n,i,jrow,j;
222: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
225: VecGetArray(xx,(PetscScalar**)&x);
226: VecGetArray(yy,&y);
227: idx = a->j;
228: v = a->a;
229: ii = a->i;
231: for (i=0; i<m; i++) {
232: jrow = ii[i];
233: n = ii[i+1] - jrow;
234: sum1 = 0.0;
235: sum2 = 0.0;
236: nonzerorow += (n>0);
237: for (j=0; j<n; j++) {
238: sum1 += v[jrow]*x[2*idx[jrow]];
239: sum2 += v[jrow]*x[2*idx[jrow]+1];
240: jrow++;
241: }
242: y[2*i] = sum1;
243: y[2*i+1] = sum2;
244: }
246: PetscLogFlops(4.0*a->nz - 2.0*nonzerorow);
247: VecRestoreArray(xx,(PetscScalar**)&x);
248: VecRestoreArray(yy,&y);
249: return(0);
250: }
254: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
255: {
256: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
257: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
258: const PetscScalar *x,*v;
259: PetscScalar *y,alpha1,alpha2;
260: PetscErrorCode ierr;
261: PetscInt n,i;
262: const PetscInt m = b->AIJ->rmap->n,*idx;
265: VecSet(yy,0.0);
266: VecGetArray(xx,(PetscScalar**)&x);
267: VecGetArray(yy,&y);
268:
269: for (i=0; i<m; i++) {
270: idx = a->j + a->i[i] ;
271: v = a->a + a->i[i] ;
272: n = a->i[i+1] - a->i[i];
273: alpha1 = x[2*i];
274: alpha2 = x[2*i+1];
275: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
276: }
277: PetscLogFlops(4.0*a->nz);
278: VecRestoreArray(xx,(PetscScalar**)&x);
279: VecRestoreArray(yy,&y);
280: return(0);
281: }
285: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
286: {
287: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
288: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
289: const PetscScalar *x,*v;
290: PetscScalar *y,sum1, sum2;
291: PetscErrorCode ierr;
292: PetscInt n,i,jrow,j;
293: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
296: if (yy != zz) {VecCopy(yy,zz);}
297: VecGetArray(xx,(PetscScalar**)&x);
298: VecGetArray(zz,&y);
299: idx = a->j;
300: v = a->a;
301: ii = a->i;
303: for (i=0; i<m; i++) {
304: jrow = ii[i];
305: n = ii[i+1] - jrow;
306: sum1 = 0.0;
307: sum2 = 0.0;
308: for (j=0; j<n; j++) {
309: sum1 += v[jrow]*x[2*idx[jrow]];
310: sum2 += v[jrow]*x[2*idx[jrow]+1];
311: jrow++;
312: }
313: y[2*i] += sum1;
314: y[2*i+1] += sum2;
315: }
317: PetscLogFlops(4.0*a->nz);
318: VecRestoreArray(xx,(PetscScalar**)&x);
319: VecRestoreArray(zz,&y);
320: return(0);
321: }
324: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
325: {
326: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
327: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
328: const PetscScalar *x,*v;
329: PetscScalar *y,alpha1,alpha2;
330: PetscErrorCode ierr;
331: PetscInt n,i;
332: const PetscInt m = b->AIJ->rmap->n,*idx;
335: if (yy != zz) {VecCopy(yy,zz);}
336: VecGetArray(xx,(PetscScalar**)&x);
337: VecGetArray(zz,&y);
338:
339: for (i=0; i<m; i++) {
340: idx = a->j + a->i[i] ;
341: v = a->a + a->i[i] ;
342: n = a->i[i+1] - a->i[i];
343: alpha1 = x[2*i];
344: alpha2 = x[2*i+1];
345: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
346: }
347: PetscLogFlops(4.0*a->nz);
348: VecRestoreArray(xx,(PetscScalar**)&x);
349: VecRestoreArray(zz,&y);
350: return(0);
351: }
352: /* --------------------------------------------------------------------------------------*/
355: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
356: {
357: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
358: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
359: const PetscScalar *x,*v;
360: PetscScalar *y,sum1, sum2, sum3;
361: PetscErrorCode ierr;
362: PetscInt nonzerorow=0,n,i,jrow,j;
363: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
366: VecGetArray(xx,(PetscScalar**)&x);
367: VecGetArray(yy,&y);
368: idx = a->j;
369: v = a->a;
370: ii = a->i;
372: for (i=0; i<m; i++) {
373: jrow = ii[i];
374: n = ii[i+1] - jrow;
375: sum1 = 0.0;
376: sum2 = 0.0;
377: sum3 = 0.0;
378: nonzerorow += (n>0);
379: for (j=0; j<n; j++) {
380: sum1 += v[jrow]*x[3*idx[jrow]];
381: sum2 += v[jrow]*x[3*idx[jrow]+1];
382: sum3 += v[jrow]*x[3*idx[jrow]+2];
383: jrow++;
384: }
385: y[3*i] = sum1;
386: y[3*i+1] = sum2;
387: y[3*i+2] = sum3;
388: }
390: PetscLogFlops(6.0*a->nz - 3.0*nonzerorow);
391: VecRestoreArray(xx,(PetscScalar**)&x);
392: VecRestoreArray(yy,&y);
393: return(0);
394: }
398: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
399: {
400: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
401: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
402: const PetscScalar *x,*v;
403: PetscScalar *y,alpha1,alpha2,alpha3;
404: PetscErrorCode ierr;
405: PetscInt n,i;
406: const PetscInt m = b->AIJ->rmap->n,*idx;
409: VecSet(yy,0.0);
410: VecGetArray(xx,(PetscScalar**)&x);
411: VecGetArray(yy,&y);
412:
413: for (i=0; i<m; i++) {
414: idx = a->j + a->i[i];
415: v = a->a + a->i[i];
416: n = a->i[i+1] - a->i[i];
417: alpha1 = x[3*i];
418: alpha2 = x[3*i+1];
419: alpha3 = x[3*i+2];
420: while (n-->0) {
421: y[3*(*idx)] += alpha1*(*v);
422: y[3*(*idx)+1] += alpha2*(*v);
423: y[3*(*idx)+2] += alpha3*(*v);
424: idx++; v++;
425: }
426: }
427: PetscLogFlops(6.0*a->nz);
428: VecRestoreArray(xx,(PetscScalar**)&x);
429: VecRestoreArray(yy,&y);
430: return(0);
431: }
435: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
436: {
437: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
438: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
439: const PetscScalar *x,*v;
440: PetscScalar *y,sum1, sum2, sum3;
441: PetscErrorCode ierr;
442: PetscInt n,i,jrow,j;
443: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
446: if (yy != zz) {VecCopy(yy,zz);}
447: VecGetArray(xx,(PetscScalar**)&x);
448: VecGetArray(zz,&y);
449: idx = a->j;
450: v = a->a;
451: ii = a->i;
453: for (i=0; i<m; i++) {
454: jrow = ii[i];
455: n = ii[i+1] - jrow;
456: sum1 = 0.0;
457: sum2 = 0.0;
458: sum3 = 0.0;
459: for (j=0; j<n; j++) {
460: sum1 += v[jrow]*x[3*idx[jrow]];
461: sum2 += v[jrow]*x[3*idx[jrow]+1];
462: sum3 += v[jrow]*x[3*idx[jrow]+2];
463: jrow++;
464: }
465: y[3*i] += sum1;
466: y[3*i+1] += sum2;
467: y[3*i+2] += sum3;
468: }
470: PetscLogFlops(6.0*a->nz);
471: VecRestoreArray(xx,(PetscScalar**)&x);
472: VecRestoreArray(zz,&y);
473: return(0);
474: }
477: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
478: {
479: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
480: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
481: const PetscScalar *x,*v;
482: PetscScalar *y,alpha1,alpha2,alpha3;
483: PetscErrorCode ierr;
484: PetscInt n,i;
485: const PetscInt m = b->AIJ->rmap->n,*idx;
488: if (yy != zz) {VecCopy(yy,zz);}
489: VecGetArray(xx,(PetscScalar**)&x);
490: VecGetArray(zz,&y);
491: for (i=0; i<m; i++) {
492: idx = a->j + a->i[i] ;
493: v = a->a + a->i[i] ;
494: n = a->i[i+1] - a->i[i];
495: alpha1 = x[3*i];
496: alpha2 = x[3*i+1];
497: alpha3 = x[3*i+2];
498: while (n-->0) {
499: y[3*(*idx)] += alpha1*(*v);
500: y[3*(*idx)+1] += alpha2*(*v);
501: y[3*(*idx)+2] += alpha3*(*v);
502: idx++; v++;
503: }
504: }
505: PetscLogFlops(6.0*a->nz);
506: VecRestoreArray(xx,(PetscScalar**)&x);
507: VecRestoreArray(zz,&y);
508: return(0);
509: }
511: /* ------------------------------------------------------------------------------*/
514: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
515: {
516: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
517: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
518: const PetscScalar *x,*v;
519: PetscScalar *y,sum1, sum2, sum3, sum4;
520: PetscErrorCode ierr;
521: PetscInt nonzerorow=0,n,i,jrow,j;
522: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
525: VecGetArray(xx,(PetscScalar**)&x);
526: VecGetArray(yy,&y);
527: idx = a->j;
528: v = a->a;
529: ii = a->i;
531: for (i=0; i<m; i++) {
532: jrow = ii[i];
533: n = ii[i+1] - jrow;
534: sum1 = 0.0;
535: sum2 = 0.0;
536: sum3 = 0.0;
537: sum4 = 0.0;
538: nonzerorow += (n>0);
539: for (j=0; j<n; j++) {
540: sum1 += v[jrow]*x[4*idx[jrow]];
541: sum2 += v[jrow]*x[4*idx[jrow]+1];
542: sum3 += v[jrow]*x[4*idx[jrow]+2];
543: sum4 += v[jrow]*x[4*idx[jrow]+3];
544: jrow++;
545: }
546: y[4*i] = sum1;
547: y[4*i+1] = sum2;
548: y[4*i+2] = sum3;
549: y[4*i+3] = sum4;
550: }
552: PetscLogFlops(8.0*a->nz - 4.0*nonzerorow);
553: VecRestoreArray(xx,(PetscScalar**)&x);
554: VecRestoreArray(yy,&y);
555: return(0);
556: }
560: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
561: {
562: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
563: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
564: const PetscScalar *x,*v;
565: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
566: PetscErrorCode ierr;
567: PetscInt n,i;
568: const PetscInt m = b->AIJ->rmap->n,*idx;
571: VecSet(yy,0.0);
572: VecGetArray(xx,(PetscScalar**)&x);
573: VecGetArray(yy,&y);
574: for (i=0; i<m; i++) {
575: idx = a->j + a->i[i] ;
576: v = a->a + a->i[i] ;
577: n = a->i[i+1] - a->i[i];
578: alpha1 = x[4*i];
579: alpha2 = x[4*i+1];
580: alpha3 = x[4*i+2];
581: alpha4 = x[4*i+3];
582: while (n-->0) {
583: y[4*(*idx)] += alpha1*(*v);
584: y[4*(*idx)+1] += alpha2*(*v);
585: y[4*(*idx)+2] += alpha3*(*v);
586: y[4*(*idx)+3] += alpha4*(*v);
587: idx++; v++;
588: }
589: }
590: PetscLogFlops(8.0*a->nz);
591: VecRestoreArray(xx,(PetscScalar**)&x);
592: VecRestoreArray(yy,&y);
593: return(0);
594: }
598: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
599: {
600: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
601: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
602: const PetscScalar *x,*v;
603: PetscScalar *y,sum1, sum2, sum3, sum4;
604: PetscErrorCode ierr;
605: PetscInt n,i,jrow,j;
606: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
609: if (yy != zz) {VecCopy(yy,zz);}
610: VecGetArray(xx,(PetscScalar**)&x);
611: VecGetArray(zz,&y);
612: idx = a->j;
613: v = a->a;
614: ii = a->i;
616: for (i=0; i<m; i++) {
617: jrow = ii[i];
618: n = ii[i+1] - jrow;
619: sum1 = 0.0;
620: sum2 = 0.0;
621: sum3 = 0.0;
622: sum4 = 0.0;
623: for (j=0; j<n; j++) {
624: sum1 += v[jrow]*x[4*idx[jrow]];
625: sum2 += v[jrow]*x[4*idx[jrow]+1];
626: sum3 += v[jrow]*x[4*idx[jrow]+2];
627: sum4 += v[jrow]*x[4*idx[jrow]+3];
628: jrow++;
629: }
630: y[4*i] += sum1;
631: y[4*i+1] += sum2;
632: y[4*i+2] += sum3;
633: y[4*i+3] += sum4;
634: }
636: PetscLogFlops(8.0*a->nz);
637: VecRestoreArray(xx,(PetscScalar**)&x);
638: VecRestoreArray(zz,&y);
639: return(0);
640: }
643: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
644: {
645: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
646: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
647: const PetscScalar *x,*v;
648: PetscScalar *y,alpha1,alpha2,alpha3,alpha4;
649: PetscErrorCode ierr;
650: PetscInt n,i;
651: const PetscInt m = b->AIJ->rmap->n,*idx;
654: if (yy != zz) {VecCopy(yy,zz);}
655: VecGetArray(xx,(PetscScalar**)&x);
656: VecGetArray(zz,&y);
657:
658: for (i=0; i<m; i++) {
659: idx = a->j + a->i[i] ;
660: v = a->a + a->i[i] ;
661: n = a->i[i+1] - a->i[i];
662: alpha1 = x[4*i];
663: alpha2 = x[4*i+1];
664: alpha3 = x[4*i+2];
665: alpha4 = x[4*i+3];
666: while (n-->0) {
667: y[4*(*idx)] += alpha1*(*v);
668: y[4*(*idx)+1] += alpha2*(*v);
669: y[4*(*idx)+2] += alpha3*(*v);
670: y[4*(*idx)+3] += alpha4*(*v);
671: idx++; v++;
672: }
673: }
674: PetscLogFlops(8.0*a->nz);
675: VecRestoreArray(xx,(PetscScalar**)&x);
676: VecRestoreArray(zz,&y);
677: return(0);
678: }
679: /* ------------------------------------------------------------------------------*/
683: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
684: {
685: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
686: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
687: const PetscScalar *x,*v;
688: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
689: PetscErrorCode ierr;
690: PetscInt nonzerorow=0,n,i,jrow,j;
691: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
694: VecGetArray(xx,(PetscScalar**)&x);
695: VecGetArray(yy,&y);
696: idx = a->j;
697: v = a->a;
698: ii = a->i;
700: for (i=0; i<m; i++) {
701: jrow = ii[i];
702: n = ii[i+1] - jrow;
703: sum1 = 0.0;
704: sum2 = 0.0;
705: sum3 = 0.0;
706: sum4 = 0.0;
707: sum5 = 0.0;
708: nonzerorow += (n>0);
709: for (j=0; j<n; j++) {
710: sum1 += v[jrow]*x[5*idx[jrow]];
711: sum2 += v[jrow]*x[5*idx[jrow]+1];
712: sum3 += v[jrow]*x[5*idx[jrow]+2];
713: sum4 += v[jrow]*x[5*idx[jrow]+3];
714: sum5 += v[jrow]*x[5*idx[jrow]+4];
715: jrow++;
716: }
717: y[5*i] = sum1;
718: y[5*i+1] = sum2;
719: y[5*i+2] = sum3;
720: y[5*i+3] = sum4;
721: y[5*i+4] = sum5;
722: }
724: PetscLogFlops(10.0*a->nz - 5.0*nonzerorow);
725: VecRestoreArray(xx,(PetscScalar**)&x);
726: VecRestoreArray(yy,&y);
727: return(0);
728: }
732: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
733: {
734: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
735: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
736: const PetscScalar *x,*v;
737: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
738: PetscErrorCode ierr;
739: PetscInt n,i;
740: const PetscInt m = b->AIJ->rmap->n,*idx;
743: VecSet(yy,0.0);
744: VecGetArray(xx,(PetscScalar**)&x);
745: VecGetArray(yy,&y);
746:
747: for (i=0; i<m; i++) {
748: idx = a->j + a->i[i] ;
749: v = a->a + a->i[i] ;
750: n = a->i[i+1] - a->i[i];
751: alpha1 = x[5*i];
752: alpha2 = x[5*i+1];
753: alpha3 = x[5*i+2];
754: alpha4 = x[5*i+3];
755: alpha5 = x[5*i+4];
756: while (n-->0) {
757: y[5*(*idx)] += alpha1*(*v);
758: y[5*(*idx)+1] += alpha2*(*v);
759: y[5*(*idx)+2] += alpha3*(*v);
760: y[5*(*idx)+3] += alpha4*(*v);
761: y[5*(*idx)+4] += alpha5*(*v);
762: idx++; v++;
763: }
764: }
765: PetscLogFlops(10.0*a->nz);
766: VecRestoreArray(xx,(PetscScalar**)&x);
767: VecRestoreArray(yy,&y);
768: return(0);
769: }
773: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
774: {
775: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
776: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
777: const PetscScalar *x,*v;
778: PetscScalar *y,sum1, sum2, sum3, sum4, sum5;
779: PetscErrorCode ierr;
780: PetscInt n,i,jrow,j;
781: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
784: if (yy != zz) {VecCopy(yy,zz);}
785: VecGetArray(xx,(PetscScalar**)&x);
786: VecGetArray(zz,&y);
787: idx = a->j;
788: v = a->a;
789: ii = a->i;
791: for (i=0; i<m; i++) {
792: jrow = ii[i];
793: n = ii[i+1] - jrow;
794: sum1 = 0.0;
795: sum2 = 0.0;
796: sum3 = 0.0;
797: sum4 = 0.0;
798: sum5 = 0.0;
799: for (j=0; j<n; j++) {
800: sum1 += v[jrow]*x[5*idx[jrow]];
801: sum2 += v[jrow]*x[5*idx[jrow]+1];
802: sum3 += v[jrow]*x[5*idx[jrow]+2];
803: sum4 += v[jrow]*x[5*idx[jrow]+3];
804: sum5 += v[jrow]*x[5*idx[jrow]+4];
805: jrow++;
806: }
807: y[5*i] += sum1;
808: y[5*i+1] += sum2;
809: y[5*i+2] += sum3;
810: y[5*i+3] += sum4;
811: y[5*i+4] += sum5;
812: }
814: PetscLogFlops(10.0*a->nz);
815: VecRestoreArray(xx,(PetscScalar**)&x);
816: VecRestoreArray(zz,&y);
817: return(0);
818: }
822: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
823: {
824: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
825: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
826: const PetscScalar *x,*v;
827: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5;
828: PetscErrorCode ierr;
829: PetscInt n,i;
830: const PetscInt m = b->AIJ->rmap->n,*idx;
833: if (yy != zz) {VecCopy(yy,zz);}
834: VecGetArray(xx,(PetscScalar**)&x);
835: VecGetArray(zz,&y);
836:
837: for (i=0; i<m; i++) {
838: idx = a->j + a->i[i] ;
839: v = a->a + a->i[i] ;
840: n = a->i[i+1] - a->i[i];
841: alpha1 = x[5*i];
842: alpha2 = x[5*i+1];
843: alpha3 = x[5*i+2];
844: alpha4 = x[5*i+3];
845: alpha5 = x[5*i+4];
846: while (n-->0) {
847: y[5*(*idx)] += alpha1*(*v);
848: y[5*(*idx)+1] += alpha2*(*v);
849: y[5*(*idx)+2] += alpha3*(*v);
850: y[5*(*idx)+3] += alpha4*(*v);
851: y[5*(*idx)+4] += alpha5*(*v);
852: idx++; v++;
853: }
854: }
855: PetscLogFlops(10.0*a->nz);
856: VecRestoreArray(xx,(PetscScalar**)&x);
857: VecRestoreArray(zz,&y);
858: return(0);
859: }
861: /* ------------------------------------------------------------------------------*/
864: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
865: {
866: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
867: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
868: const PetscScalar *x,*v;
869: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
870: PetscErrorCode ierr;
871: PetscInt nonzerorow=0,n,i,jrow,j;
872: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
875: VecGetArray(xx,(PetscScalar**)&x);
876: VecGetArray(yy,&y);
877: idx = a->j;
878: v = a->a;
879: ii = a->i;
881: for (i=0; i<m; i++) {
882: jrow = ii[i];
883: n = ii[i+1] - jrow;
884: sum1 = 0.0;
885: sum2 = 0.0;
886: sum3 = 0.0;
887: sum4 = 0.0;
888: sum5 = 0.0;
889: sum6 = 0.0;
890: nonzerorow += (n>0);
891: for (j=0; j<n; j++) {
892: sum1 += v[jrow]*x[6*idx[jrow]];
893: sum2 += v[jrow]*x[6*idx[jrow]+1];
894: sum3 += v[jrow]*x[6*idx[jrow]+2];
895: sum4 += v[jrow]*x[6*idx[jrow]+3];
896: sum5 += v[jrow]*x[6*idx[jrow]+4];
897: sum6 += v[jrow]*x[6*idx[jrow]+5];
898: jrow++;
899: }
900: y[6*i] = sum1;
901: y[6*i+1] = sum2;
902: y[6*i+2] = sum3;
903: y[6*i+3] = sum4;
904: y[6*i+4] = sum5;
905: y[6*i+5] = sum6;
906: }
908: PetscLogFlops(12.0*a->nz - 6.0*nonzerorow);
909: VecRestoreArray(xx,(PetscScalar**)&x);
910: VecRestoreArray(yy,&y);
911: return(0);
912: }
916: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
917: {
918: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
919: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
920: const PetscScalar *x,*v;
921: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
922: PetscErrorCode ierr;
923: PetscInt n,i;
924: const PetscInt m = b->AIJ->rmap->n,*idx;
927: VecSet(yy,0.0);
928: VecGetArray(xx,(PetscScalar**)&x);
929: VecGetArray(yy,&y);
931: for (i=0; i<m; i++) {
932: idx = a->j + a->i[i] ;
933: v = a->a + a->i[i] ;
934: n = a->i[i+1] - a->i[i];
935: alpha1 = x[6*i];
936: alpha2 = x[6*i+1];
937: alpha3 = x[6*i+2];
938: alpha4 = x[6*i+3];
939: alpha5 = x[6*i+4];
940: alpha6 = x[6*i+5];
941: while (n-->0) {
942: y[6*(*idx)] += alpha1*(*v);
943: y[6*(*idx)+1] += alpha2*(*v);
944: y[6*(*idx)+2] += alpha3*(*v);
945: y[6*(*idx)+3] += alpha4*(*v);
946: y[6*(*idx)+4] += alpha5*(*v);
947: y[6*(*idx)+5] += alpha6*(*v);
948: idx++; v++;
949: }
950: }
951: PetscLogFlops(12.0*a->nz);
952: VecRestoreArray(xx,(PetscScalar**)&x);
953: VecRestoreArray(yy,&y);
954: return(0);
955: }
959: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
960: {
961: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
962: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
963: const PetscScalar *x,*v;
964: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6;
965: PetscErrorCode ierr;
966: PetscInt n,i,jrow,j;
967: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
970: if (yy != zz) {VecCopy(yy,zz);}
971: VecGetArray(xx,(PetscScalar**)&x);
972: VecGetArray(zz,&y);
973: idx = a->j;
974: v = a->a;
975: ii = a->i;
977: for (i=0; i<m; i++) {
978: jrow = ii[i];
979: n = ii[i+1] - jrow;
980: sum1 = 0.0;
981: sum2 = 0.0;
982: sum3 = 0.0;
983: sum4 = 0.0;
984: sum5 = 0.0;
985: sum6 = 0.0;
986: for (j=0; j<n; j++) {
987: sum1 += v[jrow]*x[6*idx[jrow]];
988: sum2 += v[jrow]*x[6*idx[jrow]+1];
989: sum3 += v[jrow]*x[6*idx[jrow]+2];
990: sum4 += v[jrow]*x[6*idx[jrow]+3];
991: sum5 += v[jrow]*x[6*idx[jrow]+4];
992: sum6 += v[jrow]*x[6*idx[jrow]+5];
993: jrow++;
994: }
995: y[6*i] += sum1;
996: y[6*i+1] += sum2;
997: y[6*i+2] += sum3;
998: y[6*i+3] += sum4;
999: y[6*i+4] += sum5;
1000: y[6*i+5] += sum6;
1001: }
1003: PetscLogFlops(12.0*a->nz);
1004: VecRestoreArray(xx,(PetscScalar**)&x);
1005: VecRestoreArray(zz,&y);
1006: return(0);
1007: }
1011: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
1012: {
1013: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1014: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1015: const PetscScalar *x,*v;
1016: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
1017: PetscErrorCode ierr;
1018: PetscInt n,i;
1019: const PetscInt m = b->AIJ->rmap->n,*idx;
1022: if (yy != zz) {VecCopy(yy,zz);}
1023: VecGetArray(xx,(PetscScalar**)&x);
1024: VecGetArray(zz,&y);
1025:
1026: for (i=0; i<m; i++) {
1027: idx = a->j + a->i[i] ;
1028: v = a->a + a->i[i] ;
1029: n = a->i[i+1] - a->i[i];
1030: alpha1 = x[6*i];
1031: alpha2 = x[6*i+1];
1032: alpha3 = x[6*i+2];
1033: alpha4 = x[6*i+3];
1034: alpha5 = x[6*i+4];
1035: alpha6 = x[6*i+5];
1036: while (n-->0) {
1037: y[6*(*idx)] += alpha1*(*v);
1038: y[6*(*idx)+1] += alpha2*(*v);
1039: y[6*(*idx)+2] += alpha3*(*v);
1040: y[6*(*idx)+3] += alpha4*(*v);
1041: y[6*(*idx)+4] += alpha5*(*v);
1042: y[6*(*idx)+5] += alpha6*(*v);
1043: idx++; v++;
1044: }
1045: }
1046: PetscLogFlops(12.0*a->nz);
1047: VecRestoreArray(xx,(PetscScalar**)&x);
1048: VecRestoreArray(zz,&y);
1049: return(0);
1050: }
1052: /* ------------------------------------------------------------------------------*/
1055: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1056: {
1057: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1058: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1059: const PetscScalar *x,*v;
1060: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1061: PetscErrorCode ierr;
1062: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1063: PetscInt nonzerorow=0,n,i,jrow,j;
1066: VecGetArray(xx,(PetscScalar**)&x);
1067: VecGetArray(yy,&y);
1068: idx = a->j;
1069: v = a->a;
1070: ii = a->i;
1072: for (i=0; i<m; i++) {
1073: jrow = ii[i];
1074: n = ii[i+1] - jrow;
1075: sum1 = 0.0;
1076: sum2 = 0.0;
1077: sum3 = 0.0;
1078: sum4 = 0.0;
1079: sum5 = 0.0;
1080: sum6 = 0.0;
1081: sum7 = 0.0;
1082: nonzerorow += (n>0);
1083: for (j=0; j<n; j++) {
1084: sum1 += v[jrow]*x[7*idx[jrow]];
1085: sum2 += v[jrow]*x[7*idx[jrow]+1];
1086: sum3 += v[jrow]*x[7*idx[jrow]+2];
1087: sum4 += v[jrow]*x[7*idx[jrow]+3];
1088: sum5 += v[jrow]*x[7*idx[jrow]+4];
1089: sum6 += v[jrow]*x[7*idx[jrow]+5];
1090: sum7 += v[jrow]*x[7*idx[jrow]+6];
1091: jrow++;
1092: }
1093: y[7*i] = sum1;
1094: y[7*i+1] = sum2;
1095: y[7*i+2] = sum3;
1096: y[7*i+3] = sum4;
1097: y[7*i+4] = sum5;
1098: y[7*i+5] = sum6;
1099: y[7*i+6] = sum7;
1100: }
1102: PetscLogFlops(14.0*a->nz - 7.0*nonzerorow);
1103: VecRestoreArray(xx,(PetscScalar**)&x);
1104: VecRestoreArray(yy,&y);
1105: return(0);
1106: }
1110: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1111: {
1112: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1113: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1114: const PetscScalar *x,*v;
1115: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1116: PetscErrorCode ierr;
1117: const PetscInt m = b->AIJ->rmap->n,*idx;
1118: PetscInt n,i;
1121: VecSet(yy,0.0);
1122: VecGetArray(xx,(PetscScalar**)&x);
1123: VecGetArray(yy,&y);
1125: for (i=0; i<m; i++) {
1126: idx = a->j + a->i[i] ;
1127: v = a->a + a->i[i] ;
1128: n = a->i[i+1] - a->i[i];
1129: alpha1 = x[7*i];
1130: alpha2 = x[7*i+1];
1131: alpha3 = x[7*i+2];
1132: alpha4 = x[7*i+3];
1133: alpha5 = x[7*i+4];
1134: alpha6 = x[7*i+5];
1135: alpha7 = x[7*i+6];
1136: while (n-->0) {
1137: y[7*(*idx)] += alpha1*(*v);
1138: y[7*(*idx)+1] += alpha2*(*v);
1139: y[7*(*idx)+2] += alpha3*(*v);
1140: y[7*(*idx)+3] += alpha4*(*v);
1141: y[7*(*idx)+4] += alpha5*(*v);
1142: y[7*(*idx)+5] += alpha6*(*v);
1143: y[7*(*idx)+6] += alpha7*(*v);
1144: idx++; v++;
1145: }
1146: }
1147: PetscLogFlops(14.0*a->nz);
1148: VecRestoreArray(xx,(PetscScalar**)&x);
1149: VecRestoreArray(yy,&y);
1150: return(0);
1151: }
1155: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1156: {
1157: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1158: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1159: const PetscScalar *x,*v;
1160: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1161: PetscErrorCode ierr;
1162: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1163: PetscInt n,i,jrow,j;
1166: if (yy != zz) {VecCopy(yy,zz);}
1167: VecGetArray(xx,(PetscScalar**)&x);
1168: VecGetArray(zz,&y);
1169: idx = a->j;
1170: v = a->a;
1171: ii = a->i;
1173: for (i=0; i<m; i++) {
1174: jrow = ii[i];
1175: n = ii[i+1] - jrow;
1176: sum1 = 0.0;
1177: sum2 = 0.0;
1178: sum3 = 0.0;
1179: sum4 = 0.0;
1180: sum5 = 0.0;
1181: sum6 = 0.0;
1182: sum7 = 0.0;
1183: for (j=0; j<n; j++) {
1184: sum1 += v[jrow]*x[7*idx[jrow]];
1185: sum2 += v[jrow]*x[7*idx[jrow]+1];
1186: sum3 += v[jrow]*x[7*idx[jrow]+2];
1187: sum4 += v[jrow]*x[7*idx[jrow]+3];
1188: sum5 += v[jrow]*x[7*idx[jrow]+4];
1189: sum6 += v[jrow]*x[7*idx[jrow]+5];
1190: sum7 += v[jrow]*x[7*idx[jrow]+6];
1191: jrow++;
1192: }
1193: y[7*i] += sum1;
1194: y[7*i+1] += sum2;
1195: y[7*i+2] += sum3;
1196: y[7*i+3] += sum4;
1197: y[7*i+4] += sum5;
1198: y[7*i+5] += sum6;
1199: y[7*i+6] += sum7;
1200: }
1202: PetscLogFlops(14.0*a->nz);
1203: VecRestoreArray(xx,(PetscScalar**)&x);
1204: VecRestoreArray(zz,&y);
1205: return(0);
1206: }
1210: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1211: {
1212: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1213: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1214: const PetscScalar *x,*v;
1215: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1216: PetscErrorCode ierr;
1217: const PetscInt m = b->AIJ->rmap->n,*idx;
1218: PetscInt n,i;
1221: if (yy != zz) {VecCopy(yy,zz);}
1222: VecGetArray(xx,(PetscScalar**)&x);
1223: VecGetArray(zz,&y);
1224: for (i=0; i<m; i++) {
1225: idx = a->j + a->i[i] ;
1226: v = a->a + a->i[i] ;
1227: n = a->i[i+1] - a->i[i];
1228: alpha1 = x[7*i];
1229: alpha2 = x[7*i+1];
1230: alpha3 = x[7*i+2];
1231: alpha4 = x[7*i+3];
1232: alpha5 = x[7*i+4];
1233: alpha6 = x[7*i+5];
1234: alpha7 = x[7*i+6];
1235: while (n-->0) {
1236: y[7*(*idx)] += alpha1*(*v);
1237: y[7*(*idx)+1] += alpha2*(*v);
1238: y[7*(*idx)+2] += alpha3*(*v);
1239: y[7*(*idx)+3] += alpha4*(*v);
1240: y[7*(*idx)+4] += alpha5*(*v);
1241: y[7*(*idx)+5] += alpha6*(*v);
1242: y[7*(*idx)+6] += alpha7*(*v);
1243: idx++; v++;
1244: }
1245: }
1246: PetscLogFlops(14.0*a->nz);
1247: VecRestoreArray(xx,(PetscScalar**)&x);
1248: VecRestoreArray(zz,&y);
1249: return(0);
1250: }
1254: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1255: {
1256: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1257: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1258: const PetscScalar *x,*v;
1259: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1260: PetscErrorCode ierr;
1261: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1262: PetscInt nonzerorow=0,n,i,jrow,j;
1265: VecGetArray(xx,(PetscScalar**)&x);
1266: VecGetArray(yy,&y);
1267: idx = a->j;
1268: v = a->a;
1269: ii = a->i;
1271: for (i=0; i<m; i++) {
1272: jrow = ii[i];
1273: n = ii[i+1] - jrow;
1274: sum1 = 0.0;
1275: sum2 = 0.0;
1276: sum3 = 0.0;
1277: sum4 = 0.0;
1278: sum5 = 0.0;
1279: sum6 = 0.0;
1280: sum7 = 0.0;
1281: sum8 = 0.0;
1282: nonzerorow += (n>0);
1283: for (j=0; j<n; j++) {
1284: sum1 += v[jrow]*x[8*idx[jrow]];
1285: sum2 += v[jrow]*x[8*idx[jrow]+1];
1286: sum3 += v[jrow]*x[8*idx[jrow]+2];
1287: sum4 += v[jrow]*x[8*idx[jrow]+3];
1288: sum5 += v[jrow]*x[8*idx[jrow]+4];
1289: sum6 += v[jrow]*x[8*idx[jrow]+5];
1290: sum7 += v[jrow]*x[8*idx[jrow]+6];
1291: sum8 += v[jrow]*x[8*idx[jrow]+7];
1292: jrow++;
1293: }
1294: y[8*i] = sum1;
1295: y[8*i+1] = sum2;
1296: y[8*i+2] = sum3;
1297: y[8*i+3] = sum4;
1298: y[8*i+4] = sum5;
1299: y[8*i+5] = sum6;
1300: y[8*i+6] = sum7;
1301: y[8*i+7] = sum8;
1302: }
1304: PetscLogFlops(16.0*a->nz - 8.0*nonzerorow);
1305: VecRestoreArray(xx,(PetscScalar**)&x);
1306: VecRestoreArray(yy,&y);
1307: return(0);
1308: }
1312: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1313: {
1314: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1315: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1316: const PetscScalar *x,*v;
1317: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1318: PetscErrorCode ierr;
1319: const PetscInt m = b->AIJ->rmap->n,*idx;
1320: PetscInt n,i;
1323: VecSet(yy,0.0);
1324: VecGetArray(xx,(PetscScalar**)&x);
1325: VecGetArray(yy,&y);
1327: for (i=0; i<m; i++) {
1328: idx = a->j + a->i[i] ;
1329: v = a->a + a->i[i] ;
1330: n = a->i[i+1] - a->i[i];
1331: alpha1 = x[8*i];
1332: alpha2 = x[8*i+1];
1333: alpha3 = x[8*i+2];
1334: alpha4 = x[8*i+3];
1335: alpha5 = x[8*i+4];
1336: alpha6 = x[8*i+5];
1337: alpha7 = x[8*i+6];
1338: alpha8 = x[8*i+7];
1339: while (n-->0) {
1340: y[8*(*idx)] += alpha1*(*v);
1341: y[8*(*idx)+1] += alpha2*(*v);
1342: y[8*(*idx)+2] += alpha3*(*v);
1343: y[8*(*idx)+3] += alpha4*(*v);
1344: y[8*(*idx)+4] += alpha5*(*v);
1345: y[8*(*idx)+5] += alpha6*(*v);
1346: y[8*(*idx)+6] += alpha7*(*v);
1347: y[8*(*idx)+7] += alpha8*(*v);
1348: idx++; v++;
1349: }
1350: }
1351: PetscLogFlops(16.0*a->nz);
1352: VecRestoreArray(xx,(PetscScalar**)&x);
1353: VecRestoreArray(yy,&y);
1354: return(0);
1355: }
1359: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1360: {
1361: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1362: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1363: const PetscScalar *x,*v;
1364: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1365: PetscErrorCode ierr;
1366: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1367: PetscInt n,i,jrow,j;
1370: if (yy != zz) {VecCopy(yy,zz);}
1371: VecGetArray(xx,(PetscScalar**)&x);
1372: VecGetArray(zz,&y);
1373: idx = a->j;
1374: v = a->a;
1375: ii = a->i;
1377: for (i=0; i<m; i++) {
1378: jrow = ii[i];
1379: n = ii[i+1] - jrow;
1380: sum1 = 0.0;
1381: sum2 = 0.0;
1382: sum3 = 0.0;
1383: sum4 = 0.0;
1384: sum5 = 0.0;
1385: sum6 = 0.0;
1386: sum7 = 0.0;
1387: sum8 = 0.0;
1388: for (j=0; j<n; j++) {
1389: sum1 += v[jrow]*x[8*idx[jrow]];
1390: sum2 += v[jrow]*x[8*idx[jrow]+1];
1391: sum3 += v[jrow]*x[8*idx[jrow]+2];
1392: sum4 += v[jrow]*x[8*idx[jrow]+3];
1393: sum5 += v[jrow]*x[8*idx[jrow]+4];
1394: sum6 += v[jrow]*x[8*idx[jrow]+5];
1395: sum7 += v[jrow]*x[8*idx[jrow]+6];
1396: sum8 += v[jrow]*x[8*idx[jrow]+7];
1397: jrow++;
1398: }
1399: y[8*i] += sum1;
1400: y[8*i+1] += sum2;
1401: y[8*i+2] += sum3;
1402: y[8*i+3] += sum4;
1403: y[8*i+4] += sum5;
1404: y[8*i+5] += sum6;
1405: y[8*i+6] += sum7;
1406: y[8*i+7] += sum8;
1407: }
1409: PetscLogFlops(16.0*a->nz);
1410: VecRestoreArray(xx,(PetscScalar**)&x);
1411: VecRestoreArray(zz,&y);
1412: return(0);
1413: }
1417: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1418: {
1419: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1420: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1421: const PetscScalar *x,*v;
1422: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1423: PetscErrorCode ierr;
1424: const PetscInt m = b->AIJ->rmap->n,*idx;
1425: PetscInt n,i;
1428: if (yy != zz) {VecCopy(yy,zz);}
1429: VecGetArray(xx,(PetscScalar**)&x);
1430: VecGetArray(zz,&y);
1431: for (i=0; i<m; i++) {
1432: idx = a->j + a->i[i] ;
1433: v = a->a + a->i[i] ;
1434: n = a->i[i+1] - a->i[i];
1435: alpha1 = x[8*i];
1436: alpha2 = x[8*i+1];
1437: alpha3 = x[8*i+2];
1438: alpha4 = x[8*i+3];
1439: alpha5 = x[8*i+4];
1440: alpha6 = x[8*i+5];
1441: alpha7 = x[8*i+6];
1442: alpha8 = x[8*i+7];
1443: while (n-->0) {
1444: y[8*(*idx)] += alpha1*(*v);
1445: y[8*(*idx)+1] += alpha2*(*v);
1446: y[8*(*idx)+2] += alpha3*(*v);
1447: y[8*(*idx)+3] += alpha4*(*v);
1448: y[8*(*idx)+4] += alpha5*(*v);
1449: y[8*(*idx)+5] += alpha6*(*v);
1450: y[8*(*idx)+6] += alpha7*(*v);
1451: y[8*(*idx)+7] += alpha8*(*v);
1452: idx++; v++;
1453: }
1454: }
1455: PetscLogFlops(16.0*a->nz);
1456: VecRestoreArray(xx,(PetscScalar**)&x);
1457: VecRestoreArray(zz,&y);
1458: return(0);
1459: }
1461: /* ------------------------------------------------------------------------------*/
1464: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1465: {
1466: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1467: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1468: const PetscScalar *x,*v;
1469: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1470: PetscErrorCode ierr;
1471: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1472: PetscInt nonzerorow=0,n,i,jrow,j;
1475: VecGetArray(xx,(PetscScalar**)&x);
1476: VecGetArray(yy,&y);
1477: idx = a->j;
1478: v = a->a;
1479: ii = a->i;
1481: for (i=0; i<m; i++) {
1482: jrow = ii[i];
1483: n = ii[i+1] - jrow;
1484: sum1 = 0.0;
1485: sum2 = 0.0;
1486: sum3 = 0.0;
1487: sum4 = 0.0;
1488: sum5 = 0.0;
1489: sum6 = 0.0;
1490: sum7 = 0.0;
1491: sum8 = 0.0;
1492: sum9 = 0.0;
1493: nonzerorow += (n>0);
1494: for (j=0; j<n; j++) {
1495: sum1 += v[jrow]*x[9*idx[jrow]];
1496: sum2 += v[jrow]*x[9*idx[jrow]+1];
1497: sum3 += v[jrow]*x[9*idx[jrow]+2];
1498: sum4 += v[jrow]*x[9*idx[jrow]+3];
1499: sum5 += v[jrow]*x[9*idx[jrow]+4];
1500: sum6 += v[jrow]*x[9*idx[jrow]+5];
1501: sum7 += v[jrow]*x[9*idx[jrow]+6];
1502: sum8 += v[jrow]*x[9*idx[jrow]+7];
1503: sum9 += v[jrow]*x[9*idx[jrow]+8];
1504: jrow++;
1505: }
1506: y[9*i] = sum1;
1507: y[9*i+1] = sum2;
1508: y[9*i+2] = sum3;
1509: y[9*i+3] = sum4;
1510: y[9*i+4] = sum5;
1511: y[9*i+5] = sum6;
1512: y[9*i+6] = sum7;
1513: y[9*i+7] = sum8;
1514: y[9*i+8] = sum9;
1515: }
1517: PetscLogFlops(18.0*a->nz - 9*nonzerorow);
1518: VecRestoreArray(xx,(PetscScalar**)&x);
1519: VecRestoreArray(yy,&y);
1520: return(0);
1521: }
1523: /* ------------------------------------------------------------------------------*/
1527: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1528: {
1529: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1530: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1531: const PetscScalar *x,*v;
1532: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1533: PetscErrorCode ierr;
1534: const PetscInt m = b->AIJ->rmap->n,*idx;
1535: PetscInt n,i;
1538: VecSet(yy,0.0);
1539: VecGetArray(xx,(PetscScalar**)&x);
1540: VecGetArray(yy,&y);
1542: for (i=0; i<m; i++) {
1543: idx = a->j + a->i[i] ;
1544: v = a->a + a->i[i] ;
1545: n = a->i[i+1] - a->i[i];
1546: alpha1 = x[9*i];
1547: alpha2 = x[9*i+1];
1548: alpha3 = x[9*i+2];
1549: alpha4 = x[9*i+3];
1550: alpha5 = x[9*i+4];
1551: alpha6 = x[9*i+5];
1552: alpha7 = x[9*i+6];
1553: alpha8 = x[9*i+7];
1554: alpha9 = x[9*i+8];
1555: while (n-->0) {
1556: y[9*(*idx)] += alpha1*(*v);
1557: y[9*(*idx)+1] += alpha2*(*v);
1558: y[9*(*idx)+2] += alpha3*(*v);
1559: y[9*(*idx)+3] += alpha4*(*v);
1560: y[9*(*idx)+4] += alpha5*(*v);
1561: y[9*(*idx)+5] += alpha6*(*v);
1562: y[9*(*idx)+6] += alpha7*(*v);
1563: y[9*(*idx)+7] += alpha8*(*v);
1564: y[9*(*idx)+8] += alpha9*(*v);
1565: idx++; v++;
1566: }
1567: }
1568: PetscLogFlops(18.0*a->nz);
1569: VecRestoreArray(xx,(PetscScalar**)&x);
1570: VecRestoreArray(yy,&y);
1571: return(0);
1572: }
1576: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1577: {
1578: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1579: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1580: const PetscScalar *x,*v;
1581: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1582: PetscErrorCode ierr;
1583: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1584: PetscInt n,i,jrow,j;
1587: if (yy != zz) {VecCopy(yy,zz);}
1588: VecGetArray(xx,(PetscScalar**)&x);
1589: VecGetArray(zz,&y);
1590: idx = a->j;
1591: v = a->a;
1592: ii = a->i;
1594: for (i=0; i<m; i++) {
1595: jrow = ii[i];
1596: n = ii[i+1] - jrow;
1597: sum1 = 0.0;
1598: sum2 = 0.0;
1599: sum3 = 0.0;
1600: sum4 = 0.0;
1601: sum5 = 0.0;
1602: sum6 = 0.0;
1603: sum7 = 0.0;
1604: sum8 = 0.0;
1605: sum9 = 0.0;
1606: for (j=0; j<n; j++) {
1607: sum1 += v[jrow]*x[9*idx[jrow]];
1608: sum2 += v[jrow]*x[9*idx[jrow]+1];
1609: sum3 += v[jrow]*x[9*idx[jrow]+2];
1610: sum4 += v[jrow]*x[9*idx[jrow]+3];
1611: sum5 += v[jrow]*x[9*idx[jrow]+4];
1612: sum6 += v[jrow]*x[9*idx[jrow]+5];
1613: sum7 += v[jrow]*x[9*idx[jrow]+6];
1614: sum8 += v[jrow]*x[9*idx[jrow]+7];
1615: sum9 += v[jrow]*x[9*idx[jrow]+8];
1616: jrow++;
1617: }
1618: y[9*i] += sum1;
1619: y[9*i+1] += sum2;
1620: y[9*i+2] += sum3;
1621: y[9*i+3] += sum4;
1622: y[9*i+4] += sum5;
1623: y[9*i+5] += sum6;
1624: y[9*i+6] += sum7;
1625: y[9*i+7] += sum8;
1626: y[9*i+8] += sum9;
1627: }
1629: PetscLogFlops(18*a->nz);
1630: VecRestoreArray(xx,(PetscScalar**)&x);
1631: VecRestoreArray(zz,&y);
1632: return(0);
1633: }
1637: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1638: {
1639: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1640: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1641: const PetscScalar *x,*v;
1642: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1643: PetscErrorCode ierr;
1644: const PetscInt m = b->AIJ->rmap->n,*idx;
1645: PetscInt n,i;
1648: if (yy != zz) {VecCopy(yy,zz);}
1649: VecGetArray(xx,(PetscScalar**)&x);
1650: VecGetArray(zz,&y);
1651: for (i=0; i<m; i++) {
1652: idx = a->j + a->i[i] ;
1653: v = a->a + a->i[i] ;
1654: n = a->i[i+1] - a->i[i];
1655: alpha1 = x[9*i];
1656: alpha2 = x[9*i+1];
1657: alpha3 = x[9*i+2];
1658: alpha4 = x[9*i+3];
1659: alpha5 = x[9*i+4];
1660: alpha6 = x[9*i+5];
1661: alpha7 = x[9*i+6];
1662: alpha8 = x[9*i+7];
1663: alpha9 = x[9*i+8];
1664: while (n-->0) {
1665: y[9*(*idx)] += alpha1*(*v);
1666: y[9*(*idx)+1] += alpha2*(*v);
1667: y[9*(*idx)+2] += alpha3*(*v);
1668: y[9*(*idx)+3] += alpha4*(*v);
1669: y[9*(*idx)+4] += alpha5*(*v);
1670: y[9*(*idx)+5] += alpha6*(*v);
1671: y[9*(*idx)+6] += alpha7*(*v);
1672: y[9*(*idx)+7] += alpha8*(*v);
1673: y[9*(*idx)+8] += alpha9*(*v);
1674: idx++; v++;
1675: }
1676: }
1677: PetscLogFlops(18.0*a->nz);
1678: VecRestoreArray(xx,(PetscScalar**)&x);
1679: VecRestoreArray(zz,&y);
1680: return(0);
1681: }
1684: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1685: {
1686: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1687: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1688: const PetscScalar *x,*v;
1689: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1690: PetscErrorCode ierr;
1691: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1692: PetscInt nonzerorow=0,n,i,jrow,j;
1695: VecGetArray(xx,(PetscScalar**)&x);
1696: VecGetArray(yy,&y);
1697: idx = a->j;
1698: v = a->a;
1699: ii = a->i;
1701: for (i=0; i<m; i++) {
1702: jrow = ii[i];
1703: n = ii[i+1] - jrow;
1704: sum1 = 0.0;
1705: sum2 = 0.0;
1706: sum3 = 0.0;
1707: sum4 = 0.0;
1708: sum5 = 0.0;
1709: sum6 = 0.0;
1710: sum7 = 0.0;
1711: sum8 = 0.0;
1712: sum9 = 0.0;
1713: sum10 = 0.0;
1714: nonzerorow += (n>0);
1715: for (j=0; j<n; j++) {
1716: sum1 += v[jrow]*x[10*idx[jrow]];
1717: sum2 += v[jrow]*x[10*idx[jrow]+1];
1718: sum3 += v[jrow]*x[10*idx[jrow]+2];
1719: sum4 += v[jrow]*x[10*idx[jrow]+3];
1720: sum5 += v[jrow]*x[10*idx[jrow]+4];
1721: sum6 += v[jrow]*x[10*idx[jrow]+5];
1722: sum7 += v[jrow]*x[10*idx[jrow]+6];
1723: sum8 += v[jrow]*x[10*idx[jrow]+7];
1724: sum9 += v[jrow]*x[10*idx[jrow]+8];
1725: sum10 += v[jrow]*x[10*idx[jrow]+9];
1726: jrow++;
1727: }
1728: y[10*i] = sum1;
1729: y[10*i+1] = sum2;
1730: y[10*i+2] = sum3;
1731: y[10*i+3] = sum4;
1732: y[10*i+4] = sum5;
1733: y[10*i+5] = sum6;
1734: y[10*i+6] = sum7;
1735: y[10*i+7] = sum8;
1736: y[10*i+8] = sum9;
1737: y[10*i+9] = sum10;
1738: }
1740: PetscLogFlops(20.0*a->nz - 10.0*nonzerorow);
1741: VecRestoreArray(xx,(PetscScalar**)&x);
1742: VecRestoreArray(yy,&y);
1743: return(0);
1744: }
1748: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1749: {
1750: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1751: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1752: const PetscScalar *x,*v;
1753: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1754: PetscErrorCode ierr;
1755: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1756: PetscInt n,i,jrow,j;
1759: if (yy != zz) {VecCopy(yy,zz);}
1760: VecGetArray(xx,(PetscScalar**)&x);
1761: VecGetArray(zz,&y);
1762: idx = a->j;
1763: v = a->a;
1764: ii = a->i;
1766: for (i=0; i<m; i++) {
1767: jrow = ii[i];
1768: n = ii[i+1] - jrow;
1769: sum1 = 0.0;
1770: sum2 = 0.0;
1771: sum3 = 0.0;
1772: sum4 = 0.0;
1773: sum5 = 0.0;
1774: sum6 = 0.0;
1775: sum7 = 0.0;
1776: sum8 = 0.0;
1777: sum9 = 0.0;
1778: sum10 = 0.0;
1779: for (j=0; j<n; j++) {
1780: sum1 += v[jrow]*x[10*idx[jrow]];
1781: sum2 += v[jrow]*x[10*idx[jrow]+1];
1782: sum3 += v[jrow]*x[10*idx[jrow]+2];
1783: sum4 += v[jrow]*x[10*idx[jrow]+3];
1784: sum5 += v[jrow]*x[10*idx[jrow]+4];
1785: sum6 += v[jrow]*x[10*idx[jrow]+5];
1786: sum7 += v[jrow]*x[10*idx[jrow]+6];
1787: sum8 += v[jrow]*x[10*idx[jrow]+7];
1788: sum9 += v[jrow]*x[10*idx[jrow]+8];
1789: sum10 += v[jrow]*x[10*idx[jrow]+9];
1790: jrow++;
1791: }
1792: y[10*i] += sum1;
1793: y[10*i+1] += sum2;
1794: y[10*i+2] += sum3;
1795: y[10*i+3] += sum4;
1796: y[10*i+4] += sum5;
1797: y[10*i+5] += sum6;
1798: y[10*i+6] += sum7;
1799: y[10*i+7] += sum8;
1800: y[10*i+8] += sum9;
1801: y[10*i+9] += sum10;
1802: }
1804: PetscLogFlops(20.0*a->nz);
1805: VecRestoreArray(xx,(PetscScalar**)&x);
1806: VecRestoreArray(yy,&y);
1807: return(0);
1808: }
1812: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1813: {
1814: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1815: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1816: const PetscScalar *x,*v;
1817: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1818: PetscErrorCode ierr;
1819: const PetscInt m = b->AIJ->rmap->n,*idx;
1820: PetscInt n,i;
1823: VecSet(yy,0.0);
1824: VecGetArray(xx,(PetscScalar**)&x);
1825: VecGetArray(yy,&y);
1827: for (i=0; i<m; i++) {
1828: idx = a->j + a->i[i] ;
1829: v = a->a + a->i[i] ;
1830: n = a->i[i+1] - a->i[i];
1831: alpha1 = x[10*i];
1832: alpha2 = x[10*i+1];
1833: alpha3 = x[10*i+2];
1834: alpha4 = x[10*i+3];
1835: alpha5 = x[10*i+4];
1836: alpha6 = x[10*i+5];
1837: alpha7 = x[10*i+6];
1838: alpha8 = x[10*i+7];
1839: alpha9 = x[10*i+8];
1840: alpha10 = x[10*i+9];
1841: while (n-->0) {
1842: y[10*(*idx)] += alpha1*(*v);
1843: y[10*(*idx)+1] += alpha2*(*v);
1844: y[10*(*idx)+2] += alpha3*(*v);
1845: y[10*(*idx)+3] += alpha4*(*v);
1846: y[10*(*idx)+4] += alpha5*(*v);
1847: y[10*(*idx)+5] += alpha6*(*v);
1848: y[10*(*idx)+6] += alpha7*(*v);
1849: y[10*(*idx)+7] += alpha8*(*v);
1850: y[10*(*idx)+8] += alpha9*(*v);
1851: y[10*(*idx)+9] += alpha10*(*v);
1852: idx++; v++;
1853: }
1854: }
1855: PetscLogFlops(20.0*a->nz);
1856: VecRestoreArray(xx,(PetscScalar**)&x);
1857: VecRestoreArray(yy,&y);
1858: return(0);
1859: }
1863: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1864: {
1865: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1866: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1867: const PetscScalar *x,*v;
1868: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1869: PetscErrorCode ierr;
1870: const PetscInt m = b->AIJ->rmap->n,*idx;
1871: PetscInt n,i;
1874: if (yy != zz) {VecCopy(yy,zz);}
1875: VecGetArray(xx,(PetscScalar**)&x);
1876: VecGetArray(zz,&y);
1877: for (i=0; i<m; i++) {
1878: idx = a->j + a->i[i] ;
1879: v = a->a + a->i[i] ;
1880: n = a->i[i+1] - a->i[i];
1881: alpha1 = x[10*i];
1882: alpha2 = x[10*i+1];
1883: alpha3 = x[10*i+2];
1884: alpha4 = x[10*i+3];
1885: alpha5 = x[10*i+4];
1886: alpha6 = x[10*i+5];
1887: alpha7 = x[10*i+6];
1888: alpha8 = x[10*i+7];
1889: alpha9 = x[10*i+8];
1890: alpha10 = x[10*i+9];
1891: while (n-->0) {
1892: y[10*(*idx)] += alpha1*(*v);
1893: y[10*(*idx)+1] += alpha2*(*v);
1894: y[10*(*idx)+2] += alpha3*(*v);
1895: y[10*(*idx)+3] += alpha4*(*v);
1896: y[10*(*idx)+4] += alpha5*(*v);
1897: y[10*(*idx)+5] += alpha6*(*v);
1898: y[10*(*idx)+6] += alpha7*(*v);
1899: y[10*(*idx)+7] += alpha8*(*v);
1900: y[10*(*idx)+8] += alpha9*(*v);
1901: y[10*(*idx)+9] += alpha10*(*v);
1902: idx++; v++;
1903: }
1904: }
1905: PetscLogFlops(20.0*a->nz);
1906: VecRestoreArray(xx,(PetscScalar**)&x);
1907: VecRestoreArray(zz,&y);
1908: return(0);
1909: }
1912: /*--------------------------------------------------------------------------------------------*/
1915: PetscErrorCode MatMult_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
1916: {
1917: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1918: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1919: const PetscScalar *x,*v;
1920: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1921: PetscErrorCode ierr;
1922: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1923: PetscInt nonzerorow=0,n,i,jrow,j;
1926: VecGetArray(xx,(PetscScalar**)&x);
1927: VecGetArray(yy,&y);
1928: idx = a->j;
1929: v = a->a;
1930: ii = a->i;
1932: for (i=0; i<m; i++) {
1933: jrow = ii[i];
1934: n = ii[i+1] - jrow;
1935: sum1 = 0.0;
1936: sum2 = 0.0;
1937: sum3 = 0.0;
1938: sum4 = 0.0;
1939: sum5 = 0.0;
1940: sum6 = 0.0;
1941: sum7 = 0.0;
1942: sum8 = 0.0;
1943: sum9 = 0.0;
1944: sum10 = 0.0;
1945: sum11 = 0.0;
1946: nonzerorow += (n>0);
1947: for (j=0; j<n; j++) {
1948: sum1 += v[jrow]*x[11*idx[jrow]];
1949: sum2 += v[jrow]*x[11*idx[jrow]+1];
1950: sum3 += v[jrow]*x[11*idx[jrow]+2];
1951: sum4 += v[jrow]*x[11*idx[jrow]+3];
1952: sum5 += v[jrow]*x[11*idx[jrow]+4];
1953: sum6 += v[jrow]*x[11*idx[jrow]+5];
1954: sum7 += v[jrow]*x[11*idx[jrow]+6];
1955: sum8 += v[jrow]*x[11*idx[jrow]+7];
1956: sum9 += v[jrow]*x[11*idx[jrow]+8];
1957: sum10 += v[jrow]*x[11*idx[jrow]+9];
1958: sum11 += v[jrow]*x[11*idx[jrow]+10];
1959: jrow++;
1960: }
1961: y[11*i] = sum1;
1962: y[11*i+1] = sum2;
1963: y[11*i+2] = sum3;
1964: y[11*i+3] = sum4;
1965: y[11*i+4] = sum5;
1966: y[11*i+5] = sum6;
1967: y[11*i+6] = sum7;
1968: y[11*i+7] = sum8;
1969: y[11*i+8] = sum9;
1970: y[11*i+9] = sum10;
1971: y[11*i+10] = sum11;
1972: }
1974: PetscLogFlops(22*a->nz - 11*nonzerorow);
1975: VecRestoreArray(xx,(PetscScalar**)&x);
1976: VecRestoreArray(yy,&y);
1977: return(0);
1978: }
1982: PetscErrorCode MatMultAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
1983: {
1984: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1985: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1986: const PetscScalar *x,*v;
1987: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10, sum11;
1988: PetscErrorCode ierr;
1989: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
1990: PetscInt n,i,jrow,j;
1993: if (yy != zz) {VecCopy(yy,zz);}
1994: VecGetArray(xx,(PetscScalar**)&x);
1995: VecGetArray(zz,&y);
1996: idx = a->j;
1997: v = a->a;
1998: ii = a->i;
2000: for (i=0; i<m; i++) {
2001: jrow = ii[i];
2002: n = ii[i+1] - jrow;
2003: sum1 = 0.0;
2004: sum2 = 0.0;
2005: sum3 = 0.0;
2006: sum4 = 0.0;
2007: sum5 = 0.0;
2008: sum6 = 0.0;
2009: sum7 = 0.0;
2010: sum8 = 0.0;
2011: sum9 = 0.0;
2012: sum10 = 0.0;
2013: sum11 = 0.0;
2014: for (j=0; j<n; j++) {
2015: sum1 += v[jrow]*x[11*idx[jrow]];
2016: sum2 += v[jrow]*x[11*idx[jrow]+1];
2017: sum3 += v[jrow]*x[11*idx[jrow]+2];
2018: sum4 += v[jrow]*x[11*idx[jrow]+3];
2019: sum5 += v[jrow]*x[11*idx[jrow]+4];
2020: sum6 += v[jrow]*x[11*idx[jrow]+5];
2021: sum7 += v[jrow]*x[11*idx[jrow]+6];
2022: sum8 += v[jrow]*x[11*idx[jrow]+7];
2023: sum9 += v[jrow]*x[11*idx[jrow]+8];
2024: sum10 += v[jrow]*x[11*idx[jrow]+9];
2025: sum11 += v[jrow]*x[11*idx[jrow]+10];
2026: jrow++;
2027: }
2028: y[11*i] += sum1;
2029: y[11*i+1] += sum2;
2030: y[11*i+2] += sum3;
2031: y[11*i+3] += sum4;
2032: y[11*i+4] += sum5;
2033: y[11*i+5] += sum6;
2034: y[11*i+6] += sum7;
2035: y[11*i+7] += sum8;
2036: y[11*i+8] += sum9;
2037: y[11*i+9] += sum10;
2038: y[11*i+10] += sum11;
2039: }
2041: PetscLogFlops(22*a->nz);
2042: VecRestoreArray(xx,(PetscScalar**)&x);
2043: VecRestoreArray(yy,&y);
2044: return(0);
2045: }
2049: PetscErrorCode MatMultTranspose_SeqMAIJ_11(Mat A,Vec xx,Vec yy)
2050: {
2051: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2052: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2053: const PetscScalar *x,*v;
2054: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2055: PetscErrorCode ierr;
2056: const PetscInt m = b->AIJ->rmap->n,*idx;
2057: PetscInt n,i;
2060: VecSet(yy,0.0);
2061: VecGetArray(xx,(PetscScalar**)&x);
2062: VecGetArray(yy,&y);
2064: for (i=0; i<m; i++) {
2065: idx = a->j + a->i[i] ;
2066: v = a->a + a->i[i] ;
2067: n = a->i[i+1] - a->i[i];
2068: alpha1 = x[11*i];
2069: alpha2 = x[11*i+1];
2070: alpha3 = x[11*i+2];
2071: alpha4 = x[11*i+3];
2072: alpha5 = x[11*i+4];
2073: alpha6 = x[11*i+5];
2074: alpha7 = x[11*i+6];
2075: alpha8 = x[11*i+7];
2076: alpha9 = x[11*i+8];
2077: alpha10 = x[11*i+9];
2078: alpha11 = x[11*i+10];
2079: while (n-->0) {
2080: y[11*(*idx)] += alpha1*(*v);
2081: y[11*(*idx)+1] += alpha2*(*v);
2082: y[11*(*idx)+2] += alpha3*(*v);
2083: y[11*(*idx)+3] += alpha4*(*v);
2084: y[11*(*idx)+4] += alpha5*(*v);
2085: y[11*(*idx)+5] += alpha6*(*v);
2086: y[11*(*idx)+6] += alpha7*(*v);
2087: y[11*(*idx)+7] += alpha8*(*v);
2088: y[11*(*idx)+8] += alpha9*(*v);
2089: y[11*(*idx)+9] += alpha10*(*v);
2090: y[11*(*idx)+10] += alpha11*(*v);
2091: idx++; v++;
2092: }
2093: }
2094: PetscLogFlops(22*a->nz);
2095: VecRestoreArray(xx,(PetscScalar**)&x);
2096: VecRestoreArray(yy,&y);
2097: return(0);
2098: }
2102: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_11(Mat A,Vec xx,Vec yy,Vec zz)
2103: {
2104: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2105: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2106: const PetscScalar *x,*v;
2107: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,alpha11;
2108: PetscErrorCode ierr;
2109: const PetscInt m = b->AIJ->rmap->n,*idx;
2110: PetscInt n,i;
2113: if (yy != zz) {VecCopy(yy,zz);}
2114: VecGetArray(xx,(PetscScalar**)&x);
2115: VecGetArray(zz,&y);
2116: for (i=0; i<m; i++) {
2117: idx = a->j + a->i[i] ;
2118: v = a->a + a->i[i] ;
2119: n = a->i[i+1] - a->i[i];
2120: alpha1 = x[11*i];
2121: alpha2 = x[11*i+1];
2122: alpha3 = x[11*i+2];
2123: alpha4 = x[11*i+3];
2124: alpha5 = x[11*i+4];
2125: alpha6 = x[11*i+5];
2126: alpha7 = x[11*i+6];
2127: alpha8 = x[11*i+7];
2128: alpha9 = x[11*i+8];
2129: alpha10 = x[11*i+9];
2130: alpha11 = x[11*i+10];
2131: while (n-->0) {
2132: y[11*(*idx)] += alpha1*(*v);
2133: y[11*(*idx)+1] += alpha2*(*v);
2134: y[11*(*idx)+2] += alpha3*(*v);
2135: y[11*(*idx)+3] += alpha4*(*v);
2136: y[11*(*idx)+4] += alpha5*(*v);
2137: y[11*(*idx)+5] += alpha6*(*v);
2138: y[11*(*idx)+6] += alpha7*(*v);
2139: y[11*(*idx)+7] += alpha8*(*v);
2140: y[11*(*idx)+8] += alpha9*(*v);
2141: y[11*(*idx)+9] += alpha10*(*v);
2142: y[11*(*idx)+10] += alpha11*(*v);
2143: idx++; v++;
2144: }
2145: }
2146: PetscLogFlops(22*a->nz);
2147: VecRestoreArray(xx,(PetscScalar**)&x);
2148: VecRestoreArray(zz,&y);
2149: return(0);
2150: }
2153: /*--------------------------------------------------------------------------------------------*/
2156: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2157: {
2158: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2159: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2160: const PetscScalar *x,*v;
2161: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2162: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2163: PetscErrorCode ierr;
2164: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2165: PetscInt nonzerorow=0,n,i,jrow,j;
2168: VecGetArray(xx,(PetscScalar**)&x);
2169: VecGetArray(yy,&y);
2170: idx = a->j;
2171: v = a->a;
2172: ii = a->i;
2174: for (i=0; i<m; i++) {
2175: jrow = ii[i];
2176: n = ii[i+1] - jrow;
2177: sum1 = 0.0;
2178: sum2 = 0.0;
2179: sum3 = 0.0;
2180: sum4 = 0.0;
2181: sum5 = 0.0;
2182: sum6 = 0.0;
2183: sum7 = 0.0;
2184: sum8 = 0.0;
2185: sum9 = 0.0;
2186: sum10 = 0.0;
2187: sum11 = 0.0;
2188: sum12 = 0.0;
2189: sum13 = 0.0;
2190: sum14 = 0.0;
2191: sum15 = 0.0;
2192: sum16 = 0.0;
2193: nonzerorow += (n>0);
2194: for (j=0; j<n; j++) {
2195: sum1 += v[jrow]*x[16*idx[jrow]];
2196: sum2 += v[jrow]*x[16*idx[jrow]+1];
2197: sum3 += v[jrow]*x[16*idx[jrow]+2];
2198: sum4 += v[jrow]*x[16*idx[jrow]+3];
2199: sum5 += v[jrow]*x[16*idx[jrow]+4];
2200: sum6 += v[jrow]*x[16*idx[jrow]+5];
2201: sum7 += v[jrow]*x[16*idx[jrow]+6];
2202: sum8 += v[jrow]*x[16*idx[jrow]+7];
2203: sum9 += v[jrow]*x[16*idx[jrow]+8];
2204: sum10 += v[jrow]*x[16*idx[jrow]+9];
2205: sum11 += v[jrow]*x[16*idx[jrow]+10];
2206: sum12 += v[jrow]*x[16*idx[jrow]+11];
2207: sum13 += v[jrow]*x[16*idx[jrow]+12];
2208: sum14 += v[jrow]*x[16*idx[jrow]+13];
2209: sum15 += v[jrow]*x[16*idx[jrow]+14];
2210: sum16 += v[jrow]*x[16*idx[jrow]+15];
2211: jrow++;
2212: }
2213: y[16*i] = sum1;
2214: y[16*i+1] = sum2;
2215: y[16*i+2] = sum3;
2216: y[16*i+3] = sum4;
2217: y[16*i+4] = sum5;
2218: y[16*i+5] = sum6;
2219: y[16*i+6] = sum7;
2220: y[16*i+7] = sum8;
2221: y[16*i+8] = sum9;
2222: y[16*i+9] = sum10;
2223: y[16*i+10] = sum11;
2224: y[16*i+11] = sum12;
2225: y[16*i+12] = sum13;
2226: y[16*i+13] = sum14;
2227: y[16*i+14] = sum15;
2228: y[16*i+15] = sum16;
2229: }
2231: PetscLogFlops(32.0*a->nz - 16.0*nonzerorow);
2232: VecRestoreArray(xx,(PetscScalar**)&x);
2233: VecRestoreArray(yy,&y);
2234: return(0);
2235: }
2239: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
2240: {
2241: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2242: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2243: const PetscScalar *x,*v;
2244: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2245: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2246: PetscErrorCode ierr;
2247: const PetscInt m = b->AIJ->rmap->n,*idx;
2248: PetscInt n,i;
2251: VecSet(yy,0.0);
2252: VecGetArray(xx,(PetscScalar**)&x);
2253: VecGetArray(yy,&y);
2255: for (i=0; i<m; i++) {
2256: idx = a->j + a->i[i] ;
2257: v = a->a + a->i[i] ;
2258: n = a->i[i+1] - a->i[i];
2259: alpha1 = x[16*i];
2260: alpha2 = x[16*i+1];
2261: alpha3 = x[16*i+2];
2262: alpha4 = x[16*i+3];
2263: alpha5 = x[16*i+4];
2264: alpha6 = x[16*i+5];
2265: alpha7 = x[16*i+6];
2266: alpha8 = x[16*i+7];
2267: alpha9 = x[16*i+8];
2268: alpha10 = x[16*i+9];
2269: alpha11 = x[16*i+10];
2270: alpha12 = x[16*i+11];
2271: alpha13 = x[16*i+12];
2272: alpha14 = x[16*i+13];
2273: alpha15 = x[16*i+14];
2274: alpha16 = x[16*i+15];
2275: while (n-->0) {
2276: y[16*(*idx)] += alpha1*(*v);
2277: y[16*(*idx)+1] += alpha2*(*v);
2278: y[16*(*idx)+2] += alpha3*(*v);
2279: y[16*(*idx)+3] += alpha4*(*v);
2280: y[16*(*idx)+4] += alpha5*(*v);
2281: y[16*(*idx)+5] += alpha6*(*v);
2282: y[16*(*idx)+6] += alpha7*(*v);
2283: y[16*(*idx)+7] += alpha8*(*v);
2284: y[16*(*idx)+8] += alpha9*(*v);
2285: y[16*(*idx)+9] += alpha10*(*v);
2286: y[16*(*idx)+10] += alpha11*(*v);
2287: y[16*(*idx)+11] += alpha12*(*v);
2288: y[16*(*idx)+12] += alpha13*(*v);
2289: y[16*(*idx)+13] += alpha14*(*v);
2290: y[16*(*idx)+14] += alpha15*(*v);
2291: y[16*(*idx)+15] += alpha16*(*v);
2292: idx++; v++;
2293: }
2294: }
2295: PetscLogFlops(32.0*a->nz);
2296: VecRestoreArray(xx,(PetscScalar**)&x);
2297: VecRestoreArray(yy,&y);
2298: return(0);
2299: }
2303: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2304: {
2305: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2306: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2307: const PetscScalar *x,*v;
2308: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2309: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
2310: PetscErrorCode ierr;
2311: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2312: PetscInt n,i,jrow,j;
2315: if (yy != zz) {VecCopy(yy,zz);}
2316: VecGetArray(xx,(PetscScalar**)&x);
2317: VecGetArray(zz,&y);
2318: idx = a->j;
2319: v = a->a;
2320: ii = a->i;
2322: for (i=0; i<m; i++) {
2323: jrow = ii[i];
2324: n = ii[i+1] - jrow;
2325: sum1 = 0.0;
2326: sum2 = 0.0;
2327: sum3 = 0.0;
2328: sum4 = 0.0;
2329: sum5 = 0.0;
2330: sum6 = 0.0;
2331: sum7 = 0.0;
2332: sum8 = 0.0;
2333: sum9 = 0.0;
2334: sum10 = 0.0;
2335: sum11 = 0.0;
2336: sum12 = 0.0;
2337: sum13 = 0.0;
2338: sum14 = 0.0;
2339: sum15 = 0.0;
2340: sum16 = 0.0;
2341: for (j=0; j<n; j++) {
2342: sum1 += v[jrow]*x[16*idx[jrow]];
2343: sum2 += v[jrow]*x[16*idx[jrow]+1];
2344: sum3 += v[jrow]*x[16*idx[jrow]+2];
2345: sum4 += v[jrow]*x[16*idx[jrow]+3];
2346: sum5 += v[jrow]*x[16*idx[jrow]+4];
2347: sum6 += v[jrow]*x[16*idx[jrow]+5];
2348: sum7 += v[jrow]*x[16*idx[jrow]+6];
2349: sum8 += v[jrow]*x[16*idx[jrow]+7];
2350: sum9 += v[jrow]*x[16*idx[jrow]+8];
2351: sum10 += v[jrow]*x[16*idx[jrow]+9];
2352: sum11 += v[jrow]*x[16*idx[jrow]+10];
2353: sum12 += v[jrow]*x[16*idx[jrow]+11];
2354: sum13 += v[jrow]*x[16*idx[jrow]+12];
2355: sum14 += v[jrow]*x[16*idx[jrow]+13];
2356: sum15 += v[jrow]*x[16*idx[jrow]+14];
2357: sum16 += v[jrow]*x[16*idx[jrow]+15];
2358: jrow++;
2359: }
2360: y[16*i] += sum1;
2361: y[16*i+1] += sum2;
2362: y[16*i+2] += sum3;
2363: y[16*i+3] += sum4;
2364: y[16*i+4] += sum5;
2365: y[16*i+5] += sum6;
2366: y[16*i+6] += sum7;
2367: y[16*i+7] += sum8;
2368: y[16*i+8] += sum9;
2369: y[16*i+9] += sum10;
2370: y[16*i+10] += sum11;
2371: y[16*i+11] += sum12;
2372: y[16*i+12] += sum13;
2373: y[16*i+13] += sum14;
2374: y[16*i+14] += sum15;
2375: y[16*i+15] += sum16;
2376: }
2378: PetscLogFlops(32.0*a->nz);
2379: VecRestoreArray(xx,(PetscScalar**)&x);
2380: VecRestoreArray(zz,&y);
2381: return(0);
2382: }
2386: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2387: {
2388: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2389: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2390: const PetscScalar *x,*v;
2391: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2392: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2393: PetscErrorCode ierr;
2394: const PetscInt m = b->AIJ->rmap->n,*idx;
2395: PetscInt n,i;
2398: if (yy != zz) {VecCopy(yy,zz);}
2399: VecGetArray(xx,(PetscScalar**)&x);
2400: VecGetArray(zz,&y);
2401: for (i=0; i<m; i++) {
2402: idx = a->j + a->i[i] ;
2403: v = a->a + a->i[i] ;
2404: n = a->i[i+1] - a->i[i];
2405: alpha1 = x[16*i];
2406: alpha2 = x[16*i+1];
2407: alpha3 = x[16*i+2];
2408: alpha4 = x[16*i+3];
2409: alpha5 = x[16*i+4];
2410: alpha6 = x[16*i+5];
2411: alpha7 = x[16*i+6];
2412: alpha8 = x[16*i+7];
2413: alpha9 = x[16*i+8];
2414: alpha10 = x[16*i+9];
2415: alpha11 = x[16*i+10];
2416: alpha12 = x[16*i+11];
2417: alpha13 = x[16*i+12];
2418: alpha14 = x[16*i+13];
2419: alpha15 = x[16*i+14];
2420: alpha16 = x[16*i+15];
2421: while (n-->0) {
2422: y[16*(*idx)] += alpha1*(*v);
2423: y[16*(*idx)+1] += alpha2*(*v);
2424: y[16*(*idx)+2] += alpha3*(*v);
2425: y[16*(*idx)+3] += alpha4*(*v);
2426: y[16*(*idx)+4] += alpha5*(*v);
2427: y[16*(*idx)+5] += alpha6*(*v);
2428: y[16*(*idx)+6] += alpha7*(*v);
2429: y[16*(*idx)+7] += alpha8*(*v);
2430: y[16*(*idx)+8] += alpha9*(*v);
2431: y[16*(*idx)+9] += alpha10*(*v);
2432: y[16*(*idx)+10] += alpha11*(*v);
2433: y[16*(*idx)+11] += alpha12*(*v);
2434: y[16*(*idx)+12] += alpha13*(*v);
2435: y[16*(*idx)+13] += alpha14*(*v);
2436: y[16*(*idx)+14] += alpha15*(*v);
2437: y[16*(*idx)+15] += alpha16*(*v);
2438: idx++; v++;
2439: }
2440: }
2441: PetscLogFlops(32.0*a->nz);
2442: VecRestoreArray(xx,(PetscScalar**)&x);
2443: VecRestoreArray(zz,&y);
2444: return(0);
2445: }
2447: /*--------------------------------------------------------------------------------------------*/
2450: PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2451: {
2452: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2453: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2454: const PetscScalar *x,*v;
2455: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2456: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2457: PetscErrorCode ierr;
2458: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2459: PetscInt nonzerorow=0,n,i,jrow,j;
2462: VecGetArray(xx,(PetscScalar**)&x);
2463: VecGetArray(yy,&y);
2464: idx = a->j;
2465: v = a->a;
2466: ii = a->i;
2468: for (i=0; i<m; i++) {
2469: jrow = ii[i];
2470: n = ii[i+1] - jrow;
2471: sum1 = 0.0;
2472: sum2 = 0.0;
2473: sum3 = 0.0;
2474: sum4 = 0.0;
2475: sum5 = 0.0;
2476: sum6 = 0.0;
2477: sum7 = 0.0;
2478: sum8 = 0.0;
2479: sum9 = 0.0;
2480: sum10 = 0.0;
2481: sum11 = 0.0;
2482: sum12 = 0.0;
2483: sum13 = 0.0;
2484: sum14 = 0.0;
2485: sum15 = 0.0;
2486: sum16 = 0.0;
2487: sum17 = 0.0;
2488: sum18 = 0.0;
2489: nonzerorow += (n>0);
2490: for (j=0; j<n; j++) {
2491: sum1 += v[jrow]*x[18*idx[jrow]];
2492: sum2 += v[jrow]*x[18*idx[jrow]+1];
2493: sum3 += v[jrow]*x[18*idx[jrow]+2];
2494: sum4 += v[jrow]*x[18*idx[jrow]+3];
2495: sum5 += v[jrow]*x[18*idx[jrow]+4];
2496: sum6 += v[jrow]*x[18*idx[jrow]+5];
2497: sum7 += v[jrow]*x[18*idx[jrow]+6];
2498: sum8 += v[jrow]*x[18*idx[jrow]+7];
2499: sum9 += v[jrow]*x[18*idx[jrow]+8];
2500: sum10 += v[jrow]*x[18*idx[jrow]+9];
2501: sum11 += v[jrow]*x[18*idx[jrow]+10];
2502: sum12 += v[jrow]*x[18*idx[jrow]+11];
2503: sum13 += v[jrow]*x[18*idx[jrow]+12];
2504: sum14 += v[jrow]*x[18*idx[jrow]+13];
2505: sum15 += v[jrow]*x[18*idx[jrow]+14];
2506: sum16 += v[jrow]*x[18*idx[jrow]+15];
2507: sum17 += v[jrow]*x[18*idx[jrow]+16];
2508: sum18 += v[jrow]*x[18*idx[jrow]+17];
2509: jrow++;
2510: }
2511: y[18*i] = sum1;
2512: y[18*i+1] = sum2;
2513: y[18*i+2] = sum3;
2514: y[18*i+3] = sum4;
2515: y[18*i+4] = sum5;
2516: y[18*i+5] = sum6;
2517: y[18*i+6] = sum7;
2518: y[18*i+7] = sum8;
2519: y[18*i+8] = sum9;
2520: y[18*i+9] = sum10;
2521: y[18*i+10] = sum11;
2522: y[18*i+11] = sum12;
2523: y[18*i+12] = sum13;
2524: y[18*i+13] = sum14;
2525: y[18*i+14] = sum15;
2526: y[18*i+15] = sum16;
2527: y[18*i+16] = sum17;
2528: y[18*i+17] = sum18;
2529: }
2531: PetscLogFlops(36.0*a->nz - 18.0*nonzerorow);
2532: VecRestoreArray(xx,(PetscScalar**)&x);
2533: VecRestoreArray(yy,&y);
2534: return(0);
2535: }
2539: PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy)
2540: {
2541: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2542: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2543: const PetscScalar *x,*v;
2544: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2545: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2546: PetscErrorCode ierr;
2547: const PetscInt m = b->AIJ->rmap->n,*idx;
2548: PetscInt n,i;
2551: VecSet(yy,0.0);
2552: VecGetArray(xx,(PetscScalar**)&x);
2553: VecGetArray(yy,&y);
2555: for (i=0; i<m; i++) {
2556: idx = a->j + a->i[i] ;
2557: v = a->a + a->i[i] ;
2558: n = a->i[i+1] - a->i[i];
2559: alpha1 = x[18*i];
2560: alpha2 = x[18*i+1];
2561: alpha3 = x[18*i+2];
2562: alpha4 = x[18*i+3];
2563: alpha5 = x[18*i+4];
2564: alpha6 = x[18*i+5];
2565: alpha7 = x[18*i+6];
2566: alpha8 = x[18*i+7];
2567: alpha9 = x[18*i+8];
2568: alpha10 = x[18*i+9];
2569: alpha11 = x[18*i+10];
2570: alpha12 = x[18*i+11];
2571: alpha13 = x[18*i+12];
2572: alpha14 = x[18*i+13];
2573: alpha15 = x[18*i+14];
2574: alpha16 = x[18*i+15];
2575: alpha17 = x[18*i+16];
2576: alpha18 = x[18*i+17];
2577: while (n-->0) {
2578: y[18*(*idx)] += alpha1*(*v);
2579: y[18*(*idx)+1] += alpha2*(*v);
2580: y[18*(*idx)+2] += alpha3*(*v);
2581: y[18*(*idx)+3] += alpha4*(*v);
2582: y[18*(*idx)+4] += alpha5*(*v);
2583: y[18*(*idx)+5] += alpha6*(*v);
2584: y[18*(*idx)+6] += alpha7*(*v);
2585: y[18*(*idx)+7] += alpha8*(*v);
2586: y[18*(*idx)+8] += alpha9*(*v);
2587: y[18*(*idx)+9] += alpha10*(*v);
2588: y[18*(*idx)+10] += alpha11*(*v);
2589: y[18*(*idx)+11] += alpha12*(*v);
2590: y[18*(*idx)+12] += alpha13*(*v);
2591: y[18*(*idx)+13] += alpha14*(*v);
2592: y[18*(*idx)+14] += alpha15*(*v);
2593: y[18*(*idx)+15] += alpha16*(*v);
2594: y[18*(*idx)+16] += alpha17*(*v);
2595: y[18*(*idx)+17] += alpha18*(*v);
2596: idx++; v++;
2597: }
2598: }
2599: PetscLogFlops(36.0*a->nz);
2600: VecRestoreArray(xx,(PetscScalar**)&x);
2601: VecRestoreArray(yy,&y);
2602: return(0);
2603: }
2607: PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2608: {
2609: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2610: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2611: const PetscScalar *x,*v;
2612: PetscScalar *y,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
2613: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18;
2614: PetscErrorCode ierr;
2615: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2616: PetscInt n,i,jrow,j;
2619: if (yy != zz) {VecCopy(yy,zz);}
2620: VecGetArray(xx,(PetscScalar**)&x);
2621: VecGetArray(zz,&y);
2622: idx = a->j;
2623: v = a->a;
2624: ii = a->i;
2626: for (i=0; i<m; i++) {
2627: jrow = ii[i];
2628: n = ii[i+1] - jrow;
2629: sum1 = 0.0;
2630: sum2 = 0.0;
2631: sum3 = 0.0;
2632: sum4 = 0.0;
2633: sum5 = 0.0;
2634: sum6 = 0.0;
2635: sum7 = 0.0;
2636: sum8 = 0.0;
2637: sum9 = 0.0;
2638: sum10 = 0.0;
2639: sum11 = 0.0;
2640: sum12 = 0.0;
2641: sum13 = 0.0;
2642: sum14 = 0.0;
2643: sum15 = 0.0;
2644: sum16 = 0.0;
2645: sum17 = 0.0;
2646: sum18 = 0.0;
2647: for (j=0; j<n; j++) {
2648: sum1 += v[jrow]*x[18*idx[jrow]];
2649: sum2 += v[jrow]*x[18*idx[jrow]+1];
2650: sum3 += v[jrow]*x[18*idx[jrow]+2];
2651: sum4 += v[jrow]*x[18*idx[jrow]+3];
2652: sum5 += v[jrow]*x[18*idx[jrow]+4];
2653: sum6 += v[jrow]*x[18*idx[jrow]+5];
2654: sum7 += v[jrow]*x[18*idx[jrow]+6];
2655: sum8 += v[jrow]*x[18*idx[jrow]+7];
2656: sum9 += v[jrow]*x[18*idx[jrow]+8];
2657: sum10 += v[jrow]*x[18*idx[jrow]+9];
2658: sum11 += v[jrow]*x[18*idx[jrow]+10];
2659: sum12 += v[jrow]*x[18*idx[jrow]+11];
2660: sum13 += v[jrow]*x[18*idx[jrow]+12];
2661: sum14 += v[jrow]*x[18*idx[jrow]+13];
2662: sum15 += v[jrow]*x[18*idx[jrow]+14];
2663: sum16 += v[jrow]*x[18*idx[jrow]+15];
2664: sum17 += v[jrow]*x[18*idx[jrow]+16];
2665: sum18 += v[jrow]*x[18*idx[jrow]+17];
2666: jrow++;
2667: }
2668: y[18*i] += sum1;
2669: y[18*i+1] += sum2;
2670: y[18*i+2] += sum3;
2671: y[18*i+3] += sum4;
2672: y[18*i+4] += sum5;
2673: y[18*i+5] += sum6;
2674: y[18*i+6] += sum7;
2675: y[18*i+7] += sum8;
2676: y[18*i+8] += sum9;
2677: y[18*i+9] += sum10;
2678: y[18*i+10] += sum11;
2679: y[18*i+11] += sum12;
2680: y[18*i+12] += sum13;
2681: y[18*i+13] += sum14;
2682: y[18*i+14] += sum15;
2683: y[18*i+15] += sum16;
2684: y[18*i+16] += sum17;
2685: y[18*i+17] += sum18;
2686: }
2688: PetscLogFlops(36.0*a->nz);
2689: VecRestoreArray(xx,(PetscScalar**)&x);
2690: VecRestoreArray(zz,&y);
2691: return(0);
2692: }
2696: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz)
2697: {
2698: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2699: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2700: const PetscScalar *x,*v;
2701: PetscScalar *y,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2702: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18;
2703: PetscErrorCode ierr;
2704: const PetscInt m = b->AIJ->rmap->n,*idx;
2705: PetscInt n,i;
2708: if (yy != zz) {VecCopy(yy,zz);}
2709: VecGetArray(xx,(PetscScalar**)&x);
2710: VecGetArray(zz,&y);
2711: for (i=0; i<m; i++) {
2712: idx = a->j + a->i[i] ;
2713: v = a->a + a->i[i] ;
2714: n = a->i[i+1] - a->i[i];
2715: alpha1 = x[18*i];
2716: alpha2 = x[18*i+1];
2717: alpha3 = x[18*i+2];
2718: alpha4 = x[18*i+3];
2719: alpha5 = x[18*i+4];
2720: alpha6 = x[18*i+5];
2721: alpha7 = x[18*i+6];
2722: alpha8 = x[18*i+7];
2723: alpha9 = x[18*i+8];
2724: alpha10 = x[18*i+9];
2725: alpha11 = x[18*i+10];
2726: alpha12 = x[18*i+11];
2727: alpha13 = x[18*i+12];
2728: alpha14 = x[18*i+13];
2729: alpha15 = x[18*i+14];
2730: alpha16 = x[18*i+15];
2731: alpha17 = x[18*i+16];
2732: alpha18 = x[18*i+17];
2733: while (n-->0) {
2734: y[18*(*idx)] += alpha1*(*v);
2735: y[18*(*idx)+1] += alpha2*(*v);
2736: y[18*(*idx)+2] += alpha3*(*v);
2737: y[18*(*idx)+3] += alpha4*(*v);
2738: y[18*(*idx)+4] += alpha5*(*v);
2739: y[18*(*idx)+5] += alpha6*(*v);
2740: y[18*(*idx)+6] += alpha7*(*v);
2741: y[18*(*idx)+7] += alpha8*(*v);
2742: y[18*(*idx)+8] += alpha9*(*v);
2743: y[18*(*idx)+9] += alpha10*(*v);
2744: y[18*(*idx)+10] += alpha11*(*v);
2745: y[18*(*idx)+11] += alpha12*(*v);
2746: y[18*(*idx)+12] += alpha13*(*v);
2747: y[18*(*idx)+13] += alpha14*(*v);
2748: y[18*(*idx)+14] += alpha15*(*v);
2749: y[18*(*idx)+15] += alpha16*(*v);
2750: y[18*(*idx)+16] += alpha17*(*v);
2751: y[18*(*idx)+17] += alpha18*(*v);
2752: idx++; v++;
2753: }
2754: }
2755: PetscLogFlops(36.0*a->nz);
2756: VecRestoreArray(xx,(PetscScalar**)&x);
2757: VecRestoreArray(zz,&y);
2758: return(0);
2759: }
2763: PetscErrorCode MatMult_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2764: {
2765: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2766: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2767: const PetscScalar *x,*v;
2768: PetscScalar *y,*sums;
2769: PetscErrorCode ierr;
2770: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2771: PetscInt n,i,jrow,j,dof = b->dof,k;
2774: VecGetArray(xx,(PetscScalar**)&x);
2775: VecSet(yy,0.0);
2776: VecGetArray(yy,&y);
2777: idx = a->j;
2778: v = a->a;
2779: ii = a->i;
2781: for (i=0; i<m; i++) {
2782: jrow = ii[i];
2783: n = ii[i+1] - jrow;
2784: sums = y + dof*i;
2785: for (j=0; j<n; j++) {
2786: for (k=0; k<dof; k++) {
2787: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2788: }
2789: jrow++;
2790: }
2791: }
2793: PetscLogFlops(2.0*dof*a->nz);
2794: VecRestoreArray(xx,(PetscScalar**)&x);
2795: VecRestoreArray(yy,&y);
2796: return(0);
2797: }
2801: PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2802: {
2803: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2804: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2805: const PetscScalar *x,*v;
2806: PetscScalar *y,*sums;
2807: PetscErrorCode ierr;
2808: const PetscInt m = b->AIJ->rmap->n,*idx,*ii;
2809: PetscInt n,i,jrow,j,dof = b->dof,k;
2812: if (yy != zz) {VecCopy(yy,zz);}
2813: VecGetArray(xx,(PetscScalar**)&x);
2814: VecGetArray(zz,&y);
2815: idx = a->j;
2816: v = a->a;
2817: ii = a->i;
2819: for (i=0; i<m; i++) {
2820: jrow = ii[i];
2821: n = ii[i+1] - jrow;
2822: sums = y + dof*i;
2823: for (j=0; j<n; j++) {
2824: for (k=0; k<dof; k++) {
2825: sums[k] += v[jrow]*x[dof*idx[jrow]+k];
2826: }
2827: jrow++;
2828: }
2829: }
2831: PetscLogFlops(2.0*dof*a->nz);
2832: VecRestoreArray(xx,(PetscScalar**)&x);
2833: VecRestoreArray(zz,&y);
2834: return(0);
2835: }
2839: PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A,Vec xx,Vec yy)
2840: {
2841: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2842: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2843: const PetscScalar *x,*v,*alpha;
2844: PetscScalar *y;
2845: PetscErrorCode ierr;
2846: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2847: PetscInt n,i,k;
2850: VecGetArray(xx,(PetscScalar**)&x);
2851: VecSet(yy,0.0);
2852: VecGetArray(yy,&y);
2853: for (i=0; i<m; i++) {
2854: idx = a->j + a->i[i] ;
2855: v = a->a + a->i[i] ;
2856: n = a->i[i+1] - a->i[i];
2857: alpha = x + dof*i;
2858: while (n-->0) {
2859: for (k=0; k<dof; k++) {
2860: y[dof*(*idx)+k] += alpha[k]*(*v);
2861: }
2862: idx++; v++;
2863: }
2864: }
2865: PetscLogFlops(2.0*dof*a->nz);
2866: VecRestoreArray(xx,(PetscScalar**)&x);
2867: VecRestoreArray(yy,&y);
2868: return(0);
2869: }
2873: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
2874: {
2875: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2876: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2877: const PetscScalar *x,*v,*alpha;
2878: PetscScalar *y;
2879: PetscErrorCode ierr;
2880: const PetscInt m = b->AIJ->rmap->n,*idx,dof = b->dof;
2881: PetscInt n,i,k;
2884: if (yy != zz) {VecCopy(yy,zz);}
2885: VecGetArray(xx,(PetscScalar**)&x);
2886: VecGetArray(zz,&y);
2887: for (i=0; i<m; i++) {
2888: idx = a->j + a->i[i] ;
2889: v = a->a + a->i[i] ;
2890: n = a->i[i+1] - a->i[i];
2891: alpha = x + dof*i;
2892: while (n-->0) {
2893: for (k=0; k<dof; k++) {
2894: y[dof*(*idx)+k] += alpha[k]*(*v);
2895: }
2896: idx++; v++;
2897: }
2898: }
2899: PetscLogFlops(2.0*dof*a->nz);
2900: VecRestoreArray(xx,(PetscScalar**)&x);
2901: VecRestoreArray(zz,&y);
2902: return(0);
2903: }
2905: /*===================================================================================*/
2908: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2909: {
2910: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2914: /* start the scatter */
2915: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2916: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2917: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2918: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2919: return(0);
2920: }
2924: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2925: {
2926: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2930: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2931: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2932: VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2933: VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);
2934: return(0);
2935: }
2939: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2940: {
2941: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2945: /* start the scatter */
2946: VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2947: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2948: VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);
2949: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2950: return(0);
2951: }
2955: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2956: {
2957: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2961: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2962: VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2963: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2964: VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);
2965: return(0);
2966: }
2968: /* ----------------------------------------------------------------*/
2971: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2972: {
2973: /* This routine requires testing -- but it's getting better. */
2974: PetscErrorCode ierr;
2975: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2976: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2977: Mat P=pp->AIJ;
2978: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2979: PetscInt *pti,*ptj,*ptJ;
2980: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2981: const PetscInt an=A->cmap->N,am=A->rmap->N,pn=P->cmap->N,pm=P->rmap->N,ppdof=pp->dof;
2982: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi,cn;
2983: MatScalar *ca;
2984: const PetscInt *pi = p->i,*pj = p->j,*pjj,*ai=a->i,*aj=a->j,*ajj;
2987: /* Start timer */
2988: PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);
2990: /* Get ij structure of P^T */
2991: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2993: cn = pn*ppdof;
2994: /* Allocate ci array, arrays for fill computation and */
2995: /* free space for accumulating nonzero column info */
2996: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2997: ci[0] = 0;
2999: /* Work arrays for rows of P^T*A */
3000: PetscMalloc4(an,PetscInt,&ptadenserow,an,PetscInt,&ptasparserow,cn,PetscInt,&denserow,cn,PetscInt,&sparserow);
3002: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
3003: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
3004: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
3005: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
3006: current_space = free_space;
3008: /* Determine symbolic info for each row of C: */
3009: for (i=0;i<pn;i++) {
3010: ptnzi = pti[i+1] - pti[i];
3011: ptJ = ptj + pti[i];
3012: for (dof=0;dof<ppdof;dof++) {
3013: ptanzi = 0;
3014: /* Determine symbolic row of PtA: */
3015: for (j=0;j<ptnzi;j++) {
3016: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
3017: arow = ptJ[j]*ppdof + dof;
3018: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
3019: anzj = ai[arow+1] - ai[arow];
3020: ajj = aj + ai[arow];
3021: for (k=0;k<anzj;k++) {
3022: if (!ptadenserow[ajj[k]]) {
3023: ptadenserow[ajj[k]] = -1;
3024: ptasparserow[ptanzi++] = ajj[k];
3025: }
3026: }
3027: }
3028: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
3029: ptaj = ptasparserow;
3030: cnzi = 0;
3031: for (j=0;j<ptanzi;j++) {
3032: /* Get offset within block of P */
3033: pshift = *ptaj%ppdof;
3034: /* Get block row of P */
3035: prow = (*ptaj++)/ppdof; /* integer division */
3036: /* P has same number of nonzeros per row as the compressed form */
3037: pnzj = pi[prow+1] - pi[prow];
3038: pjj = pj + pi[prow];
3039: for (k=0;k<pnzj;k++) {
3040: /* Locations in C are shifted by the offset within the block */
3041: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
3042: if (!denserow[pjj[k]*ppdof+pshift]) {
3043: denserow[pjj[k]*ppdof+pshift] = -1;
3044: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
3045: }
3046: }
3047: }
3049: /* sort sparserow */
3050: PetscSortInt(cnzi,sparserow);
3051:
3052: /* If free space is not available, make more free space */
3053: /* Double the amount of total space in the list */
3054: if (current_space->local_remaining<cnzi) {
3055: PetscFreeSpaceGet(cnzi+current_space->total_array_size,¤t_space);
3056: }
3058: /* Copy data into free space, and zero out denserows */
3059: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
3060: current_space->array += cnzi;
3061: current_space->local_used += cnzi;
3062: current_space->local_remaining -= cnzi;
3064: for (j=0;j<ptanzi;j++) {
3065: ptadenserow[ptasparserow[j]] = 0;
3066: }
3067: for (j=0;j<cnzi;j++) {
3068: denserow[sparserow[j]] = 0;
3069: }
3070: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
3071: /* For now, we will recompute what is needed. */
3072: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
3073: }
3074: }
3075: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
3076: /* Allocate space for cj, initialize cj, and */
3077: /* destroy list of free space and other temporary array(s) */
3078: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
3079: PetscFreeSpaceContiguous(&free_space,cj);
3080: PetscFree4(ptadenserow,ptasparserow,denserow,sparserow);
3081:
3082: /* Allocate space for ca */
3083: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
3084: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
3085:
3086: /* put together the new matrix */
3087: MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);
3089: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3090: /* Since these are PETSc arrays, change flags to free them as necessary. */
3091: c = (Mat_SeqAIJ *)((*C)->data);
3092: c->free_a = PETSC_TRUE;
3093: c->free_ij = PETSC_TRUE;
3094: c->nonew = 0;
3096: /* Clean up. */
3097: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
3099: PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
3100: return(0);
3101: }
3105: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
3106: {
3107: /* This routine requires testing -- first draft only */
3108: PetscErrorCode ierr;
3109: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
3110: Mat P=pp->AIJ;
3111: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
3112: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
3113: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
3114: const PetscInt *ai=a->i,*aj=a->j,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
3115: const PetscInt *ci=c->i,*cj=c->j,*cjj;
3116: const PetscInt am=A->rmap->N,cn=C->cmap->N,cm=C->rmap->N,ppdof=pp->dof;
3117: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow,*apj,*apjdense;
3118: const MatScalar *aa=a->a,*pa=p->a,*pA=p->a,*paj;
3119: MatScalar *ca=c->a,*caj,*apa;
3122: /* Allocate temporary array for storage of one row of A*P */
3123: PetscMalloc3(cn,MatScalar,&apa,cn,PetscInt,&apj,cn,PetscInt,&apjdense);
3124: PetscMemzero(apa,cn*sizeof(MatScalar));
3125: PetscMemzero(apj,cn*sizeof(PetscInt));
3126: PetscMemzero(apjdense,cn*sizeof(PetscInt));
3128: /* Clear old values in C */
3129: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
3131: for (i=0;i<am;i++) {
3132: /* Form sparse row of A*P */
3133: anzi = ai[i+1] - ai[i];
3134: apnzj = 0;
3135: for (j=0;j<anzi;j++) {
3136: /* Get offset within block of P */
3137: pshift = *aj%ppdof;
3138: /* Get block row of P */
3139: prow = *aj++/ppdof; /* integer division */
3140: pnzj = pi[prow+1] - pi[prow];
3141: pjj = pj + pi[prow];
3142: paj = pa + pi[prow];
3143: for (k=0;k<pnzj;k++) {
3144: poffset = pjj[k]*ppdof+pshift;
3145: if (!apjdense[poffset]) {
3146: apjdense[poffset] = -1;
3147: apj[apnzj++] = poffset;
3148: }
3149: apa[poffset] += (*aa)*paj[k];
3150: }
3151: PetscLogFlops(2.0*pnzj);
3152: aa++;
3153: }
3155: /* Sort the j index array for quick sparse axpy. */
3156: /* Note: a array does not need sorting as it is in dense storage locations. */
3157: PetscSortInt(apnzj,apj);
3159: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
3160: prow = i/ppdof; /* integer division */
3161: pshift = i%ppdof;
3162: poffset = pi[prow];
3163: pnzi = pi[prow+1] - poffset;
3164: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
3165: pJ = pj+poffset;
3166: pA = pa+poffset;
3167: for (j=0;j<pnzi;j++) {
3168: crow = (*pJ)*ppdof+pshift;
3169: cjj = cj + ci[crow];
3170: caj = ca + ci[crow];
3171: pJ++;
3172: /* Perform sparse axpy operation. Note cjj includes apj. */
3173: for (k=0,nextap=0;nextap<apnzj;k++) {
3174: if (cjj[k]==apj[nextap]) {
3175: caj[k] += (*pA)*apa[apj[nextap++]];
3176: }
3177: }
3178: PetscLogFlops(2.0*apnzj);
3179: pA++;
3180: }
3182: /* Zero the current row info for A*P */
3183: for (j=0;j<apnzj;j++) {
3184: apa[apj[j]] = 0.;
3185: apjdense[apj[j]] = 0;
3186: }
3187: }
3189: /* Assemble the final matrix and clean up */
3190: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
3191: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
3192: PetscFree3(apa,apj,apjdense);
3193: return(0);
3194: }
3198: PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
3199: {
3200: PetscErrorCode ierr;
3203: /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */
3204: MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);
3205: ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);
3206: return(0);
3207: }
3211: PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C)
3212: {
3214: SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet");
3215: return(0);
3216: }
3221: PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3222: {
3223: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
3224: Mat a = b->AIJ,B;
3225: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
3226: PetscErrorCode ierr;
3227: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
3228: PetscInt *cols;
3229: PetscScalar *vals;
3232: MatGetSize(a,&m,&n);
3233: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
3234: for (i=0; i<m; i++) {
3235: nmax = PetscMax(nmax,aij->ilen[i]);
3236: for (j=0; j<dof; j++) {
3237: ilen[dof*i+j] = aij->ilen[i];
3238: }
3239: }
3240: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
3241: PetscFree(ilen);
3242: PetscMalloc(nmax*sizeof(PetscInt),&icols);
3243: ii = 0;
3244: for (i=0; i<m; i++) {
3245: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3246: for (j=0; j<dof; j++) {
3247: for (k=0; k<ncols; k++) {
3248: icols[k] = dof*cols[k]+j;
3249: }
3250: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3251: ii++;
3252: }
3253: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
3254: }
3255: PetscFree(icols);
3256: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3257: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3259: if (reuse == MAT_REUSE_MATRIX) {
3260: MatHeaderReplace(A,B);
3261: } else {
3262: *newmat = B;
3263: }
3264: return(0);
3265: }
3268: #include ../src/mat/impls/aij/mpi/mpiaij.h
3273: PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
3274: {
3275: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
3276: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
3277: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
3278: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
3279: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
3280: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
3281: PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0;
3282: PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL;
3283: PetscInt rstart,cstart,*garray,ii,k;
3284: PetscErrorCode ierr;
3285: PetscScalar *vals,*ovals;
3288: PetscMalloc2(A->rmap->n,PetscInt,&dnz,A->rmap->n,PetscInt,&onz);
3289: for (i=0; i<A->rmap->n/dof; i++) {
3290: nmax = PetscMax(nmax,AIJ->ilen[i]);
3291: onmax = PetscMax(onmax,OAIJ->ilen[i]);
3292: for (j=0; j<dof; j++) {
3293: dnz[dof*i+j] = AIJ->ilen[i];
3294: onz[dof*i+j] = OAIJ->ilen[i];
3295: }
3296: }
3297: MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,dnz,0,onz,&B);
3298: PetscFree2(dnz,onz);
3300: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
3301: rstart = dof*maij->A->rmap->rstart;
3302: cstart = dof*maij->A->cmap->rstart;
3303: garray = mpiaij->garray;
3305: ii = rstart;
3306: for (i=0; i<A->rmap->n/dof; i++) {
3307: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3308: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3309: for (j=0; j<dof; j++) {
3310: for (k=0; k<ncols; k++) {
3311: icols[k] = cstart + dof*cols[k]+j;
3312: }
3313: for (k=0; k<oncols; k++) {
3314: oicols[k] = dof*garray[ocols[k]]+j;
3315: }
3316: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
3317: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
3318: ii++;
3319: }
3320: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
3321: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
3322: }
3323: PetscFree2(icols,oicols);
3325: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3326: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3328: if (reuse == MAT_REUSE_MATRIX) {
3329: PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */
3330: ((PetscObject)A)->refct = 1;
3331: MatHeaderReplace(A,B);
3332: ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
3333: } else {
3334: *newmat = B;
3335: }
3336: return(0);
3337: }
3341: /* ---------------------------------------------------------------------------------- */
3344: /*@C
3345: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
3346: operations for multicomponent problems. It interpolates each component the same
3347: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
3348: and MATMPIAIJ for distributed matrices.
3350: Collective
3352: Input Parameters:
3353: + A - the AIJ matrix describing the action on blocks
3354: - dof - the block size (number of components per node)
3356: Output Parameter:
3357: . maij - the new MAIJ matrix
3359: Operations provided:
3360: + MatMult
3361: . MatMultTranspose
3362: . MatMultAdd
3363: . MatMultTransposeAdd
3364: - MatView
3366: Level: advanced
3368: .seealso: MatMAIJGetAIJ(), MatMAIJRedimension()
3369: @*/
3370: PetscErrorCode MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
3371: {
3373: PetscMPIInt size;
3374: PetscInt n;
3375: Mat B;
3378: PetscObjectReference((PetscObject)A);
3380: if (dof == 1) {
3381: *maij = A;
3382: } else {
3383: MatCreate(((PetscObject)A)->comm,&B);
3384: MatSetSizes(B,dof*A->rmap->n,dof*A->cmap->n,dof*A->rmap->N,dof*A->cmap->N);
3385: PetscLayoutSetBlockSize(B->rmap,dof);
3386: PetscLayoutSetBlockSize(B->cmap,dof);
3387: PetscLayoutSetUp(B->rmap);
3388: PetscLayoutSetUp(B->cmap);
3389: B->assembled = PETSC_TRUE;
3391: MPI_Comm_size(((PetscObject)A)->comm,&size);
3392: if (size == 1) {
3393: Mat_SeqMAIJ *b;
3395: MatSetType(B,MATSEQMAIJ);
3396: B->ops->destroy = MatDestroy_SeqMAIJ;
3397: B->ops->view = MatView_SeqMAIJ;
3398: b = (Mat_SeqMAIJ*)B->data;
3399: b->dof = dof;
3400: b->AIJ = A;
3401: if (dof == 2) {
3402: B->ops->mult = MatMult_SeqMAIJ_2;
3403: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
3404: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
3405: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
3406: } else if (dof == 3) {
3407: B->ops->mult = MatMult_SeqMAIJ_3;
3408: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
3409: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
3410: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
3411: } else if (dof == 4) {
3412: B->ops->mult = MatMult_SeqMAIJ_4;
3413: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
3414: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
3415: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
3416: } else if (dof == 5) {
3417: B->ops->mult = MatMult_SeqMAIJ_5;
3418: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
3419: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
3420: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
3421: } else if (dof == 6) {
3422: B->ops->mult = MatMult_SeqMAIJ_6;
3423: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
3424: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
3425: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
3426: } else if (dof == 7) {
3427: B->ops->mult = MatMult_SeqMAIJ_7;
3428: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
3429: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
3430: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
3431: } else if (dof == 8) {
3432: B->ops->mult = MatMult_SeqMAIJ_8;
3433: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
3434: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
3435: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
3436: } else if (dof == 9) {
3437: B->ops->mult = MatMult_SeqMAIJ_9;
3438: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
3439: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
3440: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
3441: } else if (dof == 10) {
3442: B->ops->mult = MatMult_SeqMAIJ_10;
3443: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
3444: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
3445: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
3446: } else if (dof == 11) {
3447: B->ops->mult = MatMult_SeqMAIJ_11;
3448: B->ops->multadd = MatMultAdd_SeqMAIJ_11;
3449: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11;
3450: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
3451: } else if (dof == 16) {
3452: B->ops->mult = MatMult_SeqMAIJ_16;
3453: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
3454: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
3455: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
3456: } else if (dof == 18) {
3457: B->ops->mult = MatMult_SeqMAIJ_18;
3458: B->ops->multadd = MatMultAdd_SeqMAIJ_18;
3459: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18;
3460: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
3461: } else {
3462: B->ops->mult = MatMult_SeqMAIJ_N;
3463: B->ops->multadd = MatMultAdd_SeqMAIJ_N;
3464: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N;
3465: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
3466: }
3467: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
3468: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
3469: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
3470: } else {
3471: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
3472: Mat_MPIMAIJ *b;
3473: IS from,to;
3474: Vec gvec;
3475: PetscInt *garray,i;
3477: MatSetType(B,MATMPIMAIJ);
3478: B->ops->destroy = MatDestroy_MPIMAIJ;
3479: B->ops->view = MatView_MPIMAIJ;
3480: b = (Mat_MPIMAIJ*)B->data;
3481: b->dof = dof;
3482: b->A = A;
3483: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
3484: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
3486: VecGetSize(mpiaij->lvec,&n);
3487: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
3488: VecSetBlockSize(b->w,dof);
3490: /* create two temporary Index sets for build scatter gather */
3491: PetscMalloc((n+1)*sizeof(PetscInt),&garray);
3492: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
3493: ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);
3494: PetscFree(garray);
3495: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
3497: /* create temporary global vector to generate scatter context */
3498: VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap->n,dof*A->cmap->N,PETSC_NULL,&gvec);
3499: VecSetBlockSize(gvec,dof);
3501: /* generate the scatter context */
3502: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
3504: ISDestroy(from);
3505: ISDestroy(to);
3506: VecDestroy(gvec);
3508: B->ops->mult = MatMult_MPIMAIJ_dof;
3509: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
3510: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
3511: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
3512: B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ;
3513: B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ;
3514: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
3515: }
3516: *maij = B;
3517: MatView_Private(B);
3518: }
3519: return(0);
3520: }