Actual source code: csrperm.c
#define PETSCMAT_DLL

/*
  Defines basic operations for the MATSEQCSRPERM matrix class.
  This class is derived from the MATSEQAIJ class and retains the
  compressed row storage (aka Yale sparse matrix format) but augments
  it with some permutation information that enables some operations
  to be more vectorizable.  A physically rearranged copy of the matrix
  may be stored if the user desires.

  Eventually a variety of permutations may be supported.
*/

#include "../src/mat/impls/aij/seq/aij.h"

#define NDIM 512
/* NDIM specifies how many rows at a time we should work with when
 * performing the vectorized mat-vec.  This depends on various factors
 * such as vector register length, etc., and I really need to add a
 * way for the user (or the library) to tune this.  I'm setting it to
 * 512 for now since that is what Ed D'Azevedo was using in his Fortran
 * routines. */

typedef struct {
  PetscInt   ngroup;
  PetscInt   *xgroup;
  /* Denotes where groups of rows with same number of nonzeros
   * begin and end, i.e., xgroup[i] gives us the position in iperm[]
   * where the ith group begins. */
  PetscInt   *nzgroup;  /* how many nonzeros each row that is a member of group i has. */
  PetscInt   *iperm;    /* The permutation vector. */

  /* Flag that indicates whether we need to clean up permutation
   * information during the MatDestroy. */
  PetscTruth CleanUpCSRPERM;

  /* Some of this stuff is for Ed's recursive triangular solve.
   * I'm not sure what I need yet. */
  PetscInt   blocksize;
  PetscInt   nstep;
  PetscInt   *jstart_list;
  PetscInt   *jend_list;
  PetscInt   *action_list;
  PetscInt   *ngroup_list;
  PetscInt   **ipointer_list;
  PetscInt   **xgroup_list;
  PetscInt   **nzgroup_list;
  PetscInt   **iperm_list;
} Mat_SeqCSRPERM;
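
/* Illustrative example (not from the original source): for a 4-row matrix whose
 * rows contain 2, 1, 2, and 3 nonzeros respectively, SeqCSRPERM_create_perm()
 * below would produce
 *   ngroup  = 3
 *   nzgroup = {1, 2, 3}       (nonzeros per row in each group)
 *   xgroup  = {0, 1, 3, 4}    (group i occupies iperm[xgroup[i]..xgroup[i+1]-1])
 *   iperm   = {1, 0, 2, 3}    (row 1 first, then rows 0 and 2, then row 3)
 * so that all rows within a group have identical length and can be processed
 * together in the vectorized kernels. */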

PetscErrorCode MatConvert_SeqCSRPERM_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert a MATCSRPERM to its base PETSc type, */
  /* so we will ignore 'MatType type'. */
  Mat            B = *newmat;
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM*)A->spptr;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  /* Reset the original function pointers. */
  B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
  B->ops->destroy     = MatDestroy_SeqAIJ;
  B->ops->duplicate   = MatDuplicate_SeqAIJ;

  /* Free everything in the Mat_SeqCSRPERM data structure.
   * We don't free the Mat_SeqCSRPERM struct itself, as this will
   * cause problems later when MatDestroy() tries to free it. */
  if (csrperm->CleanUpCSRPERM) {
    PetscFree(csrperm->xgroup);
    PetscFree(csrperm->nzgroup);
    PetscFree(csrperm->iperm);
  }

  /* Change the type of B to MATSEQAIJ. */
  PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);

  *newmat = B;
  return(0);
}

PetscErrorCode MatDestroy_SeqCSRPERM(Mat A)
{
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;

  /* Free everything in the Mat_SeqCSRPERM data structure.
   * Note that we don't need to free the Mat_SeqCSRPERM struct
   * itself, as MatDestroy() will do so. */
  if (csrperm->CleanUpCSRPERM) {
    PetscFree(csrperm->xgroup);
    PetscFree(csrperm->nzgroup);
    PetscFree(csrperm->iperm);
  }

  /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
   * to destroy everything that remains. */
  PetscObjectChangeTypeName((PetscObject)A,MATSEQAIJ);
  /* Note that I don't call MatSetType().  I believe this is because that
   * is only to be called when *building* a matrix.  I could be wrong, but
   * that is how things work for the SuperLU matrix class. */
  MatDestroy_SeqAIJ(A);
  return(0);
}

PetscErrorCode MatDuplicate_SeqCSRPERM(Mat A, MatDuplicateOption op, Mat *M)
{
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;
  Mat_SeqCSRPERM *csrperm_dest;

  MatDuplicate_SeqAIJ(A,op,M);
  /* Grab the CSRPERM data of the duplicate only after *M exists. */
  csrperm_dest = (Mat_SeqCSRPERM *) (*M)->spptr;
  PetscMemcpy((*M)->spptr,csrperm,sizeof(Mat_SeqCSRPERM));
  /* Allocate space for, and copy the grouping and permutation info.
   * I note that when the groups are initially determined in
   * SeqCSRPERM_create_perm, xgroup and nzgroup may be sized larger than
   * necessary.  But at this point, we know how large they need to be, and
   * allocate only the necessary amount of memory.  So the duplicated matrix
   * may actually use slightly less storage than the original! */
  PetscMalloc(A->rmap->n*sizeof(PetscInt), &csrperm_dest->iperm);
  PetscMalloc((csrperm->ngroup+1)*sizeof(PetscInt), &csrperm_dest->xgroup);
  PetscMalloc((csrperm->ngroup)*sizeof(PetscInt), &csrperm_dest->nzgroup);
  PetscMemcpy(csrperm_dest->iperm,csrperm->iperm,sizeof(PetscInt)*A->rmap->n);
  PetscMemcpy(csrperm_dest->xgroup,csrperm->xgroup,sizeof(PetscInt)*(csrperm->ngroup+1));
  PetscMemcpy(csrperm_dest->nzgroup,csrperm->nzgroup,sizeof(PetscInt)*csrperm->ngroup);
  return(0);
}

PetscErrorCode SeqCSRPERM_create_perm(Mat A)
{
  PetscInt   m;      /* Number of rows in the matrix. */
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data;
  PetscInt   *ia;    /* From the CSR representation; points to the beginning of each row. */
  PetscInt   maxnz;  /* Maximum number of nonzeros in any row. */
  PetscInt   *rows_in_bucket;
  /* To construct the permutation, we sort each row into one of maxnz
   * buckets based on how many nonzeros are in the row. */
  PetscInt   nz;
  PetscInt   *nz_in_row;  /* the number of nonzero elements in row k. */
  PetscInt   *ipnz;
  /* When constructing the iperm permutation vector,
   * ipnz[nz] is used to point to the next place in the permutation vector
   * that a row with nz nonzero elements should be placed.*/
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM*) A->spptr;
  /* Points to the MATSEQCSRPERM-specific data in the matrix A. */
  PetscInt   i, ngroup, istart, ipos;

  /* I really ought to put something in here to check if A is of
   * type MATSEQCSRPERM and return an error code if it is not.
   * Come back and do this! */

  m  = A->rmap->n;
  ia = a->i;

  /* Allocate the arrays that will hold the permutation vector. */
  PetscMalloc(m*sizeof(PetscInt), &csrperm->iperm);

  /* Allocate some temporary work arrays that will be used in
   * calculating the permutation vector and groupings. */
  PetscMalloc((m+1)*sizeof(PetscInt), &rows_in_bucket);
  PetscMalloc((m+1)*sizeof(PetscInt), &ipnz);
  PetscMalloc(m*sizeof(PetscInt), &nz_in_row);

  /* Now actually figure out the permutation and grouping. */

  /* First pass: Determine number of nonzeros in each row, maximum
   * number of nonzeros in any row, and how many rows fall into each
   * "bucket" of rows with same number of nonzeros. */
  maxnz = 0;
  for (i=0; i<m; i++) {
    nz_in_row[i] = ia[i+1]-ia[i];
    if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i];
  }

  for (i=0; i<=maxnz; i++) {
    rows_in_bucket[i] = 0;
  }
  for (i=0; i<m; i++) {
    nz = nz_in_row[i];
    rows_in_bucket[nz]++;
  }

  /* Allocate space for the grouping info.  There will be at most (maxnz + 1)
   * groups.  (It is maxnz + 1 instead of simply maxnz because there may be
   * rows with no nonzero elements.)  If there are (maxnz + 1) groups,
   * then xgroup[] must consist of (maxnz + 2) elements, since the last
   * element of xgroup will tell us where the (maxnz + 1)th group ends.
   * We allocate space for the maximum number of groups;
   * that is potentially a little wasteful, but not too much so.
   * Perhaps I should fix it later. */
  PetscMalloc((maxnz+2)*sizeof(PetscInt), &csrperm->xgroup);
  PetscMalloc((maxnz+1)*sizeof(PetscInt), &csrperm->nzgroup);

  /* Second pass.  Look at what is in the buckets and create the groupings.
   * Note that it is OK to have a group of rows with no non-zero values. */
  ngroup = 0;
  istart = 0;
  for (i=0; i<=maxnz; i++) {
    if (rows_in_bucket[i] > 0) {
      csrperm->nzgroup[ngroup] = i;
      csrperm->xgroup[ngroup]  = istart;
      ngroup++;
      istart += rows_in_bucket[i];
    }
  }

  csrperm->xgroup[ngroup] = istart;
  csrperm->ngroup = ngroup;

  /* Now fill in the permutation vector iperm. */
  ipnz[0] = 0;
  for (i=0; i<maxnz; i++) {
    ipnz[i+1] = ipnz[i] + rows_in_bucket[i];
  }

  for (i=0; i<m; i++) {
    nz = nz_in_row[i];
    ipos = ipnz[nz];
    csrperm->iperm[ipos] = i;
    ipnz[nz]++;
  }

  /* Clean up temporary work arrays. */
  PetscFree(rows_in_bucket);
  PetscFree(ipnz);
  PetscFree(nz_in_row);

  /* Since we've allocated some memory to hold permutation info,
   * flip the CleanUpCSRPERM flag to true. */
  csrperm->CleanUpCSRPERM = PETSC_TRUE;
  return(0);
}
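
/* Continuing the illustrative 4-row example given above the struct definition
 * (rows with 2, 1, 2, 3 nonzeros; those counts are assumptions for illustration
 * only): the first pass yields maxnz = 3 and rows_in_bucket = {0, 1, 2, 1}.
 * The prefix sum then gives ipnz = {0, 0, 1, 3}, i.e., rows with 1 nonzero start
 * filling iperm at position 0, rows with 2 nonzeros at position 1, and rows with
 * 3 nonzeros at position 3, which produces iperm = {1, 0, 2, 3}. */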

PetscErrorCode MatAssemblyEnd_SeqCSRPERM(Mat A, MatAssemblyType mode)
{
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;

  if (mode == MAT_FLUSH_ASSEMBLY) return(0);

  /* Since a MATSEQCSRPERM matrix is really just a MATSEQAIJ with some
   * extra information, call the AssemblyEnd routine for a MATSEQAIJ.
   * I'm not sure if this is the best way to do this, but it avoids
   * a lot of code duplication.
   * I also note that currently MATSEQCSRPERM doesn't know anything about
   * the Mat_CompressedRow data structure that SeqAIJ now uses when there
   * are many zero rows.  If the SeqAIJ assembly end routine decides to use
   * this, this may break things.  (Don't know... haven't looked at it.) */
  a->inode.use = PETSC_FALSE;
  MatAssemblyEnd_SeqAIJ(A, mode);

  /* Now calculate the permutation and grouping information. */
  SeqCSRPERM_create_perm(A);
  return(0);
}

PetscErrorCode MatMult_SeqCSRPERM(Mat A,Vec xx,Vec yy)
{
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  PetscScalar     *x,*y;
  const MatScalar *aa;
  PetscErrorCode  ierr;
  PetscInt        *aj,*ai;
#if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM) && defined(notworking))
  PetscInt        i,j;
#endif

  /* Variables that don't appear in MatMult_SeqAIJ. */
  Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;
  PetscInt       *iperm;  /* Points to the permutation vector. */
  PetscInt       *xgroup;
  /* Denotes where groups of rows with same number of nonzeros
   * begin and end in iperm. */
  PetscInt    *nzgroup;
  PetscInt    ngroup;
  PetscInt    igroup;
  PetscInt    jstart,jend;
  /* jstart is used in loops to denote the position in iperm where a
   * group starts; jend denotes the position where it ends.
   * (jend + 1 is where the next group starts.) */
  PetscInt    iold,nz;
  PetscInt    istart,iend,isize;
  PetscInt    ipos;
  PetscScalar yp[NDIM];
  PetscInt    ip[NDIM];  /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */

#if defined(PETSC_HAVE_PRAGMA_DISJOINT)
#pragma disjoint(*x,*y,*aa)
#endif

  VecGetArray(xx,&x);
  VecGetArray(yy,&y);
  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Get the info we need about the permutations and groupings. */
  iperm   = csrperm->iperm;
  ngroup  = csrperm->ngroup;
  xgroup  = csrperm->xgroup;
  nzgroup = csrperm->nzgroup;

#if defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM) && defined(notworking)
  fortranmultcsrperm_(&m,x,ii,aj,aa,y);
#else

  for (igroup=0; igroup<ngroup; igroup++) {
    jstart = xgroup[igroup];
    jend   = xgroup[igroup+1] - 1;
    nz     = nzgroup[igroup];

    /* Handle the special cases where the number of nonzeros per row
     * in the group is either 0 or 1. */
    if (nz == 0) {
      for (i=jstart; i<=jend; i++) {
        y[iperm[i]] = 0.0;
      }
    } else if (nz == 1) {
      for (i=jstart; i<=jend; i++) {
        iold = iperm[i];
        ipos = ai[iold];
        y[iold] = aa[ipos] * x[aj[ipos]];
      }
    } else {

      /* We work our way through the current group in chunks of NDIM rows
       * at a time. */

      for (istart=jstart; istart<=jend; istart+=NDIM) {
        /* Figure out where the chunk of 'isize' rows ends in iperm.
         * 'isize' may of course be less than NDIM for the last chunk. */
        iend = istart + (NDIM - 1);
        if (iend > jend) { iend = jend; }
        isize = iend - istart + 1;

        /* Initialize the yp[] array that will be used to hold part of
         * the permuted results vector, and figure out where in aa each
         * row of the chunk will begin. */
        for (i=0; i<isize; i++) {
          iold = iperm[istart + i];
          /* iold is a row number from the matrix A *before* reordering. */
          ip[i] = ai[iold];
          /* ip[i] tells us where the ith row of the chunk begins in aa. */
          yp[i] = (PetscScalar) 0.0;
        }

        /* If the number of nonzeros per row exceeds the number of rows in
         * the chunk, we should vectorize along nz, that is, perform the
         * mat-vec one row at a time as in the usual CSR case. */
        if (nz > isize) {
#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI preferstream
#endif
          for (i=0; i<isize; i++) {
#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI prefervector
#endif
            for (j=0; j<nz; j++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        } else {
          /* Otherwise, there are enough rows in the chunk to make it
           * worthwhile to vectorize across the rows, that is, to do the
           * matvec by operating with "columns" of the chunk. */
          for (j=0; j<nz; j++) {
            for (i=0; i<isize; i++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        }

#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI ivdep
#endif
        /* Put results from yp[] into non-permuted result vector y. */
        for (i=0; i<isize; i++) {
          y[iperm[istart+i]] = yp[i];
        }
      } /* End processing chunk of isize rows of a group. */
    } /* End handling matvec for chunk with nz > 1. */
  } /* End loop over igroup. */
#endif
  PetscLogFlops(2.0*a->nz - A->rmap->n);
  VecRestoreArray(xx,&x);
  VecRestoreArray(yy,&y);
  return(0);
}

/* MatMultAdd_SeqCSRPERM() calculates yy = ww + A * xx.
 * Note that the names I used to designate the vectors differ from those
 * used in MatMultAdd_SeqAIJ().  I did this to keep my notation consistent
 * with the MatMult_SeqCSRPERM() routine, which is very similar to this one. */
/*
    I hate having virtually identical code for the mult and the multadd!!!
*/
PetscErrorCode MatMultAdd_SeqCSRPERM(Mat A,Vec xx,Vec ww,Vec yy)
{
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  PetscScalar     *x,*y,*w;
  const MatScalar *aa;
  PetscErrorCode  ierr;
  PetscInt        *aj,*ai;
#if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDCSRPERM)
  PetscInt        i,j;
#endif

  /* Variables that don't appear in MatMultAdd_SeqAIJ. */
  Mat_SeqCSRPERM *csrperm;
  PetscInt       *iperm;  /* Points to the permutation vector. */
  PetscInt       *xgroup;
  /* Denotes where groups of rows with same number of nonzeros
   * begin and end in iperm. */
  PetscInt    *nzgroup;
  PetscInt    ngroup;
  PetscInt    igroup;
  PetscInt    jstart,jend;
  /* jstart is used in loops to denote the position in iperm where a
   * group starts; jend denotes the position where it ends.
   * (jend + 1 is where the next group starts.) */
  PetscInt    iold,nz;
  PetscInt    istart,iend,isize;
  PetscInt    ipos;
  PetscScalar yp[NDIM];
  PetscInt    ip[NDIM];
  /* yp[] and ip[] are treated as vector "registers" for performing
   * the mat-vec. */

#if defined(PETSC_HAVE_PRAGMA_DISJOINT)
#pragma disjoint(*x,*y,*aa)
#endif

  VecGetArray(xx,&x);
  VecGetArray(yy,&y);
  if (yy != ww) {
    VecGetArray(ww,&w);
  } else {
    w = y;
  }

  aj = a->j;  /* aj[k] gives column index for element aa[k]. */
  aa = a->a;  /* Nonzero elements stored row-by-row. */
  ai = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

  /* Get the info we need about the permutations and groupings. */
  csrperm = (Mat_SeqCSRPERM *) A->spptr;
  iperm   = csrperm->iperm;
  ngroup  = csrperm->ngroup;
  xgroup  = csrperm->xgroup;
  nzgroup = csrperm->nzgroup;

#if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDCSRPERM)
  fortranmultaddcsrperm_(&m,x,ii,aj,aa,y,w);
#else

  for (igroup=0; igroup<ngroup; igroup++) {
    jstart = xgroup[igroup];
    jend   = xgroup[igroup+1] - 1;

    nz = nzgroup[igroup];

    /* Handle the special cases where the number of nonzeros per row
     * in the group is either 0 or 1. */
    if (nz == 0) {
      for (i=jstart; i<=jend; i++) {
        iold = iperm[i];
        y[iold] = w[iold];
      }
    }
    else if (nz == 1) {
      for (i=jstart; i<=jend; i++) {
        iold = iperm[i];
        ipos = ai[iold];
        y[iold] = w[iold] + aa[ipos] * x[aj[ipos]];
      }
    }
    /* For the general case: */
    else {

      /* We work our way through the current group in chunks of NDIM rows
       * at a time. */
      for (istart=jstart; istart<=jend; istart+=NDIM) {
        /* Figure out where the chunk of 'isize' rows ends in iperm.
         * 'isize' may of course be less than NDIM for the last chunk. */
        iend = istart + (NDIM - 1);
        if (iend > jend) { iend = jend; }
        isize = iend - istart + 1;

        /* Initialize the yp[] array that will be used to hold part of
         * the permuted results vector, and figure out where in aa each
         * row of the chunk will begin. */
        for (i=0; i<isize; i++) {
          iold = iperm[istart + i];
          /* iold is a row number from the matrix A *before* reordering. */
          ip[i] = ai[iold];
          /* ip[i] tells us where the ith row of the chunk begins in aa. */
          yp[i] = w[iold];
        }

        /* If the number of nonzeros per row exceeds the number of rows in
         * the chunk, we should vectorize along nz, that is, perform the
         * mat-vec one row at a time as in the usual CSR case. */
        if (nz > isize) {
#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI preferstream
#endif
          for (i=0; i<isize; i++) {
#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI prefervector
#endif
            for (j=0; j<nz; j++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        }
        /* Otherwise, there are enough rows in the chunk to make it
         * worthwhile to vectorize across the rows, that is, to do the
         * matvec by operating with "columns" of the chunk. */
        else {
          for (j=0; j<nz; j++) {
            for (i=0; i<isize; i++) {
              ipos = ip[i] + j;
              yp[i] += aa[ipos] * x[aj[ipos]];
            }
          }
        }

#if defined(PETSC_HAVE_CRAYC)
#pragma _CRI ivdep
#endif
        /* Put results from yp[] into non-permuted result vector y. */
        for (i=0; i<isize; i++) {
          y[iperm[istart+i]] = yp[i];
        }
      } /* End processing chunk of isize rows of a group. */

    } /* End handling matvec for chunk with nz > 1. */
  } /* End loop over igroup. */

#endif
  PetscLogFlops(2.0*a->nz);
  VecRestoreArray(xx,&x);
  VecRestoreArray(yy,&y);
  if (yy != ww) {
    VecRestoreArray(ww,&w);
  }
  return(0);
}

/* MatConvert_SeqAIJ_SeqCSRPERM converts a SeqAIJ matrix into a
 * SeqCSRPERM matrix.  This routine is called by the MatCreate_SeqCSRPERM()
 * routine, but can also be used to convert an assembled SeqAIJ matrix
 * into a SeqCSRPERM one. */
PetscErrorCode MatConvert_SeqAIJ_SeqCSRPERM(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B = *newmat;
  Mat_SeqCSRPERM *csrperm;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(A,MAT_COPY_VALUES,&B);
  }

  PetscNewLog(B,Mat_SeqCSRPERM,&csrperm);
  B->spptr = (void *) csrperm;

  /* Set function pointers for methods that we inherit from AIJ but override. */
  B->ops->duplicate   = MatDuplicate_SeqCSRPERM;
  B->ops->assemblyend = MatAssemblyEnd_SeqCSRPERM;
  B->ops->destroy     = MatDestroy_SeqCSRPERM;
  B->ops->mult        = MatMult_SeqCSRPERM;
  B->ops->multadd     = MatMultAdd_SeqCSRPERM;

  /* If A has already been assembled, compute the permutation. */
  if (A->assembled) {
    SeqCSRPERM_create_perm(B);
  }

  PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqcsrperm_seqaij_C",
                                    "MatConvert_SeqCSRPERM_SeqAIJ",MatConvert_SeqCSRPERM_SeqAIJ);

  PetscObjectChangeTypeName((PetscObject)B,MATSEQCSRPERM);
  *newmat = B;
  return(0);
}
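
/* Illustrative use of the conversion path described above (not part of the
 * original file): an already assembled MATSEQAIJ matrix A could be turned into
 * a MATSEQCSRPERM matrix in place through the generic PETSc conversion
 * interface, e.g.
 *   MatConvert(A,MATSEQCSRPERM,MAT_REUSE_MATRIX,&A);
 * which dispatches to MatConvert_SeqAIJ_SeqCSRPERM() above.  The variable name
 * A and the in-place MAT_REUSE_MATRIX choice are assumptions for this sketch. */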

/*@C
   MatCreateSeqCSRPERM - Creates a sparse matrix of type SEQCSRPERM.
   This type inherits from AIJ, but calculates some additional permutation
   information that is used to allow better vectorization of some
   operations.  At the cost of increased storage, the AIJ formatted
   matrix can be copied to a format in which pieces of the matrix are
   stored in ELLPACK format, allowing the vectorized matrix multiply
   routine to use stride-1 memory accesses.  As with the AIJ type, it is
   important to preallocate matrix storage in order to get good assembly
   performance.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator, set to PETSC_COMM_SELF
.  m - number of rows
.  n - number of columns
.  nz - number of nonzeros per row (same for all rows)
-  nnz - array containing the number of nonzeros in the various rows
         (possibly different for each row) or PETSC_NULL

   Output Parameter:
.  A - the matrix

   Notes:
   If nnz is given then nz is ignored

   Level: intermediate

.keywords: matrix, cray, sparse, parallel

.seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues()
@*/
PetscErrorCode MatCreateSeqCSRPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
{
  MatCreate(comm,A);
  MatSetSizes(*A,m,n,m,n);
  MatSetType(*A,MATSEQCSRPERM);
  MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);
  return(0);
}
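
/* Illustrative usage sketch (not part of the original file; the matrix size of
 * 100 and the per-row estimate of 5 nonzeros are assumptions): a SEQCSRPERM
 * matrix is created, filled, and assembled just like a SEQAIJ matrix, e.g.
 *   Mat A;
 *   MatCreateSeqCSRPERM(PETSC_COMM_SELF,100,100,5,PETSC_NULL,&A);
 *   ... MatSetValues() calls to fill A ...
 *   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
 *   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 * The final-assembly step ends up in MatAssemblyEnd_SeqCSRPERM(), which is what
 * triggers SeqCSRPERM_create_perm() and builds the grouping/permutation data. */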

PetscErrorCode MatCreate_SeqCSRPERM(Mat A)
{
  MatSetType(A,MATSEQAIJ);
  MatConvert_SeqAIJ_SeqCSRPERM(A,MATSEQCSRPERM,MAT_REUSE_MATRIX,&A);
  return(0);
}