Actual source code: csrperm.c

  1: #define PETSCMAT_DLL

  3: /*
  4:   Defines basic operations for the MATSEQCSRPERM matrix class.
  5:   This class is derived from the MATSEQAIJ class and retains the 
  6:   compressed row storage (aka Yale sparse matrix format) but augments 
  7:   it with some permutation information that enables some operations 
  8:   to be more vectorizable.  A physically rearranged copy of the matrix 
  9:   may be stored if the user desires.

 11:   Eventually a variety of permutations may be supported.
 12: */

 14:  #include ../src/mat/impls/aij/seq/aij.h

 16: #define NDIM 512
 17:     /* NDIM specifies how many rows at a time we should work with when 
 18:      * performing the vectorized mat-vec.  This depends on various factors 
 19:      * such as vector register length, etc., and I really need to add a 
 20:      * way for the user (or the library) to tune this.  I'm setting it to  
 21:      * 512 for now since that is what Ed D'Azevedo was using in his Fortran 
 22:      * routines. */

 24: typedef struct {
 25:   PetscInt  ngroup;
 26:   PetscInt *xgroup;
 27:     /* Denotes where groups of rows with same number of nonzeros 
 28:      * begin and end, i.e., xgroup[i] gives us the position in iperm[] 
 29:      * where the ith group begins. */
 30:   PetscInt *nzgroup; /*  how many nonzeros each row that is a member of group i has. */
 31:   PetscInt *iperm;  /* The permutation vector. */

 33:   /* Flag that indicates whether we need to clean up permutation 
 34:    * information during the MatDestroy. */
 35:   PetscTruth CleanUpCSRPERM;

 37:   /* Some of this stuff is for Ed's recursive triangular solve.
 38:    * I'm not sure what I need yet. */
 39:   PetscInt blocksize;
 40:   PetscInt nstep;
 41:   PetscInt *jstart_list;
 42:   PetscInt *jend_list;
 43:   PetscInt *action_list;
 44:   PetscInt *ngroup_list;
 45:   PetscInt **ipointer_list;
 46:   PetscInt **xgroup_list;
 47:   PetscInt **nzgroup_list;
 48:   PetscInt **iperm_list;
 49: } Mat_SeqCSRPERM;


 56: PetscErrorCode  MatConvert_SeqCSRPERM_SeqAIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
 57: {
 58:   /* This routine is only called to convert a MATCSRPERM to its base PETSc type, */
 59:   /* so we will ignore 'MatType type'. */
 61:   Mat            B = *newmat;
 62:   Mat_SeqCSRPERM *csrperm=(Mat_SeqCSRPERM*)A->spptr;

 65:   if (reuse == MAT_INITIAL_MATRIX) {
 66:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 67:   }

 69:   /* Reset the original function pointers. */
 70:   B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
 71:   B->ops->destroy     = MatDestroy_SeqAIJ;
 72:   B->ops->duplicate   = MatDuplicate_SeqAIJ;

 74:   /* Free everything in the Mat_SeqCSRPERM data structure. 
 75:    * We don't free the Mat_SeqCSRPERM struct itself, as this will 
 76:    * cause problems later when MatDestroy() tries to free it. */
 77:   if(csrperm->CleanUpCSRPERM) {
 78:     PetscFree(csrperm->xgroup);
 79:     PetscFree(csrperm->nzgroup);
 80:     PetscFree(csrperm->iperm);
 81:   }

 83:   /* Change the type of B to MATSEQAIJ. */
 84:   PetscObjectChangeTypeName( (PetscObject)B, MATSEQAIJ);
 85: 
 86:   *newmat = B;
 87:   return(0);
 88: }

 93: PetscErrorCode MatDestroy_SeqCSRPERM(Mat A)
 94: {
 96:   Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;

 99:   /* Free everything in the Mat_SeqCSRPERM data structure. 
100:    * Note that we don't need to free the Mat_SeqCSRPERM struct 
101:    * itself, as MatDestroy() will do so. */
102:   if(csrperm->CleanUpCSRPERM) {
103:     PetscFree(csrperm->xgroup);
104:     PetscFree(csrperm->nzgroup);
105:     PetscFree(csrperm->iperm);
106:   }

108:   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ() 
109:    * to destroy everything that remains. */
110:   PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);
111:   /* Note that I don't call MatSetType().  I believe this is because that 
112:    * is only to be called when *building* a matrix.  I could be wrong, but 
113:    * that is how things work for the SuperLU matrix class. */
114:   MatDestroy_SeqAIJ(A);
115:   return(0);
116: }

118: PetscErrorCode MatDuplicate_SeqCSRPERM(Mat A, MatDuplicateOption op, Mat *M)
119: {
121:   Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;
122:   Mat_SeqCSRPERM *csrperm_dest = (Mat_SeqCSRPERM *) (*M)->spptr;

125:   MatDuplicate_SeqAIJ(A,op,M);
126:   PetscMemcpy((*M)->spptr,csrperm,sizeof(Mat_SeqCSRPERM));
127:   /* Allocate space for, and copy the grouping and permutation info. 
128:    * I note that when the groups are initially determined in 
129:    * SeqCSRPERM_create_perm, xgroup and nzgroup may be sized larger than 
130:    * necessary.  But at this point, we know how large they need to be, and 
131:    * allocate only the necessary amount of memory.  So the duplicated matrix 
132:    * may actually use slightly less storage than the original! */
133:   PetscMalloc(A->rmap->n*sizeof(PetscInt), csrperm_dest->iperm);
134:   PetscMalloc((csrperm->ngroup+1)*sizeof(PetscInt), csrperm_dest->xgroup);
135:   PetscMalloc((csrperm->ngroup)*sizeof(PetscInt), csrperm_dest->nzgroup);
136:   PetscMemcpy(csrperm_dest->iperm,csrperm->iperm,sizeof(PetscInt)*A->rmap->n);
137:   PetscMemcpy(csrperm_dest->xgroup,csrperm->xgroup,sizeof(PetscInt)*(csrperm->ngroup+1));
138:   PetscMemcpy(csrperm_dest->nzgroup,csrperm->nzgroup,sizeof(PetscInt)*csrperm->ngroup);
139:   return(0);
140: }

144: PetscErrorCode SeqCSRPERM_create_perm(Mat A)
145: {
146:   PetscInt        m;  /* Number of rows in the matrix. */
147:   Mat_SeqAIJ      *a = (Mat_SeqAIJ *)(A)->data;
148:   PetscInt        *ia;  /* From the CSR representation; points to the beginning  of each row. */
149:   PetscInt        maxnz; /* Maximum number of nonzeros in any row. */
150:   PetscInt        *rows_in_bucket;
151:     /* To construct the permutation, we sort each row into one of maxnz 
152:      * buckets based on how many nonzeros are in the row. */
153:   PetscInt        nz;
154:   PetscInt        *nz_in_row;  /* the number of nonzero elements in row k. */
155:   PetscInt        *ipnz;
156:     /* When constructing the iperm permutation vector, 
157:      * ipnz[nz] is used to point to the next place in the permutation vector 
158:      * that a row with nz nonzero elements should be placed.*/
159:   Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM*) A->spptr;
160:     /* Points to the MATSEQCSRPERM-specific data in the matrix B. */
162:   PetscInt       i, ngroup, istart, ipos;

164:   /* I really ought to put something in here to check if B is of 
165:    * type MATSEQCSRPERM and return an error code if it is not.
166:    * Come back and do this! */
168:   m  = A->rmap->n;
169:   ia = a->i;
170: 
171:   /* Allocate the arrays that will hold the permutation vector. */
172:   PetscMalloc(m*sizeof(PetscInt), &csrperm->iperm);

174:   /* Allocate some temporary work arrays that will be used in 
175:    * calculating the permuation vector and groupings. */
176:   PetscMalloc((m+1)*sizeof(PetscInt), &rows_in_bucket);
177:   PetscMalloc((m+1)*sizeof(PetscInt), &ipnz);
178:   PetscMalloc(m*sizeof(PetscInt), &nz_in_row);

180:   /* Now actually figure out the permutation and grouping. */

182:   /* First pass: Determine number of nonzeros in each row, maximum 
183:    * number of nonzeros in any row, and how many rows fall into each  
184:    * "bucket" of rows with same number of nonzeros. */
185:   maxnz = 0;
186:   for (i=0; i<m; i++) {
187:     nz_in_row[i] = ia[i+1]-ia[i];
188:     if (nz_in_row[i] > maxnz) maxnz = nz_in_row[i];
189:   }

191:   for (i=0; i<=maxnz; i++) {
192:     rows_in_bucket[i] = 0;
193:   }
194:   for (i=0; i<m; i++) {
195:     nz = nz_in_row[i];
196:     rows_in_bucket[nz]++;
197:   }

199:   /* Allocate space for the grouping info.  There will be at most (maxnz + 1) 
200:    * groups.  (It is maxnz + 1 instead of simply maxnz because there may be 
201:    * rows with no nonzero elements.)  If there are (maxnz + 1) groups, 
202:    * then xgroup[] must consist of (maxnz + 2) elements, since the last 
203:    * element of xgroup will tell us where the (maxnz + 1)th group ends.
204:    * We allocate space for the maximum number of groups; 
205:    * that is potentially a little wasteful, but not too much so.  
206:    * Perhaps I should fix it later. */
207:   PetscMalloc((maxnz+2)*sizeof(PetscInt), &csrperm->xgroup);
208:   PetscMalloc((maxnz+1)*sizeof(PetscInt), &csrperm->nzgroup);

210:   /* Second pass.  Look at what is in the buckets and create the groupings.
211:    * Note that it is OK to have a group of rows with no non-zero values. */
212:   ngroup = 0;
213:   istart = 0;
214:   for (i=0; i<=maxnz; i++) {
215:     if (rows_in_bucket[i] > 0) {
216:       csrperm->nzgroup[ngroup] = i;
217:       csrperm->xgroup[ngroup] = istart;
218:       ngroup++;
219:       istart += rows_in_bucket[i];
220:     }
221:   }

223:   csrperm->xgroup[ngroup] = istart;
224:   csrperm->ngroup = ngroup;

226:   /* Now fill in the permutation vector iperm. */
227:   ipnz[0] = 0;
228:   for (i=0; i<maxnz; i++) {
229:     ipnz[i+1] = ipnz[i] + rows_in_bucket[i];
230:   }

232:   for (i=0; i<m; i++) {
233:     nz = nz_in_row[i];
234:     ipos = ipnz[nz];
235:     csrperm->iperm[ipos] = i;
236:     ipnz[nz]++;
237:   }

239:   /* Clean up temporary work arrays. */
240:   PetscFree(rows_in_bucket);
241:   PetscFree(ipnz);
242:   PetscFree(nz_in_row);

244:   /* Since we've allocated some memory to hold permutation info, 
245:    * flip the CleanUpCSRPERM flag to true. */
246:   csrperm->CleanUpCSRPERM = PETSC_TRUE;
247:   return(0);
248: }


253: PetscErrorCode MatAssemblyEnd_SeqCSRPERM(Mat A, MatAssemblyType mode)
254: {
256:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;

259:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);
260: 
261:   /* Since a MATSEQCSRPERM matrix is really just a MATSEQAIJ with some 
262:    * extra information, call the AssemblyEnd routine for a MATSEQAIJ. 
263:    * I'm not sure if this is the best way to do this, but it avoids 
264:    * a lot of code duplication.
265:    * I also note that currently MATSEQCSRPERM doesn't know anything about 
266:    * the Mat_CompressedRow data structure that SeqAIJ now uses when there 
267:    * are many zero rows.  If the SeqAIJ assembly end routine decides to use 
268:    * this, this may break things.  (Don't know... haven't looked at it.) */
269:   a->inode.use = PETSC_FALSE;
270:   MatAssemblyEnd_SeqAIJ(A, mode);

272:   /* Now calculate the permutation and grouping information. */
273:   SeqCSRPERM_create_perm(A);
274:   return(0);
275: }

279: PetscErrorCode MatMult_SeqCSRPERM(Mat A,Vec xx,Vec yy)
280: {
281:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
282:   PetscScalar     *x,*y;
283:   const MatScalar *aa;
284:   PetscErrorCode  ierr;
285:   PetscInt        *aj,*ai;
286: #if !(defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM) && defined(notworking))
287:   PetscInt        i,j;
288: #endif

290:   /* Variables that don't appear in MatMult_SeqAIJ. */
291:   Mat_SeqCSRPERM *csrperm = (Mat_SeqCSRPERM *) A->spptr;
292:   PetscInt       *iperm;  /* Points to the permutation vector. */
293:   PetscInt       *xgroup;
294:     /* Denotes where groups of rows with same number of nonzeros 
295:      * begin and end in iperm. */
296:   PetscInt       *nzgroup;
297:   PetscInt       ngroup;
298:   PetscInt       igroup;
299:   PetscInt       jstart,jend;
300:     /* jstart is used in loops to denote the position in iperm where a 
301:      * group starts; jend denotes the position where it ends.
302:      * (jend + 1 is where the next group starts.) */
303:   PetscInt       iold,nz;
304:   PetscInt       istart,iend,isize;
305:   PetscInt       ipos;
306:   PetscScalar    yp[NDIM];
307:   PetscInt       ip[NDIM]; /* yp[] and ip[] are treated as vector "registers" for performing the mat-vec. */

309: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
310: #pragma disjoint(*x,*y,*aa)
311: #endif

314:   VecGetArray(xx,&x);
315:   VecGetArray(yy,&y);
316:   aj  = a->j;    /* aj[k] gives column index for element aa[k]. */
317:   aa  = a->a;  /* Nonzero elements stored row-by-row. */
318:   ai  = a->i;   /* ai[k] is the position in aa and aj where row k starts. */

320:   /* Get the info we need about the permutations and groupings. */
321:   iperm  = csrperm->iperm;
322:   ngroup = csrperm->ngroup;
323:   xgroup = csrperm->xgroup;
324:   nzgroup = csrperm->nzgroup;
325: 
326: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCSRPERM) && defined(notworking)
327:   fortranmultcsrperm_(&m,x,ii,aj,aa,y);
328: #else

330:   for (igroup=0; igroup<ngroup; igroup++) {
331:     jstart = xgroup[igroup];
332:     jend   = xgroup[igroup+1] - 1;
333:     nz     = nzgroup[igroup];

335:     /* Handle the special cases where the number of nonzeros per row 
336:      * in the group is either 0 or 1. */
337:     if (nz == 0) {
338:       for (i=jstart; i<=jend; i++) {
339:         y[iperm[i]] = 0.0;
340:       }
341:     } else if (nz == 1) {
342:       for (i=jstart; i<=jend; i++) {
343:         iold = iperm[i];
344:         ipos = ai[iold];
345:         y[iold] = aa[ipos] * x[aj[ipos]];
346:       }
347:     } else {
348: 
349:       /* We work our way through the current group in chunks of NDIM rows 
350:        * at a time. */

352:       for (istart=jstart; istart<=jend; istart+=NDIM) {
353:         /* Figure out where the chunk of 'isize' rows ends in iperm.
354:          * 'isize may of course be less than NDIM for the last chunk. */
355:         iend = istart + (NDIM - 1);
356:         if (iend > jend) { iend = jend; }
357:         isize = iend - istart + 1;

359:         /* Initialize the yp[] array that will be used to hold part of 
360:          * the permuted results vector, and figure out where in aa each 
361:          * row of the chunk will begin. */
362:         for (i=0; i<isize; i++) {
363:           iold = iperm[istart + i];
364:             /* iold is a row number from the matrix A *before* reordering. */
365:           ip[i] = ai[iold];
366:             /* ip[i] tells us where the ith row of the chunk begins in aa. */
367:           yp[i] = (PetscScalar) 0.0;
368:         }

370:         /* If the number of zeros per row exceeds the number of rows in 
371:          * the chunk, we should vectorize along nz, that is, perform the 
372:          * mat-vec one row at a time as in the usual CSR case. */
373:         if (nz > isize) {
374: #if defined(PETSC_HAVE_CRAYC)
375: #pragma _CRI preferstream
376: #endif
377:           for (i=0; i<isize; i++) {
378: #if defined(PETSC_HAVE_CRAYC)
379: #pragma _CRI prefervector
380: #endif
381:             for (j=0; j<nz; j++) {
382:               ipos = ip[i] + j;
383:               yp[i] += aa[ipos] * x[aj[ipos]];
384:             }
385:           }
386:         } else {
387:         /* Otherwise, there are enough rows in the chunk to make it 
388:          * worthwhile to vectorize across the rows, that is, to do the 
389:          * matvec by operating with "columns" of the chunk. */
390:           for (j=0; j<nz; j++) {
391:             for(i=0; i<isize; i++) {
392:               ipos = ip[i] + j;
393:               yp[i] += aa[ipos] * x[aj[ipos]];
394:             }
395:           }
396:         }

398: #if defined(PETSC_HAVE_CRAYC)
399: #pragma _CRI ivdep
400: #endif
401:         /* Put results from yp[] into non-permuted result vector y. */
402:         for (i=0; i<isize; i++) {
403:           y[iperm[istart+i]] = yp[i];
404:         }
405:       } /* End processing chunk of isize rows of a group. */
406:     } /* End handling matvec for chunk with nz > 1. */
407:   } /* End loop over igroup. */
408: #endif
409:   PetscLogFlops(2.0*a->nz - A->rmap->n);
410:   VecRestoreArray(xx,&x);
411:   VecRestoreArray(yy,&y);
412:   return(0);
413: }


416: /* MatMultAdd_SeqCSRPERM() calculates yy = ww + A * xx.
417:  * Note that the names I used to designate the vectors differs from that 
418:  * used in MatMultAdd_SeqAIJ().  I did this to keep my notation consistent 
419:  * with the MatMult_SeqCSRPERM() routine, which is very similar to this one. */
420: /*
421:     I hate having virtually identical code for the mult and the multadd!!!
422: */
425: PetscErrorCode MatMultAdd_SeqCSRPERM(Mat A,Vec xx,Vec ww,Vec yy)
426: {
427:   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
428:   PetscScalar     *x,*y,*w;
429:   const MatScalar *aa;
430:   PetscErrorCode  ierr;
431:   PetscInt        *aj,*ai;
432: #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTADDCSRPERM)
433:   PetscInt        i,j;
434: #endif

436:   /* Variables that don't appear in MatMultAdd_SeqAIJ. */
437:   Mat_SeqCSRPERM *csrperm;
438:   PetscInt       *iperm;  /* Points to the permutation vector. */
439:   PetscInt       *xgroup;
440:     /* Denotes where groups of rows with same number of nonzeros 
441:      * begin and end in iperm. */
442:   PetscInt       *nzgroup;
443:   PetscInt       ngroup;
444:   PetscInt       igroup;
445:   PetscInt       jstart,jend;
446:     /* jstart is used in loops to denote the position in iperm where a 
447:      * group starts; jend denotes the position where it ends.
448:      * (jend + 1 is where the next group starts.) */
449:   PetscInt       iold,nz;
450:   PetscInt       istart,iend,isize;
451:   PetscInt       ipos;
452:   PetscScalar    yp[NDIM];
453:   PetscInt       ip[NDIM];
454:     /* yp[] and ip[] are treated as vector "registers" for performing 
455:      * the mat-vec. */

457: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
458: #pragma disjoint(*x,*y,*aa)
459: #endif

462:   VecGetArray(xx,&x);
463:   VecGetArray(yy,&y);
464:   if (yy != ww) {
465:     VecGetArray(ww,&w);
466:   } else {
467:     w = y;
468:   }

470:   aj  = a->j;  /* aj[k] gives column index for element aa[k]. */
471:   aa  = a->a;  /* Nonzero elements stored row-by-row. */
472:   ai  = a->i;  /* ai[k] is the position in aa and aj where row k starts. */

474:   /* Get the info we need about the permutations and groupings. */
475:   csrperm = (Mat_SeqCSRPERM *) A->spptr;
476:   iperm = csrperm->iperm;
477:   ngroup = csrperm->ngroup;
478:   xgroup = csrperm->xgroup;
479:   nzgroup = csrperm->nzgroup;
480: 
481: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDCSRPERM)
482:   fortranmultaddcsrperm_(&m,x,ii,aj,aa,y,w);
483: #else

485:   for(igroup=0; igroup<ngroup; igroup++) {
486:     jstart = xgroup[igroup];
487:     jend = xgroup[igroup+1] - 1;

489:     nz = nzgroup[igroup];

491:     /* Handle the special cases where the number of nonzeros per row 
492:      * in the group is either 0 or 1. */
493:     if(nz == 0) {
494:       for(i=jstart; i<=jend; i++) {
495:         iold = iperm[i];
496:         y[iold] = w[iold];
497:       }
498:     }
499:     else if(nz == 1) {
500:       for(i=jstart; i<=jend; i++) {
501:         iold = iperm[i];
502:         ipos = ai[iold];
503:         y[iold] = w[iold] + aa[ipos] * x[aj[ipos]];
504:       }
505:     }
506:     /* For the general case: */
507:     else {
508: 
509:       /* We work our way through the current group in chunks of NDIM rows 
510:        * at a time. */

512:       for(istart=jstart; istart<=jend; istart+=NDIM) {
513:         /* Figure out where the chunk of 'isize' rows ends in iperm.
514:          * 'isize may of course be less than NDIM for the last chunk. */
515:         iend = istart + (NDIM - 1);
516:         if(iend > jend) { iend = jend; }
517:         isize = iend - istart + 1;

519:         /* Initialize the yp[] array that will be used to hold part of 
520:          * the permuted results vector, and figure out where in aa each 
521:          * row of the chunk will begin. */
522:         for(i=0; i<isize; i++) {
523:           iold = iperm[istart + i];
524:             /* iold is a row number from the matrix A *before* reordering. */
525:           ip[i] = ai[iold];
526:             /* ip[i] tells us where the ith row of the chunk begins in aa. */
527:           yp[i] = w[iold];
528:         }

530:         /* If the number of zeros per row exceeds the number of rows in 
531:          * the chunk, we should vectorize along nz, that is, perform the 
532:          * mat-vec one row at a time as in the usual CSR case. */
533:         if(nz > isize) {
534: #if defined(PETSC_HAVE_CRAYC)
535: #pragma _CRI preferstream
536: #endif
537:           for(i=0; i<isize; i++) {
538: #if defined(PETSC_HAVE_CRAYC)
539: #pragma _CRI prefervector
540: #endif
541:             for(j=0; j<nz; j++) {
542:               ipos = ip[i] + j;
543:               yp[i] += aa[ipos] * x[aj[ipos]];
544:             }
545:           }
546:         }
547:         /* Otherwise, there are enough rows in the chunk to make it 
548:          * worthwhile to vectorize across the rows, that is, to do the 
549:          * matvec by operating with "columns" of the chunk. */
550:         else {
551:           for(j=0; j<nz; j++) {
552:             for(i=0; i<isize; i++) {
553:               ipos = ip[i] + j;
554:               yp[i] += aa[ipos] * x[aj[ipos]];
555:             }
556:           }
557:         }

559: #if defined(PETSC_HAVE_CRAYC)
560: #pragma _CRI ivdep
561: #endif
562:         /* Put results from yp[] into non-permuted result vector y. */
563:         for(i=0; i<isize; i++) {
564:           y[iperm[istart+i]] = yp[i];
565:         }
566:       } /* End processing chunk of isize rows of a group. */
567: 
568:     } /* End handling matvec for chunk with nz > 1. */
569:   } /* End loop over igroup. */

571: #endif
572:   PetscLogFlops(2.0*a->nz);
573:   VecRestoreArray(xx,&x);
574:   VecRestoreArray(yy,&y);
575:   if (yy != ww) {
576:     VecRestoreArray(ww,&w);
577:   }
578:   return(0);
579: }


582: /* MatConvert_SeqAIJ_SeqCSRPERM converts a SeqAIJ matrix into a 
583:  * SeqCSRPERM matrix.  This routine is called by the MatCreate_SeqCSRPERM() 
584:  * routine, but can also be used to convert an assembled SeqAIJ matrix 
585:  * into a SeqCSRPERM one. */
589: PetscErrorCode  MatConvert_SeqAIJ_SeqCSRPERM(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
590: {
592:   Mat            B = *newmat;
593:   Mat_SeqCSRPERM *csrperm;

596:   if (reuse == MAT_INITIAL_MATRIX) {
597:     MatDuplicate(A,MAT_COPY_VALUES,&B);
598:   }

600:   PetscNewLog(B,Mat_SeqCSRPERM,&csrperm);
601:   B->spptr = (void *) csrperm;

603:   /* Set function pointers for methods that we inherit from AIJ but override. */
604:   B->ops->duplicate   = MatDuplicate_SeqCSRPERM;
605:   B->ops->assemblyend = MatAssemblyEnd_SeqCSRPERM;
606:   B->ops->destroy     = MatDestroy_SeqCSRPERM;
607:   B->ops->mult        = MatMult_SeqCSRPERM;
608:   B->ops->multadd     = MatMultAdd_SeqCSRPERM;

610:   /* If A has already been assembled, compute the permutation. */
611:   if (A->assembled) {
612:     SeqCSRPERM_create_perm(B);
613:   }
614: 
615:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqcsrperm_seqaij_C",
616:                                            "MatConvert_SeqCSRPERM_SeqAIJ",MatConvert_SeqCSRPERM_SeqAIJ);

618:   PetscObjectChangeTypeName((PetscObject)B,MATSEQCSRPERM);
619:   *newmat = B;
620:   return(0);
621: }


627: /*@C
628:    MatCreateSeqCSRPERM - Creates a sparse matrix of type SEQCSRPERM.
629:    This type inherits from AIJ, but calculates some additional permutation 
630:    information that is used to allow better vectorization of some 
631:    operations.  At the cost of increased storage, the AIJ formatted 
632:    matrix can be copied to a format in which pieces of the matrix are 
633:    stored in ELLPACK format, allowing the vectorized matrix multiply 
634:    routine to use stride-1 memory accesses.  As with the AIJ type, it is 
635:    important to preallocate matrix storage in order to get good assembly 
636:    performance.
637:    
638:    Collective on MPI_Comm

640:    Input Parameters:
641: +  comm - MPI communicator, set to PETSC_COMM_SELF
642: .  m - number of rows
643: .  n - number of columns
644: .  nz - number of nonzeros per row (same for all rows)
645: -  nnz - array containing the number of nonzeros in the various rows 
646:          (possibly different for each row) or PETSC_NULL

648:    Output Parameter:
649: .  A - the matrix 

651:    Notes:
652:    If nnz is given then nz is ignored

654:    Level: intermediate

656: .keywords: matrix, cray, sparse, parallel

658: .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues()
659: @*/
660: PetscErrorCode  MatCreateSeqCSRPERM(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
661: {

665:   MatCreate(comm,A);
666:   MatSetSizes(*A,m,n,m,n);
667:   MatSetType(*A,MATSEQCSRPERM);
668:   MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);
669:   return(0);
670: }

675: PetscErrorCode  MatCreate_SeqCSRPERM(Mat A)
676: {

680:   MatSetType(A,MATSEQAIJ);
681:   MatConvert_SeqAIJ_SeqCSRPERM(A,MATSEQCSRPERM,MAT_REUSE_MATRIX,&A);
682:   return(0);
683: }