Actual source code: crl.c
1: #define PETSCMAT_DLL
3: /*
4: Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
5: This class is derived from the MATSEQAIJ class and retains the
6: compressed row storage (aka Yale sparse matrix format) but augments
7: it with a column oriented storage that is more efficient for
8: matrix vector products on Vector machines.
10: CRL stands for constant row length (that is, the same number of columns
11: is kept (padded with zeros) for each row of the sparse matrix).
12: */
13: #include ../src/mat/impls/aij/seq/crl/crl.h
17: PetscErrorCode MatDestroy_SeqCRL(Mat A)
18: {
20: Mat_CRL *crl = (Mat_CRL *) A->spptr;
22: /* Free everything in the Mat_CRL data structure. */
23: PetscFree2(crl->acols,crl->icols);
24: PetscFree(crl);
25: A->spptr = 0;
27: PetscObjectChangeTypeName( (PetscObject)A, MATSEQAIJ);
28: MatDestroy_SeqAIJ(A);
29: return(0);
30: }
32: PetscErrorCode MatDuplicate_CRL(Mat A, MatDuplicateOption op, Mat *M)
33: {
35: SETERRQ(PETSC_ERR_SUP,"Cannot duplicate CRL matrices yet");
36: return(0);
37: }
41: PetscErrorCode SeqCRL_create_crl(Mat A)
42: {
43: Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data;
44: Mat_CRL *crl = (Mat_CRL*) A->spptr;
45: PetscInt m = A->rmap->n; /* Number of rows in the matrix. */
46: PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */
47: PetscInt i, j,rmax = a->rmax,*icols, *ilen = a->ilen;
48: MatScalar *aa = a->a;
49: PetscScalar *acols;
53: crl->nz = a->nz;
54: crl->m = A->rmap->n;
55: crl->rmax = rmax;
56: PetscFree2(crl->acols,crl->icols);
57: PetscMalloc2(rmax*m,PetscScalar,&crl->acols,rmax*m,PetscInt,&crl->icols);
58: acols = crl->acols;
59: icols = crl->icols;
60: for (i=0; i<m; i++) {
61: for (j=0; j<ilen[i]; j++) {
62: acols[j*m+i] = *aa++;
63: icols[j*m+i] = *aj++;
64: }
65: for (;j<rmax; j++) { /* empty column entries */
66: acols[j*m+i] = 0.0;
67: icols[j*m+i] = (j) ? icols[(j-1)*m+i] : 0; /* handle case where row is EMPTY */
68: }
69: }
70: PetscInfo2(A,"Percentage of 0's introduced for vectorized multiply %G. Rmax= %D\n",1.0-((double)a->nz)/((double)(rmax*m)),rmax);
71: return(0);
72: }
78: PetscErrorCode MatAssemblyEnd_SeqCRL(Mat A, MatAssemblyType mode)
79: {
81: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
84: a->inode.use = PETSC_FALSE;
85: MatAssemblyEnd_SeqAIJ(A,mode);
86: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
88: /* Now calculate the permutation and grouping information. */
89: SeqCRL_create_crl(A);
90: return(0);
91: }
93: #include "../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h"
97: /*
98: Shared by both sequential and parallel versions of CRL matrix: MATMPICRL and MATSEQCRL
99: - the scatter is used only in the parallel version
101: */
102: PetscErrorCode MatMult_CRL(Mat A,Vec xx,Vec yy)
103: {
104: Mat_CRL *crl = (Mat_CRL*) A->spptr;
105: PetscInt m = crl->m; /* Number of rows in the matrix. */
106: PetscInt rmax = crl->rmax,*icols = crl->icols;
107: PetscScalar *acols = crl->acols;
109: PetscScalar *x,*y;
110: #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
111: PetscInt i,j,ii;
112: #endif
115: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
116: #pragma disjoint(*x,*y,*aa)
117: #endif
120: if (crl->xscat) {
121: VecCopy(xx,crl->xwork);
122: /* get remote values needed for local part of multiply */
123: VecScatterBegin(crl->xscat,xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD);
124: VecScatterEnd(crl->xscat,xx,crl->fwork,INSERT_VALUES,SCATTER_FORWARD);
125: xx = crl->xwork;
126: };
128: VecGetArray(xx,&x);
129: VecGetArray(yy,&y);
131: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
132: fortranmultcrl_(&m,&rmax,x,y,icols,acols);
133: #else
135: /* first column */
136: for (j=0; j<m; j++) {
137: y[j] = acols[j]*x[icols[j]];
138: }
140: /* other columns */
141: #if defined(PETSC_HAVE_CRAYC)
142: #pragma _CRI preferstream
143: #endif
144: for (i=1; i<rmax; i++) {
145: ii = i*m;
146: #if defined(PETSC_HAVE_CRAYC)
147: #pragma _CRI prefervector
148: #endif
149: for (j=0; j<m; j++) {
150: y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
151: }
152: }
153: #if defined(PETSC_HAVE_CRAYC)
154: #pragma _CRI ivdep
155: #endif
157: #endif
158: PetscLogFlops(2.0*crl->nz - m);
159: VecRestoreArray(xx,&x);
160: VecRestoreArray(yy,&y);
161: return(0);
162: }
165: /* MatConvert_SeqAIJ_SeqCRL converts a SeqAIJ matrix into a
166: * SeqCRL matrix. This routine is called by the MatCreate_SeqCRL()
167: * routine, but can also be used to convert an assembled SeqAIJ matrix
168: * into a SeqCRL one. */
172: PetscErrorCode MatConvert_SeqAIJ_SeqCRL(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
173: {
175: Mat B = *newmat;
176: Mat_CRL *crl;
179: if (reuse == MAT_INITIAL_MATRIX) {
180: MatDuplicate(A,MAT_COPY_VALUES,&B);
181: }
183: PetscNewLog(B,Mat_CRL,&crl);
184: B->spptr = (void *) crl;
186: /* Set function pointers for methods that we inherit from AIJ but override. */
187: B->ops->duplicate = MatDuplicate_CRL;
188: B->ops->assemblyend = MatAssemblyEnd_SeqCRL;
189: B->ops->destroy = MatDestroy_SeqCRL;
190: B->ops->mult = MatMult_CRL;
192: /* If A has already been assembled, compute the permutation. */
193: if (A->assembled) {
194: SeqCRL_create_crl(B);
195: }
196: PetscObjectChangeTypeName((PetscObject)B,MATSEQCRL);
197: *newmat = B;
198: return(0);
199: }
205: /*@C
206: MatCreateSeqCRL - Creates a sparse matrix of type SEQCRL.
207: This type inherits from AIJ, but stores some additional
208: information that is used to allow better vectorization of
209: the matrix-vector product. At the cost of increased storage, the AIJ formatted
210: matrix can be copied to a format in which pieces of the matrix are
211: stored in ELLPACK format, allowing the vectorized matrix multiply
212: routine to use stride-1 memory accesses. As with the AIJ type, it is
213: important to preallocate matrix storage in order to get good assembly
214: performance.
215:
216: Collective on MPI_Comm
218: Input Parameters:
219: + comm - MPI communicator, set to PETSC_COMM_SELF
220: . m - number of rows
221: . n - number of columns
222: . nz - number of nonzeros per row (same for all rows)
223: - nnz - array containing the number of nonzeros in the various rows
224: (possibly different for each row) or PETSC_NULL
226: Output Parameter:
227: . A - the matrix
229: Notes:
230: If nnz is given then nz is ignored
232: Level: intermediate
234: .keywords: matrix, cray, sparse, parallel
236: .seealso: MatCreate(), MatCreateMPICSRPERM(), MatSetValues()
237: @*/
238: PetscErrorCode MatCreateSeqCRL(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
239: {
243: MatCreate(comm,A);
244: MatSetSizes(*A,m,n,m,n);
245: MatSetType(*A,MATSEQCRL);
246: MatSeqAIJSetPreallocation_SeqAIJ(*A,nz,(PetscInt*)nnz);
247: return(0);
248: }
254: PetscErrorCode MatCreate_SeqCRL(Mat A)
255: {
259: MatSetType(A,MATSEQAIJ);
260: MatConvert_SeqAIJ_SeqCRL(A,MATSEQCRL,MAT_REUSE_MATRIX,&A);
261: return(0);
262: }