Actual source code: aijfact.c
1: #define PETSCMAT_DLL
4: #include ../src/mat/impls/aij/seq/aij.h
5: #include ../src/mat/impls/sbaij/seq/sbaij.h
6: #include petscbt.h
7: #include ../src/mat/utils/freespace.h
12: /*
13: Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix

   Input:
     mat  - matrix; its data is read as Mat_SeqAIJ (row starts a->i, column indices a->j, values a->a)
     type - ordering type; not referenced in this routine
   Output:
     irow,icol - both are set to the same index set holding the computed order; one extra reference is
                 taken on it (presumably so the two handles can be released independently)

   Method: the first row is the one maximizing |diagonal| / sum(|off-diagonal|).  Each following row is
   chosen among the not-yet-ordered column indices of the current row as the one maximizing past/future,
   where past sums |a_jk| over columns k already ordered and future sums |a_jk| over columns not yet
   ordered (the diagonal is excluded).  If the current row offers no candidate, all remaining rows are scanned.
14: */
15: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
16: {
17: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data;
18: PetscErrorCode ierr;
19: PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order;
20: const PetscInt *ai = a->i, *aj = a->j;
21: const PetscScalar *aa = a->a;
22: PetscTruth *done;
23: PetscReal best,past = 0,future;
26: /* pick initial row */
27: best = -1;
28: for (i=0; i<n; i++) {
29: future = 0.0; past = 0.0; /* reset both sums: a row without a stored diagonal must not inherit the previous row's diagonal */
30: for (j=ai[i]; j<ai[i+1]; j++) {
31: if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]);
32: }
33: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
34: if (past/future > best) {
35: best = past/future;
36: current = i;
37: }
38: }
/* done[k] marks rows already placed in the ordering; order[] collects them in sequence */
40: PetscMalloc(n*sizeof(PetscTruth),&done);
41: PetscMemzero(done,n*sizeof(PetscTruth));
42: PetscMalloc(n*sizeof(PetscInt),&order);
43: if (n) order[0] = current; /* an empty matrix has zero-length work arrays, so there is no slot to write */
44: for (i=0; i<n-1; i++) {
45: done[current] = PETSC_TRUE;
46: best = -1;
47: /* loop over all neighbors of current pivot */
48: for (j=ai[current]; j<ai[current+1]; j++) {
49: jj = aj[j];
50: if (done[jj]) continue;
51: /* loop over columns of potential next row computing weights for below and above diagonal */
52: past = future = 0.0;
53: for (k=ai[jj]; k<ai[jj+1]; k++) {
54: kk = aj[k];
55: if (done[kk]) past += PetscAbsScalar(aa[k]);
56: else if (kk != jj) future += PetscAbsScalar(aa[k]);
57: }
58: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
59: if (past/future > best) {
60: best = past/future;
61: newcurrent = jj;
62: }
63: }
64: if (best == -1) { /* no neighbors to select from so select best of all that remain */
65: best = -1;
66: for (k=0; k<n; k++) {
67: if (done[k]) continue;
68: future = 0.0;
69: past = 0.0;
70: for (j=ai[k]; j<ai[k+1]; j++) {
71: kk = aj[j];
72: if (done[kk]) past += PetscAbsScalar(aa[j]);
73: else if (kk != k) future += PetscAbsScalar(aa[j]);
74: }
75: if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */
76: if (past/future > best) {
77: best = past/future;
78: newcurrent = k;
79: }
80: }
81: }
/* release the work arrays before reporting the internal error so they are not leaked */
82: if (current == newcurrent) { PetscFree(done); PetscFree(order); SETERRQ(PETSC_ERR_PLIB,"newcurrent cannot be current"); }
83: current = newcurrent;
84: order[i+1] = current;
85: }
86: ISCreateGeneral(PETSC_COMM_SELF,n,order,irow);
/* the same index set serves as row and column ordering; take a second reference for the second handle */
87: *icol = *irow;
88: PetscObjectReference((PetscObject)*irow);
89: PetscFree(done);
90: PetscFree(order);
91: return(0);
92: }
/*
   MatGetFactorAvailable_seqaij_petsc - reports whether a factorization of the requested type is available.

   A and ftype are not examined here: *flg is set to PETSC_TRUE unconditionally and 0 is returned.
*/
98: PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg)
99: {
101: *flg = PETSC_TRUE;
102: return(0);
103: }
/*
   MatGetFactor_seqaij_petsc - creates the (still empty) matrix B that will hold a factorization of A.

   Input:
     A     - matrix to be factored; only its local row count and communicator are read here
     ftype - requested factorization type
   Output:
     B     - new n x n matrix (n = A->rmap->n) with its symbolic-factorization function pointers installed
             and B->factor set to ftype

   LU, ILU and ILUDT produce a MATSEQAIJ matrix; Cholesky and ICC produce a MATSEQSBAIJ matrix.
   Any other type fails with PETSC_ERR_SUP.
*/
109: PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B)
110: {
111: PetscInt n = A->rmap->n;
112: PetscErrorCode ierr;
/* reject unsupported types before anything is created, so the error path cannot leak a half-built matrix */
if (ftype != MAT_FACTOR_LU && ftype != MAT_FACTOR_ILU && ftype != MAT_FACTOR_ILUDT && ftype != MAT_FACTOR_CHOLESKY && ftype != MAT_FACTOR_ICC) SETERRQ(PETSC_ERR_SUP,"Factor type not supported");
115: MatCreate(((PetscObject)A)->comm,B);
116: MatSetSizes(*B,n,n,n,n);
117: if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){
118: MatSetType(*B,MATSEQAIJ);
119: (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ;
120: (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
121: } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) {
122: MatSetType(*B,MATSEQSBAIJ);
/* storage is skipped here (MAT_SKIP_ALLOCATION); the argument 1 is presumably the block size - confirm */
123: MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
124: (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ;
125: (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ;
126: } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); /* kept as a defensive fallback; the check above already filters these */
127: (*B)->factor = ftype;
128: return(0);
129: }
/*
   MatLUFactorSymbolic_SeqAIJ_inplace - symbolic LU factorization: computes the nonzero structure of the
   factor of A under the row permutation isrow and column permutation iscol and installs it in B.

   Input:
     A           - square matrix (PETSC_ERR_ARG_WRONG otherwise); an empty row raises PETSC_ERR_MAT_LU_ZRPVT
     isrow,iscol - row and column permutations; B keeps a reference to each
     info        - info->fill scales the initial estimate of the factor size
   Output (stored in B's Mat_SeqAIJ data):
     b->i    - row starts of the factor (bi)
     b->j    - column indices of the factor (bj), gathered from the free-space list
     b->diag - bdiag[i] = bi[i] + number of entries of row i with column index < i
     b->a    - allocated (bi[n]+1 scalars) but not filled here
     b->icol - inverse of iscol
   B->ops->lufactornumeric is set to the matching numeric routine (the Inode variant when a->inode.size is nonzero).
*/
134: PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
135: {
136: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
137: IS isicol;
138: PetscErrorCode ierr;
139: const PetscInt *r,*ic;
140: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
141: PetscInt *bi,*bj,*ajtmp;
142: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
143: PetscReal f;
144: PetscInt nlnk,*lnk,k,**bi_ptr;
145: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
146: PetscBT lnkbt;
147:
149: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
150: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
151: ISGetIndices(isrow,&r);
152: ISGetIndices(isicol,&ic);
154: /* get new row pointers */
155: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
156: bi[0] = 0;
158: /* bdiag is location of diagonal in factor */
159: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
160: bdiag[0] = 0;
162: /* linked list for storing column indices of the active row */
163: nlnk = n + 1;
164: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
/* bi_ptr[i] remembers where row i's indices live inside the free-space chunks; im[i] holds row i's entry count */
166: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
168: /* initial FreeSpace size is f*(ai[n]+1) */
169: f = info->fill;
170: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
171: current_space = free_space;
173: for (i=0; i<n; i++) {
174: /* copy previous fill into linked list */
175: nzi = 0;
176: nnz = ai[r[i]+1] - ai[r[i]];
177: if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
178: ajtmp = aj + ai[r[i]];
/* nlnk is read back after each list operation and accumulated into nzi (presumably the count of newly added indices) */
179: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
180: nzi += nlnk;
182: /* add pivot rows into linked list */
183: row = lnk[n];
184: while (row < i) {
185: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
186: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
187: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
188: nzi += nlnk;
189: row = lnk[row];
190: }
191: bi[i+1] = bi[i] + nzi;
192: im[i] = nzi;
194: /* mark bdiag */
/* walk the list from its head and count the entries with column index < i */
195: nzbd = 0;
196: nnz = nzi;
197: k = lnk[n];
198: while (nnz-- && k < i){
199: nzbd++;
200: k = lnk[k];
201: }
202: bdiag[i] = bi[i] + nzbd;
204: /* if free space is not available, make more free space */
205: if (current_space->local_remaining<nzi) {
206: nnz = (n - i)*nzi; /* estimated and max additional space needed */
207: PetscFreeSpaceGet(nnz,&current_space);
208: reallocs++;
209: }
211: /* copy data into free space, then initialize lnk */
212: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
213: bi_ptr[i] = current_space->array;
214: current_space->array += nzi;
215: current_space->local_used += nzi;
216: current_space->local_remaining -= nzi;
217: }
218: #if defined(PETSC_USE_INFO)
219: if (ai[n] != 0) {
220: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
221: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
222: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
223: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
224: PetscInfo(A,"for best performance.\n");
225: } else {
226: PetscInfo(A,"Empty matrix\n");
227: }
228: #endif
230: ISRestoreIndices(isrow,&r);
231: ISRestoreIndices(isicol,&ic);
233: /* destroy list of free space and other temporary array(s) */
234: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
235: PetscFreeSpaceContiguous(&free_space,bj);
236: PetscLLDestroy(lnk,lnkbt);
237: PetscFree2(bi_ptr,im);
239: /* put together the new matrix */
240: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
241: PetscLogObjectParent(B,isicol);
242: b = (Mat_SeqAIJ*)(B)->data;
243: b->free_a = PETSC_TRUE;
244: b->free_ij = PETSC_TRUE;
245: b->singlemalloc = PETSC_FALSE;
246: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
247: b->j = bj;
248: b->i = bi;
249: b->diag = bdiag;
250: b->ilen = 0;
251: b->imax = 0;
252: b->row = isrow;
253: b->col = iscol;
/* B now holds isrow and iscol, so take a reference on each */
254: PetscObjectReference((PetscObject)isrow);
255: PetscObjectReference((PetscObject)iscol);
256: b->icol = isicol;
257: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
259: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
260: PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
261: b->maxnz = b->nz = bi[n] ;
263: (B)->factor = MAT_FACTOR_LU;
264: (B)->info.factor_mallocs = reallocs;
265: (B)->info.fill_ratio_given = f;
267: if (ai[n]) {
268: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
269: } else {
270: (B)->info.fill_ratio_needed = 0.0;
271: }
272: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
273: if (a->inode.size) {
274: (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
275: }
276: return(0);
277: }
/*
   MatLUFactorSymbolic_SeqAIJ - symbolic LU factorization: computes the nonzero structure of the factor of A
   under the row permutation isrow and column permutation iscol and installs it in B.

   When the option -ilu_old is set, the work is delegated to MatLUFactorSymbolic_SeqAIJ_inplace().

   Input:
     A           - square matrix (PETSC_ERR_ARG_WRONG otherwise); an empty row raises PETSC_ERR_MAT_LU_ZRPVT
     isrow,iscol - row and column permutations; B keeps a reference to each
     info        - info->fill scales the initial estimate of the factor size
   Output (stored in B's Mat_SeqAIJ data):
     b->i, b->j, b->diag - structure produced by PetscFreeSpaceContiguous_LU() from the per-row counts
                           gathered here; afterwards bdiag[0]+1 is used as the total number of entries
     b->a                - allocated (bdiag[0]+1 scalars) but not filled here
     b->icol             - inverse of iscol
   B->ops->lufactornumeric is set to MatLUFactorNumeric_SeqAIJ (the Inode variant when a->inode.size is nonzero).
*/
281: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
282: {
283: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
284: IS isicol;
285: PetscErrorCode ierr;
286: const PetscInt *r,*ic;
287: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j;
288: PetscInt *bi,*bj,*ajtmp;
289: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
290: PetscReal f;
291: PetscInt nlnk,*lnk,k,**bi_ptr;
292: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
293: PetscBT lnkbt;
294: PetscTruth olddatastruct=PETSC_FALSE;
297: /* Uncomment the olddatastruct part only while testing new data structure for MatSolve() */
298: PetscOptionsGetTruth(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);
299: if(olddatastruct){
300: MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);
301: return(0);
302: }
303:
305: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
306: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
307: ISGetIndices(isrow,&r);
308: ISGetIndices(isicol,&ic);
310: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
311: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
312: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
313: bi[0] = bdiag[0] = 0;
315: /* linked list for storing column indices of the active row */
316: nlnk = n + 1;
317: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
/* bi_ptr[i] remembers where row i's indices live inside the free-space chunks; im[i] holds row i's entry count */
319: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
321: /* initial FreeSpace size is f*(ai[n]+1) */
322: f = info->fill;
323: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
324: current_space = free_space;
326: for (i=0; i<n; i++) {
327: /* copy previous fill into linked list */
328: nzi = 0;
329: nnz = ai[r[i]+1] - ai[r[i]];
330: if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
331: ajtmp = aj + ai[r[i]];
/* nlnk is read back after each list operation and accumulated into nzi (presumably the count of newly added indices) */
332: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
333: nzi += nlnk;
335: /* add pivot rows into linked list */
336: row = lnk[n];
337: while (row < i){
338: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
339: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
340: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
341: nzi += nlnk;
342: row = lnk[row];
343: }
344: bi[i+1] = bi[i] + nzi;
345: im[i] = nzi;
347: /* mark bdiag */
/* walk the list from its head and count the entries with column index < i */
348: nzbd = 0;
349: nnz = nzi;
350: k = lnk[n];
351: while (nnz-- && k < i){
352: nzbd++;
353: k = lnk[k];
354: }
355: bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
357: /* if free space is not available, make more free space */
358: if (current_space->local_remaining<nzi) {
359: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
360: PetscFreeSpaceGet(nnz,&current_space);
361: reallocs++;
362: }
364: /* copy data into free space, then initialize lnk */
365: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
366: bi_ptr[i] = current_space->array;
367: current_space->array += nzi;
368: current_space->local_used += nzi;
369: current_space->local_remaining -= nzi;
370: }
371: #if defined(PETSC_USE_INFO)
372: if (ai[n] != 0) {
373: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
374: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
375: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
376: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
377: PetscInfo(A,"for best performance.\n");
378: } else {
379: PetscInfo(A,"Empty matrix\n");
380: }
381: #endif
383: ISRestoreIndices(isrow,&r);
384: ISRestoreIndices(isicol,&ic);
386: /* destroy list of free space and other temporary array(s) */
387: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
/* NOTE(review): the code below reads bdiag[0]+1 as the total entry count, so this call presumably rewrites bi/bdiag - confirm */
388: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
389: PetscLLDestroy(lnk,lnkbt);
390: PetscFree2(bi_ptr,im);
392: /* put together the new matrix */
393: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
394: PetscLogObjectParent(B,isicol);
395: b = (Mat_SeqAIJ*)(B)->data;
396: b->free_a = PETSC_TRUE;
397: b->free_ij = PETSC_TRUE;
398: b->singlemalloc = PETSC_FALSE;
399: PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
400: b->j = bj;
401: b->i = bi;
402: b->diag = bdiag;
403: b->ilen = 0;
404: b->imax = 0;
405: b->row = isrow;
406: b->col = iscol;
/* B now holds isrow and iscol, so take a reference on each */
407: PetscObjectReference((PetscObject)isrow);
408: PetscObjectReference((PetscObject)iscol);
409: b->icol = isicol;
410: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
412: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
413: PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
414: b->maxnz = b->nz = bdiag[0]+1;
415: B->factor = MAT_FACTOR_LU;
416: B->info.factor_mallocs = reallocs;
417: B->info.fill_ratio_given = f;
419: if (ai[n]) {
420: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
421: } else {
422: B->info.fill_ratio_needed = 0.0;
423: }
424: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
425: if (a->inode.size) {
426: B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
427: }
428: return(0);
429: }
431: /*
432: Trouble in factorization, should we dump the original matrix?

   MatFactorDumpMatrix - when the option -mat_factor_dump_on_error is set, writes A with MatView() to a
   binary viewer opened on the file "matrix_factor_error.<PetscGlobalRank>".  Without the option nothing is written.
433: */
436: PetscErrorCode MatFactorDumpMatrix(Mat A)
437: {
439: PetscTruth flg = PETSC_FALSE;
442: PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);
443: if (flg) {
444: PetscViewer viewer;
445: char filename[PETSC_MAX_PATH_LEN];
/* the file name carries the global rank as its suffix */
447: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
448: PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);
449: MatView(A,viewer);
450: PetscViewerDestroy(viewer);
451: }
452: return(0);
453: }
/*
   MatLUFactorNumeric_SeqAIJ - numeric LU factorization of A into B, using the structure already stored in B
   (b->i, b->j, b->diag), the row permutation b->row and the inverse column permutation b->icol.

   Factor layout as accessed below:
     L part of row i            : positions bi[i] .. bi[i+1]-1
     U part of row i (off-diag) : positions bdiag[i+1]+1 .. bdiag[i]-1
     diagonal of row i          : position bdiag[i], stored as 1/pivot

   The whole factorization is repeated while sctx.useshift is set, with sctx.shift_amount added to each diagonal.
   On completion the solve function pointers of B are installed.
*/
457: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
458: {
459: Mat C=B;
460: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
461: IS isrow = b->row,isicol = b->icol;
462: PetscErrorCode ierr;
463: const PetscInt *r,*ic,*ics;
464: const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag;
465: PetscInt i,j,k,nz,nzL,row,*pj;
466: const PetscInt *ajtmp,*bjtmp;
467: MatScalar *rtmp,*pc,multiplier,*pv;
468: const MatScalar *aa=a->a,*v;
469: PetscTruth row_identity,col_identity;
470: FactorShiftCtx sctx;
471: const PetscInt *ddiag;
472: PetscReal rs;
473: MatScalar d;
476: /* MatPivotSetUp(): initialize shift context sctx */
477: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
479: if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
480: ddiag = a->diag;
481: sctx.shift_top = info->zeropivot;
482: for (i=0; i<n; i++) {
483: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
484: d = (aa)[ddiag[i]];
485: rs = -PetscAbsScalar(d) - PetscRealPart(d);
486: v = aa+ai[i];
487: nz = ai[i+1] - ai[i];
488: for (j=0; j<nz; j++)
489: rs += PetscAbsScalar(v[j]);
490: if (rs>sctx.shift_top) sctx.shift_top = rs;
491: }
492: sctx.shift_top *= 1.1;
493: sctx.nshift_max = 5;
494: sctx.shift_lo = 0.;
495: sctx.shift_hi = 1.;
496: }
498: ISGetIndices(isrow,&r);
499: ISGetIndices(isicol,&ic);
/* rtmp is a dense work row indexed by (permuted) column */
500: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
501: ics = ic;
503: do {
504: sctx.useshift = PETSC_FALSE;
505: for (i=0; i<n; i++){
506: /* zero rtmp */
507: /* L part */
508: nz = bi[i+1] - bi[i];
509: bjtmp = bj + bi[i];
510: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
512: /* U part */
513: nz = bdiag[i]-bdiag[i+1];
514: bjtmp = bj + bdiag[i+1]+1;
515: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
516:
517: /* load in initial (unfactored row) */
518: nz = ai[r[i]+1] - ai[r[i]];
519: ajtmp = aj + ai[r[i]];
520: v = aa + ai[r[i]];
521: for (j=0; j<nz; j++) {
522: rtmp[ics[ajtmp[j]]] = v[j];
523: }
524: /* ZeropivotApply() */
525: rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */
526:
527: /* elimination */
/* visit the nzL column indices of L(i,:); each nonzero entry is scaled by the stored inverse pivot and U(row,:) is subtracted */
528: bjtmp = bj + bi[i];
529: row = *bjtmp++;
530: nzL = bi[i+1] - bi[i];
531: for(k=0; k < nzL;k++) {
532: pc = rtmp + row;
533: if (*pc != 0.0) {
534: pv = b->a + bdiag[row];
/* multiplication, not division: the diagonal at bdiag[row] already holds 1/pivot */
535: multiplier = *pc * (*pv);
536: *pc = multiplier;
537: pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */
538: pv = b->a + bdiag[row+1]+1;
539: nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */
540: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
541: PetscLogFlops(2.0*nz);
542: }
543: row = *bjtmp++;
544: }
546: /* finished row so stick it into b->a */
/* rs accumulates the absolute values of the off-diagonal entries of the finished row */
547: rs = 0.0;
548: /* L part */
549: pv = b->a + bi[i] ;
550: pj = b->j + bi[i] ;
551: nz = bi[i+1] - bi[i];
552: for (j=0; j<nz; j++) {
553: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
554: }
556: /* U part */
557: pv = b->a + bdiag[i+1]+1;
558: pj = b->j + bdiag[i+1]+1;
559: nz = bdiag[i] - bdiag[i+1]-1;
560: for (j=0; j<nz; j++) {
561: pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]);
562: }
564: /* MatPivotCheck() */
/* the check is given the row sum and the pivot through sctx; sctx.pv is read back afterwards, so it may be altered by the check */
565: sctx.rs = rs;
566: sctx.pv = rtmp[i];
567: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO){
568: MatPivotCheck_nz(info,sctx,i);
569: } else if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE){
570: MatPivotCheck_pd(info,sctx,i);
571: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
572: MatPivotCheck_inblocks(info,sctx,i);
573: } else {
574: MatPivotCheck_none(info,sctx,i);
575: }
576: rtmp[i] = sctx.pv;
578: /* Mark diagonal and invert diagonal for simpler triangular solves */
579: pv = b->a + bdiag[i];
580: *pv = 1.0/rtmp[i];
582: } /* endof for (i=0; i<n; i++){ */
584: /* MatPivotRefine() */
585: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.useshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){
586: /*
587: * if no shift in this attempt & shifting & started shifting & can refine,
588: * then try lower shift
589: */
590: sctx.shift_hi = sctx.shift_fraction;
591: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
592: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
593: sctx.useshift = PETSC_TRUE;
594: sctx.nshift++;
595: }
596: } while (sctx.useshift);
598: PetscFree(rtmp);
599: ISRestoreIndices(isicol,&ic);
600: ISRestoreIndices(isrow,&r);
601:
/* pick the natural-ordering solve when both permutations are the identity */
602: ISIdentity(isrow,&row_identity);
603: ISIdentity(isicol,&col_identity);
604: if (row_identity && col_identity) {
605: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
606: } else {
607: C->ops->solve = MatSolve_SeqAIJ;
608: }
609: C->ops->solveadd = MatSolveAdd_SeqAIJ;
610: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ;
611: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ;
612: C->ops->matsolve = MatMatSolve_SeqAIJ;
613: C->assembled = PETSC_TRUE;
614: C->preallocated = PETSC_TRUE;
615: PetscLogFlops(C->cmap->n);
617: /* MatShiftView(A,info,&sctx) */
618: if (sctx.nshift){
619: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
620: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
621: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
622: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
623: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
624: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
625: }
626: }
627: Mat_CheckInode_FactorLU(C,PETSC_FALSE);
628: return(0);
629: }
/*
   MatLUFactorNumeric_SeqAIJ_inplace - numeric LU factorization of A into B, using the structure already stored
   in B (b->i, b->j, b->diag), the row permutation b->row and the inverse column permutation b->icol.

   Factor layout as accessed below: row i occupies positions bi[i] .. bi[i+1]-1 with its diagonal at
   diag_offset[i].  Pivots are kept un-inverted during elimination and inverted in a final pass.

   The factorization is restarted while sctx.useshift is set, with sctx.shift_amount added to the diagonal.
*/
633: PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
634: {
635: Mat C=B;
636: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
637: IS isrow = b->row,isicol = b->icol;
638: PetscErrorCode ierr;
639: const PetscInt *r,*ic,*ics;
640: PetscInt nz,row,i,j,n=A->rmap->n,diag;
641: const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
642: const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj;
643: MatScalar *pv,*rtmp,*pc,multiplier,d;
644: const MatScalar *v,*aa=a->a;
645: PetscReal rs=0.0;
646: FactorShiftCtx sctx;
647: PetscInt newshift;
648: const PetscInt *ddiag;
649: PetscTruth row_identity, col_identity;
652: ISGetIndices(isrow,&r);
653: ISGetIndices(isicol,&ic);
/* rtmp is a dense work row indexed by (permuted) column */
654: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
655: ics = ic;
657: /* initialize shift context sctx */
658: sctx.nshift = 0;
659: sctx.nshift_max = 0;
660: sctx.shift_top = 0.0;
661: sctx.shift_lo = 0.0;
662: sctx.shift_hi = 0.0;
663: sctx.shift_fraction = 0.0;
664: sctx.shift_amount = 0.0;
666: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
667: ddiag = a->diag;
668: sctx.shift_top = info->zeropivot;
669: for (i=0; i<n; i++) {
670: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
671: d = (aa)[ddiag[i]];
672: rs = -PetscAbsScalar(d) - PetscRealPart(d);
673: v = aa+ai[i];
674: nz = ai[i+1] - ai[i];
675: for (j=0; j<nz; j++)
676: rs += PetscAbsScalar(v[j]);
677: if (rs>sctx.shift_top) sctx.shift_top = rs;
678: }
679: sctx.shift_top *= 1.1;
680: sctx.nshift_max = 5;
681: sctx.shift_lo = 0.;
682: sctx.shift_hi = 1.;
683: }
685: do {
686: sctx.useshift = PETSC_FALSE;
687: for (i=0; i<n; i++){
/* clear the work row at every position of factor row i */
688: nz = bi[i+1] - bi[i];
689: bjtmp = bj + bi[i];
690: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
692: /* load in initial (unfactored row) */
693: nz = ai[r[i]+1] - ai[r[i]];
694: ajtmp = aj + ai[r[i]];
695: v = aa + ai[r[i]];
696: for (j=0; j<nz; j++) {
697: rtmp[ics[ajtmp[j]]] = v[j];
698: }
699: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
700: /* if (sctx.shift_amount > 0.0) printf("row %d, shift %g\n",i,sctx.shift_amount); */
/* eliminate with every earlier row whose column appears in row i */
702: row = *bjtmp++;
703: while (row < i) {
704: pc = rtmp + row;
705: if (*pc != 0.0) {
706: pv = b->a + diag_offset[row];
707: pj = b->j + diag_offset[row] + 1;
/* divide by the pivot of the earlier row; pv then points at the first entry after the diagonal */
708: multiplier = *pc / *pv++;
709: *pc = multiplier;
710: nz = bi[row+1] - diag_offset[row] - 1;
711: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
712: PetscLogFlops(2.0*nz);
713: }
714: row = *bjtmp++;
715: }
716: /* finished row so stick it into b->a */
717: pv = b->a + bi[i] ;
718: pj = b->j + bi[i] ;
719: nz = bi[i+1] - bi[i];
720: diag = diag_offset[i] - bi[i];
721: rs = 0.0;
722: for (j=0; j<nz; j++) {
723: pv[j] = rtmp[pj[j]];
724: rs += PetscAbsScalar(pv[j]);
725: }
/* rs ends up as the sum of absolute values of the off-diagonal entries only */
726: rs -= PetscAbsScalar(pv[diag]);
728: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
729: sctx.rs = rs;
730: sctx.pv = pv[diag];
/* newshift == 1 abandons the current pass; presumably the check also sets sctx.useshift so the do-while restarts - confirm */
731: MatLUCheckShift_inline(info,sctx,i,newshift);
732: if (newshift == 1) break;
733: }
735: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.useshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
736: /*
737: * if no shift in this attempt & shifting & started shifting & can refine,
738: * then try lower shift
739: */
740: sctx.shift_hi = sctx.shift_fraction;
741: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
742: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
743: sctx.useshift = PETSC_TRUE;
744: sctx.nshift++;
745: }
746: } while (sctx.useshift);
748: /* invert diagonal entries for simpler triangular solves */
749: for (i=0; i<n; i++) {
750: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
751: }
752: PetscFree(rtmp);
753: ISRestoreIndices(isicol,&ic);
754: ISRestoreIndices(isrow,&r);
756: ISIdentity(isrow,&row_identity);
757: ISIdentity(isicol,&col_identity);
758: if (row_identity && col_identity) {
759: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace;
760: } else {
761: C->ops->solve = MatSolve_SeqAIJ_inplace;
762: }
763: C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
764: C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
765: C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
766: C->ops->matsolve = MatMatSolve_SeqAIJ_inplace;
767: C->assembled = PETSC_TRUE;
768: C->preallocated = PETSC_TRUE;
769: PetscLogFlops(C->cmap->n);
770: if (sctx.nshift){
771: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
772: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
773: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
774: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
775: }
776: }
/* NOTE(review): the two assignments below unconditionally replace the solve routines chosen above, so the
   natural-ordering selection never takes effect - confirm whether this override is intended */
777: (C)->ops->solve = MatSolve_SeqAIJ_inplace;
778: (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
779: Mat_CheckInode(C,PETSC_FALSE);
780: return(0);
781: }
783: /*
784: This routine implements inplace ILU(0) with row and/or column permutations.
785: Input:
786: A - original matrix
787: Output:
788: A - a->i (rowptr) is same as original rowptr, but the factored i-th row is stored in rowperm[i]
789: a->j (col index) is permuted by the inverse of colperm, then sorted
790: a->a reordered accordingly with a->j
791: a->diag (ptr to diagonal elements) is updated.

   B must be the same object as A (PETSC_ERR_ARG_INCOMP otherwise).  The permutations are taken from
   a->row and a->icol.  Diagonal entries are stored inverted on return.
792: */
795: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info)
796: {
797: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
798: IS isrow = a->row,isicol = a->icol;
800: const PetscInt *r,*ic,*ics;
801: PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j;
802: PetscInt *ajtmp,nz,row;
803: PetscInt *diag = a->diag,nbdiag,*pj;
804: PetscScalar *rtmp,*pc,multiplier,d;
805: MatScalar *v,*pv;
806: PetscReal rs;
807: FactorShiftCtx sctx;
808: PetscInt newshift;
811: if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
812: ISGetIndices(isrow,&r);
813: ISGetIndices(isicol,&ic);
/* rtmp is a dense work row; it is zeroed once here and not cleared again inside the row loop */
814: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
815: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
816: ics = ic;
818: sctx.shift_top = 0.;
819: sctx.nshift_max = 0;
820: sctx.shift_lo = 0.;
821: sctx.shift_hi = 0.;
822: sctx.shift_fraction = 0.;
824: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
825: sctx.shift_top = 0.;
826: for (i=0; i<n; i++) {
827: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
828: d = (a->a)[diag[i]];
829: rs = -PetscAbsScalar(d) - PetscRealPart(d);
830: v = a->a+ai[i];
831: nz = ai[i+1] - ai[i];
832: for (j=0; j<nz; j++)
833: rs += PetscAbsScalar(v[j]);
834: if (rs>sctx.shift_top) sctx.shift_top = rs;
835: }
836: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
837: sctx.shift_top *= 1.1;
838: sctx.nshift_max = 5;
839: sctx.shift_lo = 0.;
840: sctx.shift_hi = 1.;
841: }
843: sctx.shift_amount = 0.;
844: sctx.nshift = 0;
/* NOTE(review): the row loop rewrites a->j and a->a in place, so a repeated pass of this do-while would
   start from already permuted/factored data rather than from the original matrix - confirm */
845: do {
846: sctx.useshift = PETSC_FALSE;
847: for (i=0; i<n; i++){
848: /* load in initial unfactored row */
849: nz = ai[r[i]+1] - ai[r[i]];
850: ajtmp = aj + ai[r[i]];
851: v = a->a + ai[r[i]];
852: /* sort permuted ajtmp and values v accordingly */
853: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
854: PetscSortIntWithScalarArray(nz,ajtmp,v);
/* recompute the diagonal position of this row: row start plus the number of entries with column < i */
856: diag[r[i]] = ai[r[i]];
857: for (j=0; j<nz; j++) {
858: rtmp[ajtmp[j]] = v[j];
859: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
860: }
/* NOTE(review): rtmp is indexed by permuted column above, while the shift is applied at index r[i] - confirm this is the intended diagonal position */
861: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
/* eliminate with every earlier (permuted) row whose column appears in this row */
863: row = *ajtmp++;
864: while (row < i) {
865: pc = rtmp + row;
866: if (*pc != 0.0) {
867: pv = a->a + diag[r[row]];
868: pj = aj + diag[r[row]] + 1;
/* divide by the pivot of the earlier row; pv then points at the first entry after the diagonal */
870: multiplier = *pc / *pv++;
871: *pc = multiplier;
872: nz = ai[r[row]+1] - diag[r[row]] - 1;
873: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
874: PetscLogFlops(2.0*nz);
875: }
876: row = *ajtmp++;
877: }
878: /* finished row so overwrite it onto a->a */
879: pv = a->a + ai[r[i]] ;
880: pj = aj + ai[r[i]] ;
881: nz = ai[r[i]+1] - ai[r[i]];
882: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
883:
/* rs sums the absolute values of the off-diagonal entries of the finished row */
884: rs = 0.0;
885: for (j=0; j<nz; j++) {
886: pv[j] = rtmp[pj[j]];
887: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
888: }
890: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
891: sctx.rs = rs;
892: sctx.pv = pv[nbdiag];
893: MatLUCheckShift_inline(info,sctx,i,newshift);
894: if (newshift == 1) break;
895: }
897: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.useshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
898: /*
899: * if no shift in this attempt & shifting & started shifting & can refine,
900: * then try lower shift
901: */
902: sctx.shift_hi = sctx.shift_fraction;
903: sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
904: sctx.shift_amount = sctx.shift_fraction * sctx.shift_top;
905: sctx.useshift = PETSC_TRUE;
906: sctx.nshift++;
907: }
908: } while (sctx.useshift);
910: /* invert diagonal entries for simpler triangular solves */
911: for (i=0; i<n; i++) {
912: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
913: }
915: PetscFree(rtmp);
916: ISRestoreIndices(isicol,&ic);
917: ISRestoreIndices(isrow,&r);
918: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
919: A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace;
920: A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace;
921: A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace;
922: A->assembled = PETSC_TRUE;
923: A->preallocated = PETSC_TRUE;
924: PetscLogFlops(A->cmap->n);
925: if (sctx.nshift){
926: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
927: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
928: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
929: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
930: }
931: }
932: return(0);
933: }
935: /* ----------------------------------------------------------- */
/*
   MatLUFactor_SeqAIJ - LU-factors A and leaves the result in A itself.

   A factor matrix C is obtained with MatGetFactor(), factored symbolically and numerically with the
   row/column orderings row and col, and then copied over A's header with MatHeaderCopy().
*/
938: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info)
939: {
941: Mat C;
944: MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);
945: MatLUFactorSymbolic(C,A,row,col,info);
946: MatLUFactorNumeric(C,A,info);
/* copy the solve routines selected by the numeric factorization onto A before the header copy */
947: A->ops->solve = C->ops->solve;
948: A->ops->solvetranspose = C->ops->solvetranspose;
949: MatHeaderCopy(A,C);
/* after the header copy A->data is read as the factor's Mat_SeqAIJ; its icol index set is logged as a child of A */
950: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
951: return(0);
952: }
953: /* ----------------------------------------------------------- */
/*
   MatSolve_SeqAIJ_inplace - solves with the factored matrix A: a forward sweep over the part of each row
   before a->diag[i], then a backward sweep over the part after it.

   Input:
     A  - factored matrix; a->row and a->col supply the row and column index sets, a->solve_work the scratch vector
     bb - right-hand side
   Output:
     xx - solution

   The right-hand side is read through the row indices (b[r[i]]) and the solution is written through the
   column indices (x[c[i]]).  Returns immediately when the local size is zero.
*/
958: PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
959: {
960: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
961: IS iscol = a->col,isrow = a->row;
962: PetscErrorCode ierr;
963: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
964: PetscInt nz;
965: const PetscInt *rout,*cout,*r,*c;
966: PetscScalar *x,*tmp,*tmps,sum;
967: const PetscScalar *b;
968: const MatScalar *aa = a->a,*v;
969:
971: if (!n) return(0);
973: VecGetArray(bb,(PetscScalar**)&b);
974: VecGetArray(xx,&x);
975: tmp = a->solve_work;
/* r walks the row indices forward from the start; c starts at the last column index and walks backward */
977: ISGetIndices(isrow,&rout); r = rout;
978: ISGetIndices(iscol,&cout); c = cout + (n-1);
980: /* forward solve the lower triangular */
981: tmp[0] = b[*r++];
982: tmps = tmp;
983: for (i=1; i<n; i++) {
984: v = aa + ai[i] ;
985: vi = aj + ai[i] ;
/* nz = number of entries of row i stored before its diagonal */
986: nz = a->diag[i] - ai[i];
987: sum = b[*r++];
988: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
989: tmp[i] = sum;
990: }
992: /* backward solve the upper triangular */
993: for (i=n-1; i>=0; i--){
994: v = aa + a->diag[i] + 1;
995: vi = aj + a->diag[i] + 1;
/* nz = number of entries of row i stored after its diagonal */
996: nz = ai[i+1] - a->diag[i] - 1;
997: sum = tmp[i];
998: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
/* multiplies by the stored diagonal entry, i.e. the factor is expected to hold the inverted pivot there */
999: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
1000: }
1002: ISRestoreIndices(isrow,&rout);
1003: ISRestoreIndices(iscol,&cout);
1004: VecRestoreArray(bb,(PetscScalar**)&b);
1005: VecRestoreArray(xx,&x);
1006: PetscLogFlops(2.0*a->nz - A->cmap->n);
1007: return(0);
1008: }
1012: PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X)
1013: {
1014: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1015: IS iscol = a->col,isrow = a->row;
1016: PetscErrorCode ierr;
1017: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1018: PetscInt nz,neq;
1019: const PetscInt *rout,*cout,*r,*c;
1020: PetscScalar *x,*b,*tmp,*tmps,sum;
1021: const MatScalar *aa = a->a,*v;
1022: PetscTruth bisdense,xisdense;
1025: if (!n) return(0);
1027: PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1028: if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1029: PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1030: if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1032: MatGetArray(B,&b);
1033: MatGetArray(X,&x);
1034:
1035: tmp = a->solve_work;
1036: ISGetIndices(isrow,&rout); r = rout;
1037: ISGetIndices(iscol,&cout); c = cout;
1039: for (neq=0; neq<B->cmap->n; neq++){
1040: /* forward solve the lower triangular */
1041: tmp[0] = b[r[0]];
1042: tmps = tmp;
1043: for (i=1; i<n; i++) {
1044: v = aa + ai[i] ;
1045: vi = aj + ai[i] ;
1046: nz = a->diag[i] - ai[i];
1047: sum = b[r[i]];
1048: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1049: tmp[i] = sum;
1050: }
1051: /* backward solve the upper triangular */
1052: for (i=n-1; i>=0; i--){
1053: v = aa + a->diag[i] + 1;
1054: vi = aj + a->diag[i] + 1;
1055: nz = ai[i+1] - a->diag[i] - 1;
1056: sum = tmp[i];
1057: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1058: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
1059: }
1061: b += n;
1062: x += n;
1063: }
1064: ISRestoreIndices(isrow,&rout);
1065: ISRestoreIndices(iscol,&cout);
1066: MatRestoreArray(B,&b);
1067: MatRestoreArray(X,&x);
1068: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1069: return(0);
1070: }
1074: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
1075: {
1076: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1077: IS iscol = a->col,isrow = a->row;
1078: PetscErrorCode ierr;
1079: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1080: PetscInt nz,neq;
1081: const PetscInt *rout,*cout,*r,*c;
1082: PetscScalar *x,*b,*tmp,sum;
1083: const MatScalar *aa = a->a,*v;
1084: PetscTruth bisdense,xisdense;
1087: if (!n) return(0);
1089: PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);
1090: if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix");
1091: PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);
1092: if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix");
1094: MatGetArray(B,&b);
1095: MatGetArray(X,&x);
1096:
1097: tmp = a->solve_work;
1098: ISGetIndices(isrow,&rout); r = rout;
1099: ISGetIndices(iscol,&cout); c = cout;
1101: for (neq=0; neq<B->cmap->n; neq++){
1102: /* forward solve the lower triangular */
1103: tmp[0] = b[r[0]];
1104: v = aa;
1105: vi = aj;
1106: for (i=1; i<n; i++) {
1107: nz = ai[i+1] - ai[i];
1108: sum = b[r[i]];
1109: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1110: tmp[i] = sum;
1111: v += nz; vi += nz;
1112: }
1114: /* backward solve the upper triangular */
1115: for (i=n-1; i>=0; i--){
1116: v = aa + adiag[i+1]+1;
1117: vi = aj + adiag[i+1]+1;
1118: nz = adiag[i]-adiag[i+1]-1;
1119: sum = tmp[i];
1120: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
1121: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
1122: }
1123:
1124: b += n;
1125: x += n;
1126: }
1127: ISRestoreIndices(isrow,&rout);
1128: ISRestoreIndices(iscol,&cout);
1129: MatRestoreArray(B,&b);
1130: MatRestoreArray(X,&x);
1131: PetscLogFlops(B->cmap->n*(2.0*a->nz - n));
1132: return(0);
1133: }
1137: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
1138: {
1139: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1140: IS iscol = a->col,isrow = a->row;
1141: PetscErrorCode ierr;
1142: const PetscInt *r,*c,*rout,*cout;
1143: PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j;
1144: PetscInt nz,row;
1145: PetscScalar *x,*b,*tmp,*tmps,sum;
1146: const MatScalar *aa = a->a,*v;
1149: if (!n) return(0);
1151: VecGetArray(bb,&b);
1152: VecGetArray(xx,&x);
1153: tmp = a->solve_work;
1155: ISGetIndices(isrow,&rout); r = rout;
1156: ISGetIndices(iscol,&cout); c = cout + (n-1);
1158: /* forward solve the lower triangular */
1159: tmp[0] = b[*r++];
1160: tmps = tmp;
1161: for (row=1; row<n; row++) {
1162: i = rout[row]; /* permuted row */
1163: v = aa + ai[i] ;
1164: vi = aj + ai[i] ;
1165: nz = a->diag[i] - ai[i];
1166: sum = b[*r++];
1167: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1168: tmp[row] = sum;
1169: }
1171: /* backward solve the upper triangular */
1172: for (row=n-1; row>=0; row--){
1173: i = rout[row]; /* permuted row */
1174: v = aa + a->diag[i] + 1;
1175: vi = aj + a->diag[i] + 1;
1176: nz = ai[i+1] - a->diag[i] - 1;
1177: sum = tmp[row];
1178: PetscSparseDenseMinusDot(sum,tmps,v,vi,nz);
1179: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
1180: }
1182: ISRestoreIndices(isrow,&rout);
1183: ISRestoreIndices(iscol,&cout);
1184: VecRestoreArray(bb,&b);
1185: VecRestoreArray(xx,&x);
1186: PetscLogFlops(2.0*a->nz - A->cmap->n);
1187: return(0);
1188: }
1190: /* ----------------------------------------------------------- */
1191: #include "../src/mat/impls/aij/seq/ftn-kernels/fsolve.h"
1194: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
1195: {
1196: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1197: PetscErrorCode ierr;
1198: PetscInt n = A->rmap->n;
1199: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag;
1200: PetscScalar *x;
1201: const PetscScalar *b;
1202: const MatScalar *aa = a->a;
1203: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1204: PetscInt adiag_i,i,nz,ai_i;
1205: const PetscInt *vi;
1206: const MatScalar *v;
1207: PetscScalar sum;
1208: #endif
1211: if (!n) return(0);
1213: VecGetArray(bb,(PetscScalar**)&b);
1214: VecGetArray(xx,&x);
1216: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
1217: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
1218: #else
1219: /* forward solve the lower triangular */
1220: x[0] = b[0];
1221: for (i=1; i<n; i++) {
1222: ai_i = ai[i];
1223: v = aa + ai_i;
1224: vi = aj + ai_i;
1225: nz = adiag[i] - ai_i;
1226: sum = b[i];
1227: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1228: x[i] = sum;
1229: }
1231: /* backward solve the upper triangular */
1232: for (i=n-1; i>=0; i--){
1233: adiag_i = adiag[i];
1234: v = aa + adiag_i + 1;
1235: vi = aj + adiag_i + 1;
1236: nz = ai[i+1] - adiag_i - 1;
1237: sum = x[i];
1238: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
1239: x[i] = sum*aa[adiag_i];
1240: }
1241: #endif
1242: PetscLogFlops(2.0*a->nz - A->cmap->n);
1243: VecRestoreArray(bb,(PetscScalar**)&b);
1244: VecRestoreArray(xx,&x);
1245: return(0);
1246: }
1250: PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx)
1251: {
1252: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1253: IS iscol = a->col,isrow = a->row;
1254: PetscErrorCode ierr;
1255: PetscInt i, n = A->rmap->n,j;
1256: PetscInt nz;
1257: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j;
1258: PetscScalar *x,*tmp,sum;
1259: const PetscScalar *b;
1260: const MatScalar *aa = a->a,*v;
1263: if (yy != xx) {VecCopy(yy,xx);}
1265: VecGetArray(bb,(PetscScalar**)&b);
1266: VecGetArray(xx,&x);
1267: tmp = a->solve_work;
1269: ISGetIndices(isrow,&rout); r = rout;
1270: ISGetIndices(iscol,&cout); c = cout + (n-1);
1272: /* forward solve the lower triangular */
1273: tmp[0] = b[*r++];
1274: for (i=1; i<n; i++) {
1275: v = aa + ai[i] ;
1276: vi = aj + ai[i] ;
1277: nz = a->diag[i] - ai[i];
1278: sum = b[*r++];
1279: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1280: tmp[i] = sum;
1281: }
1283: /* backward solve the upper triangular */
1284: for (i=n-1; i>=0; i--){
1285: v = aa + a->diag[i] + 1;
1286: vi = aj + a->diag[i] + 1;
1287: nz = ai[i+1] - a->diag[i] - 1;
1288: sum = tmp[i];
1289: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1290: tmp[i] = sum*aa[a->diag[i]];
1291: x[*c--] += tmp[i];
1292: }
1294: ISRestoreIndices(isrow,&rout);
1295: ISRestoreIndices(iscol,&cout);
1296: VecRestoreArray(bb,(PetscScalar**)&b);
1297: VecRestoreArray(xx,&x);
1298: PetscLogFlops(2.0*a->nz);
1300: return(0);
1301: }
1305: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
1306: {
1307: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1308: IS iscol = a->col,isrow = a->row;
1309: PetscErrorCode ierr;
1310: PetscInt i, n = A->rmap->n,j;
1311: PetscInt nz;
1312: const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag;
1313: PetscScalar *x,*tmp,sum;
1314: const PetscScalar *b;
1315: const MatScalar *aa = a->a,*v;
1318: if (yy != xx) {VecCopy(yy,xx);}
1320: VecGetArray(bb,(PetscScalar**)&b);
1321: VecGetArray(xx,&x);
1322: tmp = a->solve_work;
1324: ISGetIndices(isrow,&rout); r = rout;
1325: ISGetIndices(iscol,&cout); c = cout;
1327: /* forward solve the lower triangular */
1328: tmp[0] = b[r[0]];
1329: v = aa;
1330: vi = aj;
1331: for (i=1; i<n; i++) {
1332: nz = ai[i+1] - ai[i];
1333: sum = b[r[i]];
1334: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1335: tmp[i] = sum;
1336: v += nz; vi += nz;
1337: }
1339: /* backward solve the upper triangular */
1340: v = aa + adiag[n-1];
1341: vi = aj + adiag[n-1];
1342: for (i=n-1; i>=0; i--){
1343: nz = adiag[i] - adiag[i+1] - 1;
1344: sum = tmp[i];
1345: for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]];
1346: tmp[i] = sum*v[nz];
1347: x[c[i]] += tmp[i];
1348: v += nz+1; vi += nz+1;
1349: }
1351: ISRestoreIndices(isrow,&rout);
1352: ISRestoreIndices(iscol,&cout);
1353: VecRestoreArray(bb,(PetscScalar**)&b);
1354: VecRestoreArray(xx,&x);
1355: PetscLogFlops(2.0*a->nz);
1357: return(0);
1358: }
1362: PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx)
1363: {
1364: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1365: IS iscol = a->col,isrow = a->row;
1366: PetscErrorCode ierr;
1367: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1368: PetscInt i,n = A->rmap->n,j;
1369: PetscInt nz;
1370: PetscScalar *x,*tmp,s1;
1371: const MatScalar *aa = a->a,*v;
1372: const PetscScalar *b;
1375: VecGetArray(bb,(PetscScalar**)&b);
1376: VecGetArray(xx,&x);
1377: tmp = a->solve_work;
1379: ISGetIndices(isrow,&rout); r = rout;
1380: ISGetIndices(iscol,&cout); c = cout;
1382: /* copy the b into temp work space according to permutation */
1383: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1385: /* forward solve the U^T */
1386: for (i=0; i<n; i++) {
1387: v = aa + diag[i] ;
1388: vi = aj + diag[i] + 1;
1389: nz = ai[i+1] - diag[i] - 1;
1390: s1 = tmp[i];
1391: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1392: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1393: tmp[i] = s1;
1394: }
1396: /* backward solve the L^T */
1397: for (i=n-1; i>=0; i--){
1398: v = aa + diag[i] - 1 ;
1399: vi = aj + diag[i] - 1 ;
1400: nz = diag[i] - ai[i];
1401: s1 = tmp[i];
1402: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1403: }
1405: /* copy tmp into x according to permutation */
1406: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1408: ISRestoreIndices(isrow,&rout);
1409: ISRestoreIndices(iscol,&cout);
1410: VecRestoreArray(bb,(PetscScalar**)&b);
1411: VecRestoreArray(xx,&x);
1413: PetscLogFlops(2.0*a->nz-A->cmap->n);
1414: return(0);
1415: }
1419: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1420: {
1421: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1422: IS iscol = a->col,isrow = a->row;
1423: PetscErrorCode ierr;
1424: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1425: PetscInt i,n = A->rmap->n,j;
1426: PetscInt nz;
1427: PetscScalar *x,*tmp,s1;
1428: const MatScalar *aa = a->a,*v;
1429: const PetscScalar *b;
1432: VecGetArray(bb,(PetscScalar**)&b);
1433: VecGetArray(xx,&x);
1434: tmp = a->solve_work;
1436: ISGetIndices(isrow,&rout); r = rout;
1437: ISGetIndices(iscol,&cout); c = cout;
1439: /* copy the b into temp work space according to permutation */
1440: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1442: /* forward solve the U^T */
1443: for (i=0; i<n; i++) {
1444: v = aa + adiag[i+1] + 1;
1445: vi = aj + adiag[i+1] + 1;
1446: nz = adiag[i] - adiag[i+1] - 1;
1447: s1 = tmp[i];
1448: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1449: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1450: tmp[i] = s1;
1451: }
1453: /* backward solve the L^T */
1454: for (i=n-1; i>=0; i--){
1455: v = aa + ai[i];
1456: vi = aj + ai[i];
1457: nz = ai[i+1] - ai[i];
1458: s1 = tmp[i];
1459: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1460: }
1462: /* copy tmp into x according to permutation */
1463: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1465: ISRestoreIndices(isrow,&rout);
1466: ISRestoreIndices(iscol,&cout);
1467: VecRestoreArray(bb,(PetscScalar**)&b);
1468: VecRestoreArray(xx,&x);
1470: PetscLogFlops(2.0*a->nz-A->cmap->n);
1471: return(0);
1472: }
1476: PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx)
1477: {
1478: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1479: IS iscol = a->col,isrow = a->row;
1480: PetscErrorCode ierr;
1481: const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi;
1482: PetscInt i,n = A->rmap->n,j;
1483: PetscInt nz;
1484: PetscScalar *x,*tmp,s1;
1485: const MatScalar *aa = a->a,*v;
1486: const PetscScalar *b;
1489: if (zz != xx) {VecCopy(zz,xx);}
1490: VecGetArray(bb,(PetscScalar**)&b);
1491: VecGetArray(xx,&x);
1492: tmp = a->solve_work;
1494: ISGetIndices(isrow,&rout); r = rout;
1495: ISGetIndices(iscol,&cout); c = cout;
1497: /* copy the b into temp work space according to permutation */
1498: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1500: /* forward solve the U^T */
1501: for (i=0; i<n; i++) {
1502: v = aa + diag[i] ;
1503: vi = aj + diag[i] + 1;
1504: nz = ai[i+1] - diag[i] - 1;
1505: s1 = tmp[i];
1506: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1507: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1508: tmp[i] = s1;
1509: }
1511: /* backward solve the L^T */
1512: for (i=n-1; i>=0; i--){
1513: v = aa + diag[i] - 1 ;
1514: vi = aj + diag[i] - 1 ;
1515: nz = diag[i] - ai[i];
1516: s1 = tmp[i];
1517: for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j];
1518: }
1520: /* copy tmp into x according to permutation */
1521: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1523: ISRestoreIndices(isrow,&rout);
1524: ISRestoreIndices(iscol,&cout);
1525: VecRestoreArray(bb,(PetscScalar**)&b);
1526: VecRestoreArray(xx,&x);
1528: PetscLogFlops(2.0*a->nz-A->cmap->n);
1529: return(0);
1530: }
1534: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1535: {
1536: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1537: IS iscol = a->col,isrow = a->row;
1538: PetscErrorCode ierr;
1539: const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi;
1540: PetscInt i,n = A->rmap->n,j;
1541: PetscInt nz;
1542: PetscScalar *x,*tmp,s1;
1543: const MatScalar *aa = a->a,*v;
1544: const PetscScalar *b;
1547: if (zz != xx) {VecCopy(zz,xx);}
1548: VecGetArray(bb,(PetscScalar**)&b);
1549: VecGetArray(xx,&x);
1550: tmp = a->solve_work;
1552: ISGetIndices(isrow,&rout); r = rout;
1553: ISGetIndices(iscol,&cout); c = cout;
1555: /* copy the b into temp work space according to permutation */
1556: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1558: /* forward solve the U^T */
1559: for (i=0; i<n; i++) {
1560: v = aa + adiag[i+1] + 1;
1561: vi = aj + adiag[i+1] + 1;
1562: nz = adiag[i] - adiag[i+1] - 1;
1563: s1 = tmp[i];
1564: s1 *= v[nz]; /* multiply by inverse of diagonal entry */
1565: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1566: tmp[i] = s1;
1567: }
1570: /* backward solve the L^T */
1571: for (i=n-1; i>=0; i--){
1572: v = aa + ai[i] ;
1573: vi = aj + ai[i];
1574: nz = ai[i+1] - ai[i];
1575: s1 = tmp[i];
1576: for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j];
1577: }
1579: /* copy tmp into x according to permutation */
1580: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1582: ISRestoreIndices(isrow,&rout);
1583: ISRestoreIndices(iscol,&cout);
1584: VecRestoreArray(bb,(PetscScalar**)&b);
1585: VecRestoreArray(xx,&x);
1587: PetscLogFlops(2.0*a->nz-A->cmap->n);
1588: return(0);
1589: }
1591: /* ----------------------------------------------------------------*/
1593: EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscTruth);
/*
   ilu() under the revised (new) data structure.
   The factored arrays bj and ba are stored as
     L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:)

   bi=fact->i is an array of size n+1, in which
     bi[i]: points to the 1st entry of L(i,:), i=0,...,n-1
     bi[n]: points one past the last entry of L(n-1,:)

   bdiag=fact->diag is an array of size n+1, in which
     bdiag[i]: points to the diagonal of U(i,:), i=0,...,n-1
     bdiag[n]: points one before the first entry of U(n-1,:)

   U(i,:) contains bdiag[i] as its last entry, i.e.,
     U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
*/
1614: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1615: {
1616:
1617: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1618: PetscErrorCode ierr;
1619: const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag;
1620: PetscInt i,j,k=0,nz,*bi,*bj,*bdiag;
1621: PetscTruth missing;
1622: IS isicol;
1625: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1626: MatMissingDiagonal(A,&missing,&i);
1627: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
1628: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1630: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);
1631: b = (Mat_SeqAIJ*)(fact)->data;
1633: /* allocate matrix arrays for new data structure */
1634: PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);
1635: PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));
1636: b->singlemalloc = PETSC_TRUE;
1637: if (!b->diag){
1638: PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);
1639: PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));
1640: }
1641: bdiag = b->diag;
1642:
1643: if (n > 0) {
1644: PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));
1645: }
1646:
1647: /* set bi and bj with new data structure */
1648: bi = b->i;
1649: bj = b->j;
1651: /* L part */
1652: bi[0] = 0;
1653: for (i=0; i<n; i++){
1654: nz = adiag[i] - ai[i];
1655: bi[i+1] = bi[i] + nz;
1656: aj = a->j + ai[i];
1657: for (j=0; j<nz; j++){
1658: /* *bj = aj[j]; bj++; */
1659: bj[k++] = aj[j];
1660: }
1661: }
1662:
1663: /* U part */
1664: bdiag[n] = bi[n]-1;
1665: for (i=n-1; i>=0; i--){
1666: nz = ai[i+1] - adiag[i] - 1;
1667: aj = a->j + adiag[i] + 1;
1668: for (j=0; j<nz; j++){
1669: /* *bj = aj[j]; bj++; */
1670: bj[k++] = aj[j];
1671: }
1672: /* diag[i] */
1673: /* *bj = i; bj++; */
1674: bj[k++] = i;
1675: bdiag[i] = bdiag[i+1] + nz + 1;
1676: }
1678: fact->factor = MAT_FACTOR_ILU;
1679: fact->info.factor_mallocs = 0;
1680: fact->info.fill_ratio_given = info->fill;
1681: fact->info.fill_ratio_needed = 1.0;
1682: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1684: b = (Mat_SeqAIJ*)(fact)->data;
1685: b->row = isrow;
1686: b->col = iscol;
1687: b->icol = isicol;
1688: PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1689: PetscObjectReference((PetscObject)isrow);
1690: PetscObjectReference((PetscObject)iscol);
1691: return(0);
1692: }
1696: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1697: {
1698: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1699: IS isicol;
1700: PetscErrorCode ierr;
1701: const PetscInt *r,*ic;
1702: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j;
1703: PetscInt *bi,*cols,nnz,*cols_lvl;
1704: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1705: PetscInt i,levels,diagonal_fill;
1706: PetscTruth col_identity,row_identity;
1707: PetscReal f;
1708: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1709: PetscBT lnkbt;
1710: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1711: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1712: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1713:
1715: /* Uncomment the old data struct part only while testing new data structure for MatSolve() */
1716: /*
1717: PetscTruth olddatastruct=PETSC_FALSE;
1718: PetscOptionsGetTruth(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);
1719: if(olddatastruct){
1720: MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);
1721: return(0);
1722: }
1723: */
1724:
1725: levels = (PetscInt)info->levels;
1726: ISIdentity(isrow,&row_identity);
1727: ISIdentity(iscol,&col_identity);
1729: if (!levels && row_identity && col_identity) {
1730: /* special case: ilu(0) with natural ordering */
1731: MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);
1732: if (a->inode.size) {
1733: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1734: }
1735: return(0);
1736: }
1738: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1739: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1740: ISGetIndices(isrow,&r);
1741: ISGetIndices(isicol,&ic);
1743: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1744: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1745: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1746: bi[0] = bdiag[0] = 0;
1748: PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);
1750: /* create a linked list for storing column indices of the active row */
1751: nlnk = n + 1;
1752: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1754: /* initial FreeSpace size is f*(ai[n]+1) */
1755: f = info->fill;
1756: diagonal_fill = (PetscInt)info->diagonal_fill;
1757: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1758: current_space = free_space;
1759: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1760: current_space_lvl = free_space_lvl;
1761:
1762: for (i=0; i<n; i++) {
1763: nzi = 0;
1764: /* copy current row into linked list */
1765: nnz = ai[r[i]+1] - ai[r[i]];
1766: if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1767: cols = aj + ai[r[i]];
1768: lnk[i] = -1; /* marker to indicate if diagonal exists */
1769: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1770: nzi += nlnk;
1772: /* make sure diagonal entry is included */
1773: if (diagonal_fill && lnk[i] == -1) {
1774: fm = n;
1775: while (lnk[fm] < i) fm = lnk[fm];
1776: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1777: lnk[fm] = i;
1778: lnk_lvl[i] = 0;
1779: nzi++; dcount++;
1780: }
1782: /* add pivot rows into the active row */
1783: nzbd = 0;
1784: prow = lnk[n];
1785: while (prow < i) {
1786: nnz = bdiag[prow];
1787: cols = bj_ptr[prow] + nnz + 1;
1788: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1789: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1790: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1791: nzi += nlnk;
1792: prow = lnk[prow];
1793: nzbd++;
1794: }
1795: bdiag[i] = nzbd;
1796: bi[i+1] = bi[i] + nzi;
1798: /* if free space is not available, make more free space */
1799: if (current_space->local_remaining<nzi) {
1800: nnz = 2*nzi*(n - i); /* estimated and max additional space needed */
1801: PetscFreeSpaceGet(nnz,¤t_space);
1802: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1803: reallocs++;
1804: }
1806: /* copy data into free_space and free_space_lvl, then initialize lnk */
1807: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1808: bj_ptr[i] = current_space->array;
1809: bjlvl_ptr[i] = current_space_lvl->array;
1811: /* make sure the active row i has diagonal entry */
1812: if (*(bj_ptr[i]+bdiag[i]) != i) {
1813: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1814: try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1815: }
1817: current_space->array += nzi;
1818: current_space->local_used += nzi;
1819: current_space->local_remaining -= nzi;
1820: current_space_lvl->array += nzi;
1821: current_space_lvl->local_used += nzi;
1822: current_space_lvl->local_remaining -= nzi;
1823: }
1825: ISRestoreIndices(isrow,&r);
1826: ISRestoreIndices(isicol,&ic);
1828: /* destroy list of free space and other temporary arrays */
1829: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1831: /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
1832: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
1833:
1834: PetscIncompleteLLDestroy(lnk,lnkbt);
1835: PetscFreeSpaceDestroy(free_space_lvl);
1836: PetscFree2(bj_ptr,bjlvl_ptr);
1838: #if defined(PETSC_USE_INFO)
1839: {
1840: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1841: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1842: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1843: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1844: PetscInfo(A,"for best performance.\n");
1845: if (diagonal_fill) {
1846: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1847: }
1848: }
1849: #endif
1851: /* put together the new matrix */
1852: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1853: PetscLogObjectParent(fact,isicol);
1854: b = (Mat_SeqAIJ*)(fact)->data;
1855: b->free_a = PETSC_TRUE;
1856: b->free_ij = PETSC_TRUE;
1857: b->singlemalloc = PETSC_FALSE;
1858: PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);
1859: b->j = bj;
1860: b->i = bi;
1861: b->diag = bdiag;
1862: b->ilen = 0;
1863: b->imax = 0;
1864: b->row = isrow;
1865: b->col = iscol;
1866: PetscObjectReference((PetscObject)isrow);
1867: PetscObjectReference((PetscObject)iscol);
1868: b->icol = isicol;
1869: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1870: /* In b structure: Free imax, ilen, old a, old j.
1871: Allocate bdiag, solve_work, new a, new j */
1872: PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));
1873: b->maxnz = b->nz = bdiag[0]+1;
1874: (fact)->info.factor_mallocs = reallocs;
1875: (fact)->info.fill_ratio_given = f;
1876: (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
1877: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
1878: if (a->inode.size) {
1879: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
1880: }
1881: return(0);
1882: }
1886: PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
1887: {
1888: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1889: IS isicol;
1890: PetscErrorCode ierr;
1891: const PetscInt *r,*ic;
1892: PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d;
1893: PetscInt *bi,*cols,nnz,*cols_lvl;
1894: PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0;
1895: PetscInt i,levels,diagonal_fill;
1896: PetscTruth col_identity,row_identity;
1897: PetscReal f;
1898: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1899: PetscBT lnkbt;
1900: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1901: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1902: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1903: PetscTruth missing;
1904:
1906: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
1907: f = info->fill;
1908: levels = (PetscInt)info->levels;
1909: diagonal_fill = (PetscInt)info->diagonal_fill;
1910: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1912: ISIdentity(isrow,&row_identity);
1913: ISIdentity(iscol,&col_identity);
1914: if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */
1915: MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);
1916: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
1917: if (a->inode.size) {
1918: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
1919: }
1920: fact->factor = MAT_FACTOR_ILU;
1921: (fact)->info.factor_mallocs = 0;
1922: (fact)->info.fill_ratio_given = info->fill;
1923: (fact)->info.fill_ratio_needed = 1.0;
1924: b = (Mat_SeqAIJ*)(fact)->data;
1925: MatMissingDiagonal(A,&missing,&d);
1926: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1927: b->row = isrow;
1928: b->col = iscol;
1929: b->icol = isicol;
1930: PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);
1931: PetscObjectReference((PetscObject)isrow);
1932: PetscObjectReference((PetscObject)iscol);
1933: return(0);
1934: }
1936: ISGetIndices(isrow,&r);
1937: ISGetIndices(isicol,&ic);
1939: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
1940: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1941: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1942: bi[0] = bdiag[0] = 0;
1944: PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);
1946: /* create a linked list for storing column indices of the active row */
1947: nlnk = n + 1;
1948: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1950: /* initial FreeSpace size is f*(ai[n]+1) */
1951: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1952: current_space = free_space;
1953: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1954: current_space_lvl = free_space_lvl;
1955:
1956: for (i=0; i<n; i++) {
1957: nzi = 0;
1958: /* copy current row into linked list */
1959: nnz = ai[r[i]+1] - ai[r[i]];
1960: if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
1961: cols = aj + ai[r[i]];
1962: lnk[i] = -1; /* marker to indicate if diagonal exists */
1963: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1964: nzi += nlnk;
1966: /* make sure diagonal entry is included */
1967: if (diagonal_fill && lnk[i] == -1) {
1968: fm = n;
1969: while (lnk[fm] < i) fm = lnk[fm];
1970: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1971: lnk[fm] = i;
1972: lnk_lvl[i] = 0;
1973: nzi++; dcount++;
1974: }
1976: /* add pivot rows into the active row */
1977: nzbd = 0;
1978: prow = lnk[n];
1979: while (prow < i) {
1980: nnz = bdiag[prow];
1981: cols = bj_ptr[prow] + nnz + 1;
1982: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1983: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1984: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1985: nzi += nlnk;
1986: prow = lnk[prow];
1987: nzbd++;
1988: }
1989: bdiag[i] = nzbd;
1990: bi[i+1] = bi[i] + nzi;
1992: /* if free space is not available, make more free space */
1993: if (current_space->local_remaining<nzi) {
1994: nnz = nzi*(n - i); /* estimated and max additional space needed */
1995: PetscFreeSpaceGet(nnz,¤t_space);
1996: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1997: reallocs++;
1998: }
2000: /* copy data into free_space and free_space_lvl, then initialize lnk */
2001: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2002: bj_ptr[i] = current_space->array;
2003: bjlvl_ptr[i] = current_space_lvl->array;
2005: /* make sure the active row i has diagonal entry */
2006: if (*(bj_ptr[i]+bdiag[i]) != i) {
2007: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
2008: try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
2009: }
2011: current_space->array += nzi;
2012: current_space->local_used += nzi;
2013: current_space->local_remaining -= nzi;
2014: current_space_lvl->array += nzi;
2015: current_space_lvl->local_used += nzi;
2016: current_space_lvl->local_remaining -= nzi;
2017: }
2019: ISRestoreIndices(isrow,&r);
2020: ISRestoreIndices(isicol,&ic);
2022: /* destroy list of free space and other temporary arrays */
2023: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
2024: PetscFreeSpaceContiguous(&free_space,bj); /* copy free_space -> bj */
2025: PetscIncompleteLLDestroy(lnk,lnkbt);
2026: PetscFreeSpaceDestroy(free_space_lvl);
2027: PetscFree2(bj_ptr,bjlvl_ptr);
2029: #if defined(PETSC_USE_INFO)
2030: {
2031: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2032: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
2033: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
2034: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
2035: PetscInfo(A,"for best performance.\n");
2036: if (diagonal_fill) {
2037: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
2038: }
2039: }
2040: #endif
2042: /* put together the new matrix */
2043: MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
2044: PetscLogObjectParent(fact,isicol);
2045: b = (Mat_SeqAIJ*)(fact)->data;
2046: b->free_a = PETSC_TRUE;
2047: b->free_ij = PETSC_TRUE;
2048: b->singlemalloc = PETSC_FALSE;
2049: PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);
2050: b->j = bj;
2051: b->i = bi;
2052: for (i=0; i<n; i++) bdiag[i] += bi[i];
2053: b->diag = bdiag;
2054: b->ilen = 0;
2055: b->imax = 0;
2056: b->row = isrow;
2057: b->col = iscol;
2058: PetscObjectReference((PetscObject)isrow);
2059: PetscObjectReference((PetscObject)iscol);
2060: b->icol = isicol;
2061: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
2062: /* In b structure: Free imax, ilen, old a, old j.
2063: Allocate bdiag, solve_work, new a, new j */
2064: PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
2065: b->maxnz = b->nz = bi[n] ;
2066: (fact)->info.factor_mallocs = reallocs;
2067: (fact)->info.fill_ratio_given = f;
2068: (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
2069: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace;
2070: if (a->inode.size) {
2071: (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace;
2072: }
2073: return(0);
2074: }
/*
   MatCholeskyFactorNumeric_SeqAIJ - numeric phase of the Cholesky/ICC factorization of the SeqAIJ
   matrix A; the factor values are written into the SeqSBAIJ data (b->a) of B.

   Input:
     B    - factor matrix; b->i, b->j, b->diag, b->row and b->icol are read here and must already be set
     A    - matrix to be factored (a->i, a->j, a->a, and a->diag for the positive-definite shift)
     info - shifttype, zeropivot and shiftamount are read

   Storage produced by this routine:
     ba[bdiag[k]] holds the reciprocal of the k-th pivot (1/dk);
     an off-diagonal entry U(i,k) is overwritten by -U(i,k)*ba[bdiag[i]] once row i has been applied to row k.

   The whole factorization is repeated while sctx.useshift is set (presumably by the MatPivotCheck_*
   macros -- their definitions are not in this part of the file); each pass adds sctx.shift_amount
   to every diagonal entry before elimination.

   On exit the solve function pointers of B are set according to whether b->row is an identity permutation.
*/
2078: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info)
2079: {
2080: Mat C = B;
2081: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2082: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2083: IS ip=b->row,iip = b->icol;
2085: const PetscInt *rip,*riip;
2086: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
2087: PetscInt *ai=a->i,*aj=a->j;
2088: PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
2089: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2090: PetscTruth perm_identity;
2092: FactorShiftCtx sctx;
2093: PetscReal rs;
2094: MatScalar d,*v;
2097: /* MatPivotSetUp(): initialize shift context sctx */
2098: PetscMemzero(&sctx,sizeof(FactorShiftCtx));
2100: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
2101: sctx.shift_top = info->zeropivot;
2102: for (i=0; i<mbs; i++) {
2103: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
2104: d = (aa)[a->diag[i]];
/* start at -|d| - Re(d): the loop below runs over the full row and adds |d| back,
   so the net value is (sum of off-diagonal magnitudes) - Re(a_ii) */
2105: rs = -PetscAbsScalar(d) - PetscRealPart(d);
2106: v = aa+ai[i];
2107: nz = ai[i+1] - ai[i];
2108: for (j=0; j<nz; j++)
2109: rs += PetscAbsScalar(v[j]);
2110: if (rs>sctx.shift_top) sctx.shift_top = rs;
2111: }
2112: sctx.shift_top *= 1.1;
2113: sctx.nshift_max = 5;
2114: sctx.shift_lo = 0.;
2115: sctx.shift_hi = 1.;
2116: }
2118: ISGetIndices(ip,&rip);
2119: ISGetIndices(iip,&riip);
2120:
2121: /* allocate working arrays
2122: c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
2123: il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
2124: */
2125: PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);
2126:
2127: do {
2128: sctx.useshift = PETSC_FALSE;
/* mbs is used as the end-of-list marker: the "while (i < k)" loop below stops on it */
2130: for (i=0; i<mbs; i++) c2r[i] = mbs;
2131: il[0] = 0;
2132:
2133: for (k = 0; k<mbs; k++){
2134: /* zero rtmp */
2135: nz = bi[k+1] - bi[k];
2136: bjtmp = bj + bi[k];
2137: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2138:
2139: /* load in initial unfactored row */
2140: bval = ba + bi[k];
2141: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2142: for (j = jmin; j < jmax; j++){
/* riip maps an original column index to its position in the permuted ordering */
2143: col = riip[aj[j]];
2144: if (col >= k){ /* only take upper triangular entry */
2145: rtmp[col] = aa[j];
2146: *bval++ = 0.0; /* for in-place factorization */
2147: }
2148: }
2149: /* shift the diagonal of the matrix: ZeropivotApply() */
2150: rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */
2151:
2152: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2153: dk = rtmp[k];
2154: i = c2r[k]; /* first row to be added to k_th row */
2156: while (i < k){
/* fetch the successor first: c2r[i] is overwritten below when row i is relinked */
2157: nexti = c2r[i]; /* next row to be added to k_th row */
2158:
2159: /* compute multiplier, update diag(k) and U(i,k) */
2160: ili = il[i]; /* index of first nonzero element in U(i,k:mbs-1) */
/* ba[bdiag[i]] holds 1/d_i (stored at the end of the k-loop), so uikdi = -U(i,k)/d_i */
2161: uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
2162: dk += uikdi*ba[ili]; /* update diag[k] */
2163: ba[ili] = uikdi; /* -U(i,k) */
2165: /* add multiple of row i to k-th row */
2166: jmin = ili + 1; jmax = bi[i+1];
2167: if (jmin < jmax){
2168: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2169: /* update il and c2r for row i */
2170: il[i] = jmin;
/* push row i onto the list of column bj[jmin], the next column in which row i has an entry */
2171: j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
2172: }
2173: i = nexti;
2174: }
2176: /* copy data into U(k,:) */
2177: rs = 0.0;
/* the last slot of row k is skipped here; presumably it is the diagonal, which is written
   through bdiag[k] below (see the layout comment before MatICCFactorSymbolic_SeqAIJ) */
2178: jmin = bi[k]; jmax = bi[k+1]-1;
2179: if (jmin < jmax) {
2180: for (j=jmin; j<jmax; j++){
2181: col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
2182: }
2183: /* add the k-th row into il and c2r */
2184: il[k] = jmin;
2185: i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
2186: }
2188: /* MatPivotCheck() */
/* rs = sum of |off-diagonal| of row k, pv = candidate pivot; the check macros read these from sctx */
2189: sctx.rs = rs;
2190: sctx.pv = dk;
2191: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO){
2192: MatPivotCheck_nz(info,sctx,k);
2193: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE){
2194: MatPivotCheck_pd(info,sctx,k);
2195: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2196: MatPivotCheck_inblocks(info,sctx,k);
2197: } else {
2198: MatPivotCheck_none(info,sctx,k);
2199: }
/* the check may have replaced the pivot value */
2200: dk = sctx.pv;
2201:
2202: ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
2203: }
2204: } while (sctx.useshift);
2205:
2206: PetscFree3(rtmp,il,c2r);
2207: ISRestoreIndices(ip,&rip);
2208: ISRestoreIndices(iip,&riip);
/* select solve kernels; solvetranspose is given the same routine as solve */
2210: ISIdentity(ip,&perm_identity);
2211: if (perm_identity){
2212: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2213: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2214: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
2215: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
2216: } else {
2217: B->ops->solve = MatSolve_SeqSBAIJ_1;
2218: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
2219: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
2220: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
2221: }
2223: C->assembled = PETSC_TRUE;
2224: C->preallocated = PETSC_TRUE;
2225: PetscLogFlops(C->rmap->n);
2227: /* MatPivotView() */
2228: if (sctx.nshift){
2229: if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2230: PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);
2231: } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2232: PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2233: } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){
2234: PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);
2235: }
2236: }
2237: return(0);
2238: }
/*
   MatCholeskyFactorNumeric_SeqAIJ_inplace - numeric phase of the Cholesky/ICC factorization of the
   SeqAIJ matrix A, using the row layout in which the diagonal is the FIRST entry of each row of the factor.

   Input:
     B    - factor matrix; b->i, b->j, b->row and b->icol are read here and must already be set
     A    - matrix to be factored
     info - zeropivot and shifttype are read here; info is also handed to MatCholeskyCheckShift_inline

   Storage produced by this routine:
     ba[bi[k]] holds the reciprocal of the k-th pivot (1/dk);
     an off-diagonal entry U(i,k) is overwritten by -U(i,k)*ba[bi[i]] once row i has been applied to row k.

   When the shift check reports newshift == 1 the row loop is abandoned and, as long as sctx.chshift
   is set (presumably by MatCholeskyCheckShift_inline -- its definition is not in this part of the
   file), the factorization restarts from row 0 with the diagonal shifted by sctx.shift_amount.
*/
2242: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info)
2243: {
2244: Mat C = B;
2245: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
2246: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
2247: IS ip=b->row,iip = b->icol;
2249: const PetscInt *rip,*riip;
2250: PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp;
2251: PetscInt *ai=a->i,*aj=a->j;
2252: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
2253: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
2254: PetscReal zeropivot,rs;
2255: ChShift_Ctx sctx;
2256: PetscInt newshift;
2257: PetscTruth perm_identity;
/* NOTE(review): zeropivot is not referenced again in the visible code; it may be used inside
   MatCholeskyCheckShift_inline -- confirm against the macro definition */
2260: zeropivot = info->zeropivot;
2262: ISGetIndices(ip,&rip);
2263: ISGetIndices(iip,&riip);
2264:
2265: /* initialization */
2266: PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);
2267: sctx.shift_amount = 0;
2268: sctx.nshift = 0;
2269: do {
2270: sctx.chshift = PETSC_FALSE;
/* jl: per-column linked list of pivot rows; mbs is the end-of-list marker ("while (i < k)" stops on it) */
2271: for (i=0; i<mbs; i++) jl[i] = mbs;
2272: il[0] = 0;
2273:
2274: for (k = 0; k<mbs; k++){
2275: /* zero rtmp */
2276: nz = bi[k+1] - bi[k];
2277: bjtmp = bj + bi[k];
2278: for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;
2280: bval = ba + bi[k];
2281: /* initialize k-th row by the perm[k]-th row of A */
2282: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
2283: for (j = jmin; j < jmax; j++){
/* riip maps an original column index to its position in the permuted ordering */
2284: col = riip[aj[j]];
2285: if (col >= k){ /* only take upper triangular entry */
2286: rtmp[col] = aa[j];
2287: *bval++ = 0.0; /* for in-place factorization */
2288: }
2289: }
2290: /* shift the diagonal of the matrix */
2291: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
2293: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
2294: dk = rtmp[k];
2295: i = jl[k]; /* first row to be added to k_th row */
2297: while (i < k){
/* fetch the successor first: jl[i] is overwritten below when row i is relinked */
2298: nexti = jl[i]; /* next row to be added to k_th row */
2299:
2300: /* compute multiplier, update diag(k) and U(i,k) */
2301: ili = il[i]; /* index of first nonzero element in U(i,k:mbs-1) */
/* ba[bi[i]] holds 1/d_i (diagonal is the first entry of row i), so uikdi = -U(i,k)/d_i */
2302: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
2303: dk += uikdi*ba[ili];
2304: ba[ili] = uikdi; /* -U(i,k) */
2306: /* add multiple of row i to k-th row */
2307: jmin = ili + 1; jmax = bi[i+1];
2308: if (jmin < jmax){
2309: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
2310: /* update il and jl for row i */
2311: il[i] = jmin;
/* push row i onto the list of column bj[jmin], the next column in which row i has an entry */
2312: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
2313: }
2314: i = nexti;
2315: }
2317: /* shift the diagonals when zero pivot is detected */
2318: /* compute rs=sum of abs(off-diagonal) */
2319: rs = 0.0;
/* skip slot bi[k], which holds the diagonal in this layout */
2320: jmin = bi[k]+1;
2321: nz = bi[k+1] - jmin;
2322: bcol = bj + jmin;
2323: for (j=0; j<nz; j++) {
2324: rs += PetscAbsScalar(rtmp[bcol[j]]);
2325: }
2327: sctx.rs = rs;
2328: sctx.pv = dk;
2329: MatCholeskyCheckShift_inline(info,sctx,k,newshift);
2331: if (newshift == 1) {
/* fall back to a default shift of 1e-5 when the check left shift_amount at zero,
   then leave the row loop so the outer do-while can restart the factorization */
2332: if (!sctx.shift_amount) {
2333: sctx.shift_amount = 1e-5;
2334: }
2335: break;
2336: }
2337:
2338: /* copy data into U(k,:) */
2339: ba[bi[k]] = 1.0/dk; /* U(k,k) */
2340: jmin = bi[k]+1; jmax = bi[k+1];
2341: if (jmin < jmax) {
2342: for (j=jmin; j<jmax; j++){
2343: col = bj[j]; ba[j] = rtmp[col];
2344: }
2345: /* add the k-th row into il and jl */
2346: il[k] = jmin;
2347: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
2348: }
2349: }
2350: } while (sctx.chshift);
2351: PetscFree3(rtmp,il,jl);
2352: ISRestoreIndices(ip,&rip);
2353: ISRestoreIndices(iip,&riip);
/* select the _inplace solve kernels; solvetranspose is given the same routine as solve */
2355: ISIdentity(ip,&perm_identity);
2356: if (perm_identity){
2357: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2358: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2359: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2360: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
2361: } else {
2362: B->ops->solve = MatSolve_SeqSBAIJ_1_inplace;
2363: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
2364: B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace;
2365: B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace;
2366: }
2368: C->assembled = PETSC_TRUE;
2369: C->preallocated = PETSC_TRUE;
2370: PetscLogFlops(C->rmap->n);
2371: if (sctx.nshift){
2372: if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
2373: PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2374: } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
2375: PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
2376: }
2377: }
2378: return(0);
2379: }
2381: /*
2382: icc() under revised new data structure.
2383: Factored arrays bj and ba are stored as
2384: U(0,:),...,U(i,:),...,U(n-1,:)
2386: ui=fact->i is an array of size n+1, in which
2388: ui[i]: points to 1st entry of U(i,:),i=0,...,n-1
2389: ui[n]: points to U(n-1,n-1)+1
2390:
2391: udiag=fact->diag is an array of size n,in which
2392: udiag[i]: points to diagonal of U(i,:), i=0,...,n-1
2394: U(i,:) contains udiag[i] as its last entry, i.e.,
2395: U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i])
2396: */
/*
   MatICCFactorSymbolic_SeqAIJ - symbolic incomplete Cholesky factorization, ICC(levels), of the SeqAIJ
   matrix A. Builds the nonzero structure of the factor in the SeqSBAIJ data of fact.

   Input:
     fact - factor matrix whose SeqSBAIJ data is filled in
     A    - square matrix with all diagonal entries present (both conditions are checked, error otherwise)
     perm - row/column permutation; stored as both b->row and b->col (hence referenced twice)
     info - fill (initial free-space estimate) and levels are read

   Two paths:
     levels == 0 and perm is the identity: the pattern of each row is copied from the strictly upper
       triangular part of A (entries after a->diag[i]) with the diagonal index appended as the last entry;
     otherwise: rows are built one at a time in a sorted linked list (lnk, with fill levels in lnk_lvl),
       merging in the previously computed pivot rows, and are finally compacted into uj by
       PetscFreeSpaceContiguous_Cholesky, which also receives ui and udiag.

   On exit b->i, b->j, b->diag are set, b->a and b->solve_work are allocated (values are not computed
   here), fact->info records reallocs and fill ratios, and the numeric routine is set to
   MatCholeskyFactorNumeric_SeqAIJ.
*/
2400: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2401: {
2402: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2403: Mat_SeqSBAIJ *b;
2404: PetscErrorCode ierr;
2405: PetscTruth perm_identity,missing;
2406: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2407: const PetscInt *rip,*riip;
2408: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2409: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2410: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2411: PetscReal fill=info->fill,levels=info->levels;
2412: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2413: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2414: PetscBT lnkbt;
2415: IS iperm;
2416:
2418: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2419: MatMissingDiagonal(A,&missing,&d);
2420: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2421: ISIdentity(perm,&perm_identity);
2422: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2424: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2425: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2426: ui[0] = 0;
2428: /* ICC(0) without matrix ordering: simply rearrange column indices */
2429: if (!levels && perm_identity) {
2430: for (i=0; i<am; i++) {
/* row i of U keeps the diagonal and everything to its right in row i of A */
2431: ncols = ai[i+1] - a->diag[i];
2432: ui[i+1] = ui[i] + ncols;
2433: udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */
2434: }
2435: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2436: cols = uj;
2437: for (i=0; i<am; i++) {
2438: aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */
2439: ncols = ai[i+1] - a->diag[i] -1;
2440: for (j=0; j<ncols; j++) *cols++ = aj[j];
2441: *cols++ = i; /* diagonal is located as the last entry of U(i,:) */
2442: }
2443: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2444: ISGetIndices(iperm,&riip);
2445: ISGetIndices(perm,&rip);
2447: /* initialization */
2448: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
2450: /* jl: linked list for storing indices of the pivot rows
2451: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2452: PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2453: for (i=0; i<am; i++){
/* am is the end-of-list marker for jl: the "while (prow < k)" loop below stops on it */
2454: jl[i] = am; il[i] = 0;
2455: }
2457: /* create and initialize a linked list for storing column indices of the active row k */
2458: nlnk = am + 1;
2459: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2461: /* initial FreeSpace size is fill*(ai[am]+1) */
2462: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2463: current_space = free_space;
2464: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2465: current_space_lvl = free_space_lvl;
2467: for (k=0; k<am; k++){ /* for each active row k */
2468: /* initialize lnk by the column indices of row rip[k] of A */
2469: nzk = 0;
2470: ncols = ai[rip[k]+1] - ai[rip[k]];
2471: if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2472: ncols_upper = 0;
2473: for (j=0; j<ncols; j++){
2474: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2475: if (riip[i] >= k){ /* only take upper triangular entry */
/* store the unpermuted index; riip is passed to PetscIncompleteLLInit below */
2476: ajtmp[ncols_upper] = i;
2477: ncols_upper++;
2478: }
2479: }
2480: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2481: nzk += nlnk;
2483: /* update lnk by computing fill-in for each pivot row to be merged in */
2484: prow = jl[k]; /* 1st pivot row */
2485:
2486: while (prow < k){
/* fetch the successor first: jl[prow] is overwritten below when prow is relinked */
2487: nextprow = jl[prow];
2488:
2489: /* merge prow into k-th row */
2490: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2491: jmax = ui[prow+1];
2492: ncols = jmax-jmin;
2493: i = jmin - ui[prow];
2494: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
/* uj is used as a scratch pointer here; it is reassigned to the final index array after the loop */
2495: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
/* j = fill level of the entry at il[prow], i.e. of U(prow,k) */
2496: j = *(uj - 1);
2497: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2498: nzk += nlnk;
2500: /* update il and jl for prow */
2501: if (jmin < jmax){
2502: il[prow] = jmin;
2503: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2504: }
2505: prow = nextprow;
2506: }
2508: /* if free space is not available, make more free space */
2509: if (current_space->local_remaining<nzk) {
2510: i = am - k + 1; /* num of unfactored rows */
2511: i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
/* index and level storage are grown together so they stay the same size */
2512: PetscFreeSpaceGet(i,&current_space);
2513: PetscFreeSpaceGet(i,&current_space_lvl);
2514: reallocs++;
2515: }
2517: /* copy data into free_space and free_space_lvl, then initialize lnk */
2518: if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2519: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2521: /* add the k-th row into il and jl */
2522: if (nzk > 1){
2523: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2524: jl[k] = jl[i]; jl[i] = k;
2525: il[k] = ui[k] + 1;
2526: }
/* remember where row k lives inside the free-space chunks; later rows merge from these pointers */
2527: uj_ptr[k] = current_space->array;
2528: uj_lvl_ptr[k] = current_space_lvl->array;
2530: current_space->array += nzk;
2531: current_space->local_used += nzk;
2532: current_space->local_remaining -= nzk;
2534: current_space_lvl->array += nzk;
2535: current_space_lvl->local_used += nzk;
2536: current_space_lvl->local_remaining -= nzk;
2538: ui[k+1] = ui[k] + nzk;
2539: }
2541: #if defined(PETSC_USE_INFO)
2542: if (ai[am] != 0) {
2543: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2544: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2545: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2546: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2547: } else {
2548: PetscInfo(A,"Empty matrix.\n");
2549: }
2550: #endif
2552: ISRestoreIndices(perm,&rip);
2553: ISRestoreIndices(iperm,&riip);
2554: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2555: PetscFree(ajtmp);
2557: /* destroy list of free space and other temporary array(s) */
2558: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2559: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2560: PetscIncompleteLLDestroy(lnk,lnkbt);
2561: PetscFreeSpaceDestroy(free_space_lvl);
2563: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2565: /* put together the new matrix in MATSEQSBAIJ format */
2566: b = (Mat_SeqSBAIJ*)(fact)->data;
2567: b->singlemalloc = PETSC_FALSE;
2568: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2569: b->j = uj;
2570: b->i = ui;
2571: b->diag = udiag;
2572: b->free_diag = PETSC_TRUE;
2573: b->ilen = 0;
2574: b->imax = 0;
2575: b->row = perm;
2576: b->col = perm;
/* perm is held through both b->row and b->col, so it is referenced once for each */
2577: PetscObjectReference((PetscObject)perm);
2578: PetscObjectReference((PetscObject)perm);
2579: b->icol = iperm;
2580: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2581: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2582: PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2583: b->maxnz = b->nz = ui[am];
2584: b->free_a = PETSC_TRUE;
2585: b->free_ij = PETSC_TRUE;
2586:
2587: fact->info.factor_mallocs = reallocs;
2588: fact->info.fill_ratio_given = fill;
/* guard against division by zero for a matrix with no stored entries */
2589: if (ai[am] != 0) {
2590: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2591: } else {
2592: fact->info.fill_ratio_needed = 0.0;
2593: }
2594: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2595: return(0);
2596: }
/*
   MatICCFactorSymbolic_SeqAIJ_inplace - symbolic incomplete Cholesky factorization, ICC(levels), of the
   SeqAIJ matrix A, producing the row layout in which the diagonal is the FIRST entry of each row.

   Input:
     fact - factor matrix whose SeqSBAIJ data is filled in
     A    - square matrix with all diagonal entries present (both conditions are checked, error otherwise)
     perm - row/column permutation; stored as both b->row and b->col (hence referenced twice)
     info - fill (initial free-space estimate) and levels are read

   Two paths:
     levels == 0 and perm is the identity: each row's pattern is copied from A starting at the diagonal
       (a->diag[i]) through the end of the row, and udiag[i] = ui[i];
     otherwise: rows are built one at a time in a sorted linked list (lnk, with fill levels in lnk_lvl),
       merging in the previously computed pivot rows, and are compacted into uj by PetscFreeSpaceContiguous.

   NOTE(review): in the second path udiag is allocated and stored in b->diag but is never written by
   this function; the matching numeric routine in this file indexes the diagonal through b->i instead.

   On exit b->i, b->j, b->diag are set, b->a and b->solve_work are allocated (values are not computed
   here), fact->info records reallocs and fill ratios, and the numeric routine is set to
   MatCholeskyFactorNumeric_SeqAIJ_inplace.
*/
2600: PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2601: {
2602: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2603: Mat_SeqSBAIJ *b;
2604: PetscErrorCode ierr;
2605: PetscTruth perm_identity,missing;
2606: PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag;
2607: const PetscInt *rip,*riip;
2608: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2609: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d;
2610: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
2611: PetscReal fill=info->fill,levels=info->levels;
2612: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2613: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2614: PetscBT lnkbt;
2615: IS iperm;
2616:
2618: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2619: MatMissingDiagonal(A,&missing,&d);
2620: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
2621: ISIdentity(perm,&perm_identity);
2622: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2624: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2625: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2626: ui[0] = 0;
2628: /* ICC(0) without matrix ordering: simply copies fill pattern */
2629: if (!levels && perm_identity) {
2631: for (i=0; i<am; i++) {
/* row i of U keeps the diagonal and everything to its right in row i of A */
2632: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
/* diagonal is the first entry of the row in this layout */
2633: udiag[i] = ui[i];
2634: }
2635: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2636: cols = uj;
2637: for (i=0; i<am; i++) {
2638: aj = a->j + a->diag[i];
2639: ncols = ui[i+1] - ui[i];
2640: for (j=0; j<ncols; j++) *cols++ = *aj++;
2641: }
2642: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2643: ISGetIndices(iperm,&riip);
2644: ISGetIndices(perm,&rip);
2646: /* initialization */
2647: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
2649: /* jl: linked list for storing indices of the pivot rows
2650: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2651: PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);
2652: for (i=0; i<am; i++){
/* am is the end-of-list marker for jl: the "while (prow < k)" loop below stops on it */
2653: jl[i] = am; il[i] = 0;
2654: }
2656: /* create and initialize a linked list for storing column indices of the active row k */
2657: nlnk = am + 1;
2658: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2660: /* initial FreeSpace size is fill*(ai[am]+1) */
2661: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2662: current_space = free_space;
2663: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2664: current_space_lvl = free_space_lvl;
2666: for (k=0; k<am; k++){ /* for each active row k */
2667: /* initialize lnk by the column indices of row rip[k] of A */
2668: nzk = 0;
2669: ncols = ai[rip[k]+1] - ai[rip[k]];
2670: if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2671: ncols_upper = 0;
2672: for (j=0; j<ncols; j++){
2673: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
2674: if (riip[i] >= k){ /* only take upper triangular entry */
/* store the unpermuted index; riip is passed to PetscIncompleteLLInit below */
2675: ajtmp[ncols_upper] = i;
2676: ncols_upper++;
2677: }
2678: }
2679: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
2680: nzk += nlnk;
2682: /* update lnk by computing fill-in for each pivot row to be merged in */
2683: prow = jl[k]; /* 1st pivot row */
2684:
2685: while (prow < k){
/* fetch the successor first: jl[prow] is overwritten below when prow is relinked */
2686: nextprow = jl[prow];
2687:
2688: /* merge prow into k-th row */
2689: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2690: jmax = ui[prow+1];
2691: ncols = jmax-jmin;
2692: i = jmin - ui[prow];
2693: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
/* uj is used as a scratch pointer here; it is reassigned to the final index array after the loop */
2694: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
/* j = fill level of the entry at il[prow], i.e. of U(prow,k) */
2695: j = *(uj - 1);
2696: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2697: nzk += nlnk;
2699: /* update il and jl for prow */
2700: if (jmin < jmax){
2701: il[prow] = jmin;
2702: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2703: }
2704: prow = nextprow;
2705: }
2707: /* if free space is not available, make more free space */
2708: if (current_space->local_remaining<nzk) {
2709: i = am - k + 1; /* num of unfactored rows */
2710: i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
/* index and level storage are grown together so they stay the same size */
2711: PetscFreeSpaceGet(i,&current_space);
2712: PetscFreeSpaceGet(i,&current_space_lvl);
2713: reallocs++;
2714: }
2716: /* copy data into free_space and free_space_lvl, then initialize lnk */
2717: if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
2718: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2720: /* add the k-th row into il and jl */
2721: if (nzk > 1){
2722: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2723: jl[k] = jl[i]; jl[i] = k;
2724: il[k] = ui[k] + 1;
2725: }
/* remember where row k lives inside the free-space chunks; later rows merge from these pointers */
2726: uj_ptr[k] = current_space->array;
2727: uj_lvl_ptr[k] = current_space_lvl->array;
2729: current_space->array += nzk;
2730: current_space->local_used += nzk;
2731: current_space->local_remaining -= nzk;
2733: current_space_lvl->array += nzk;
2734: current_space_lvl->local_used += nzk;
2735: current_space_lvl->local_remaining -= nzk;
2737: ui[k+1] = ui[k] + nzk;
2738: }
2740: #if defined(PETSC_USE_INFO)
2741: if (ai[am] != 0) {
2742: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
2743: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2744: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2745: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2746: } else {
2747: PetscInfo(A,"Empty matrix.\n");
2748: }
2749: #endif
2751: ISRestoreIndices(perm,&rip);
2752: ISRestoreIndices(iperm,&riip);
2753: PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);
2754: PetscFree(ajtmp);
2756: /* destroy list of free space and other temporary array(s) */
2757: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2758: PetscFreeSpaceContiguous(&free_space,uj);
2759: PetscIncompleteLLDestroy(lnk,lnkbt);
2760: PetscFreeSpaceDestroy(free_space_lvl);
2762: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2764: /* put together the new matrix in MATSEQSBAIJ format */
2766: b = (Mat_SeqSBAIJ*)fact->data;
2767: b->singlemalloc = PETSC_FALSE;
2768: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2769: b->j = uj;
2770: b->i = ui;
2771: b->diag = udiag;
2772: b->free_diag = PETSC_TRUE;
2773: b->ilen = 0;
2774: b->imax = 0;
2775: b->row = perm;
2776: b->col = perm;
/* perm is held through both b->row and b->col, so it is referenced once for each */
2777: PetscObjectReference((PetscObject)perm);
2778: PetscObjectReference((PetscObject)perm);
2779: b->icol = iperm;
2780: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2781: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2782: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
2783: b->maxnz = b->nz = ui[am];
2784: b->free_a = PETSC_TRUE;
2785: b->free_ij = PETSC_TRUE;
2786:
2787: fact->info.factor_mallocs = reallocs;
2788: fact->info.fill_ratio_given = fill;
/* guard against division by zero for a matrix with no stored entries */
2789: if (ai[am] != 0) {
2790: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2791: } else {
2792: fact->info.fill_ratio_needed = 0.0;
2793: }
2794: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
2795: return(0);
2796: }
/*
   MatCholeskyFactorSymbolic_SeqAIJ - symbolic (complete) Cholesky factorization of the SeqAIJ matrix A.
   Builds the nonzero structure of the factor in the SeqSBAIJ data of fact.

   Input:
     fact - factor matrix whose SeqSBAIJ data is filled in
     A    - square matrix (checked, error otherwise); a row with no stored entries is an error
     perm - row/column permutation; stored as both b->row and b->col (hence referenced twice)
     info - fill is read (initial free-space estimate)

   Each row k is assembled in a sorted linked list (lnk) from the upper triangular part of row perm[k]
   of A, then merged with every earlier pivot row that has an entry in column k. Rows are accumulated
   in a free-space list and finally compacted into uj by PetscFreeSpaceContiguous_Cholesky, which also
   receives ui and udiag.

   On exit b->i, b->j, b->diag are set, b->a and b->solve_work are allocated (values are not computed
   here), fact->info records reallocs and fill ratios, and the numeric routine is set to
   MatCholeskyFactorNumeric_SeqAIJ.

   NOTE(review): perm_identity is computed but not used afterwards in this function.
*/
2798: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2799: {
2800: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2801: Mat_SeqSBAIJ *b;
2802: PetscErrorCode ierr;
2803: PetscTruth perm_identity;
2804: PetscReal fill = info->fill;
2805: const PetscInt *rip,*riip;
2806: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2807: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2808: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
2809: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2810: PetscBT lnkbt;
2811: IS iperm;
2814: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2815: /* check whether perm is the identity mapping */
2816: ISIdentity(perm,&perm_identity);
2817: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2818: ISGetIndices(iperm,&riip);
2819: ISGetIndices(perm,&rip);
2821: /* initialization */
2822: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2823: PetscMalloc((am+1)*sizeof(PetscInt),&udiag);
2824: ui[0] = 0;
2826: /* jl: linked list for storing indices of the pivot rows
2827: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2828: PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
2829: for (i=0; i<am; i++){
/* am is the end-of-list marker for jl: the "while (prow < k)" loop below stops on it */
2830: jl[i] = am; il[i] = 0;
2831: }
2833: /* create and initialize a linked list for storing column indices of the active row k */
2834: nlnk = am + 1;
2835: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2837: /* initial FreeSpace size is fill*(ai[am]+1) */
2838: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2839: current_space = free_space;
2841: for (k=0; k<am; k++){ /* for each active row k */
2842: /* initialize lnk by the column indices of row rip[k] of A */
2843: nzk = 0;
2844: ncols = ai[rip[k]+1] - ai[rip[k]];
2845: if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
2846: ncols_upper = 0;
2847: for (j=0; j<ncols; j++){
/* i is the column index already mapped into the permuted ordering */
2848: i = riip[*(aj + ai[rip[k]] + j)];
2849: if (i >= k){ /* only take upper triangular entry */
2850: cols[ncols_upper] = i;
2851: ncols_upper++;
2852: }
2853: }
2854: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
2855: nzk += nlnk;
2857: /* update lnk by computing fill-in for each pivot row to be merged in */
2858: prow = jl[k]; /* 1st pivot row */
2859:
2860: while (prow < k){
/* fetch the successor first: jl[prow] is overwritten below when prow is relinked */
2861: nextprow = jl[prow];
2862: /* merge prow into k-th row */
2863: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2864: jmax = ui[prow+1];
2865: ncols = jmax-jmin;
2866: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2867: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
2868: nzk += nlnk;
2870: /* update il and jl for prow */
2871: if (jmin < jmax){
2872: il[prow] = jmin;
2873: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
2874: }
2875: prow = nextprow;
2876: }
2878: /* if free space is not available, make more free space */
2879: if (current_space->local_remaining<nzk) {
2880: i = am - k + 1; /* num of unfactored rows */
2881: i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */
2882: PetscFreeSpaceGet(i,&current_space);
2883: reallocs++;
2884: }
2886: /* copy data into free space, then initialize lnk */
2887: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
2889: /* add the k-th row into il and jl */
2890: if (nzk > 1){
2891: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2892: jl[k] = jl[i]; jl[i] = k;
2893: il[k] = ui[k] + 1;
2894: }
/* remember where row k lives inside the free-space chunks; later rows merge from this pointer */
2895: ui_ptr[k] = current_space->array;
2896: current_space->array += nzk;
2897: current_space->local_used += nzk;
2898: current_space->local_remaining -= nzk;
2900: ui[k+1] = ui[k] + nzk;
2901: }
2903: #if defined(PETSC_USE_INFO)
2904: if (ai[am] != 0) {
2905: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
2906: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2907: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2908: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2909: } else {
2910: PetscInfo(A,"Empty matrix.\n");
2911: }
2912: #endif
2914: ISRestoreIndices(perm,&rip);
2915: ISRestoreIndices(iperm,&riip);
2916: PetscFree4(ui_ptr,jl,il,cols);
2918: /* destroy list of free space and other temporary array(s) */
2919: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2920: PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag); /* store matrix factor */
2921: PetscLLDestroy(lnk,lnkbt);
2923: /* put together the new matrix in MATSEQSBAIJ format */
2925: b = (Mat_SeqSBAIJ*)fact->data;
2926: b->singlemalloc = PETSC_FALSE;
2927: b->free_a = PETSC_TRUE;
2928: b->free_ij = PETSC_TRUE;
2929: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
2930: b->j = uj;
2931: b->i = ui;
2932: b->diag = udiag;
2933: b->free_diag = PETSC_TRUE;
2934: b->ilen = 0;
2935: b->imax = 0;
2936: b->row = perm;
2937: b->col = perm;
/* perm is held through both b->row and b->col, so it is referenced once for each */
2938: PetscObjectReference((PetscObject)perm);
2939: PetscObjectReference((PetscObject)perm);
2940: b->icol = iperm;
2941: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2942: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2943: PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));
2944: b->maxnz = b->nz = ui[am];
2945:
2946: fact->info.factor_mallocs = reallocs;
2947: fact->info.fill_ratio_given = fill;
/* guard against division by zero for a matrix with no stored entries */
2948: if (ai[am] != 0) {
2949: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2950: } else {
2951: fact->info.fill_ratio_needed = 0.0;
2952: }
2953: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
2954: return(0);
2955: }
/*
   MatCholeskyFactorSymbolic_SeqAIJ_inplace - symbolic phase of a Cholesky factorization of the
   square SeqAIJ matrix A under the symmetric permutation perm.

   Row by row it builds the sparsity pattern (row pointers ui, column indices uj) of the upper
   triangular factor U: the permuted upper-triangular entries of row k of A are merged with the
   trailing part of every earlier pivot row linked to k.  The result is installed in fact->data,
   which is accessed as a Mat_SeqSBAIJ.

   Input:
     fact - factor matrix whose Mat_SeqSBAIJ data is filled in
     A    - matrix to factor; must be square and every row must hold at least one entry
     perm - permutation used for both rows and columns
     info - factorization options; only info->fill is read here

   Errors raised directly here: PETSC_ERR_ARG_WRONG for a non-square A and
   PETSC_ERR_MAT_CH_ZRPVT for an empty row.

   The value array b->a is allocated but not filled, and fact->ops->choleskyfactornumeric is set
   to MatCholeskyFactorNumeric_SeqAIJ_inplace.

   NOTE(review): in this listing the return codes of the PETSc calls are not captured (ierr is
   declared but never assigned), and perm_identity is computed but never read afterwards.
*/
2959: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
2960: {
2961: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2962: Mat_SeqSBAIJ *b;
2963: PetscErrorCode ierr;
2964: PetscTruth perm_identity;
2965: PetscReal fill = info->fill;
2966: const PetscInt *rip,*riip;
2967: PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow;
2968: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
2969: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
2970: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2971: PetscBT lnkbt;
2972: IS iperm;
2975: if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
2976: /* check whether perm is the identity mapping */
2977: ISIdentity(perm,&perm_identity);
/* rip[k] is the original row placed at permuted position k; riip[] (from the inverted
   permutation) is used below to map an original column index to its permuted position */
2978: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
2979: ISGetIndices(iperm,&riip);
2980: ISGetIndices(perm,&rip);
2982: /* initialization */
2983: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2984: ui[0] = 0;
2986: /* jl: linked list for storing indices of the pivot rows
2987: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2988: PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);
/* jl[i] == am acts as the end-of-list marker: the merge loop below runs while (prow < k) */
2989: for (i=0; i<am; i++){
2990: jl[i] = am; il[i] = 0;
2991: }
2993: /* create and initialize a linked list for storing column indices of the active row k */
2994: nlnk = am + 1;
2995: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
2997: /* initial FreeSpace size is fill*(ai[am]+1) */
2998: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2999: current_space = free_space;
3001: for (k=0; k<am; k++){ /* for each active row k */
3002: /* initialize lnk by the column indices of row rip[k] of A */
3003: nzk = 0;
3004: ncols = ai[rip[k]+1] - ai[rip[k]];
3005: if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k);
3006: ncols_upper = 0;
3007: for (j=0; j<ncols; j++){
/* i is the permuted column index of the j-th entry of original row rip[k] */
3008: i = riip[*(aj + ai[rip[k]] + j)];
3009: if (i >= k){ /* only take upper triangular entry */
3010: cols[ncols_upper] = i;
3011: ncols_upper++;
3012: }
3013: }
/* presumably the macro leaves in nlnk the number of indices newly inserted into lnk
   (it is accumulated into nzk right after) - confirm against the PetscLLAdd definition */
3014: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
3015: nzk += nlnk;
3017: /* update lnk by computing fill-in for each pivot row to be merged in */
3018: prow = jl[k]; /* 1st pivot row */
3019:
3020: while (prow < k){
/* read the successor first: jl[prow] is overwritten below when prow is re-linked */
3021: nextprow = jl[prow];
3022: /* merge prow into k-th row */
3023: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
3024: jmax = ui[prow+1];
3025: ncols = jmax-jmin;
/* il[] and ui[] hold global offsets, ui_ptr[prow] is the start of row prow's own storage,
   so the offset is made row-relative by subtracting ui[prow] */
3026: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
3027: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
3028: nzk += nlnk;
3030: /* update il and jl for prow */
/* if prow still has entries beyond column k, advance its cursor and link it into the list
   of the row named by its next column index */
3031: if (jmin < jmax){
3032: il[prow] = jmin;
3033: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
3034: }
3035: prow = nextprow;
3036: }
3038: /* if free space is not available, make more free space */
3039: if (current_space->local_remaining<nzk) {
3040: i = am - k + 1; /* num of unfactored rows */
3041: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
/* NOTE(review): '¤t_space' looks like a mis-encoded '&current_space' (the HTML entity
   '&curren' rendered as a currency sign); restore it before compiling */
3042: PetscFreeSpaceGet(i,¤t_space);
3043: reallocs++;
3044: }
3046: /* copy data into free space, then initialize lnk */
3047: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
3049: /* add the k-th row into il and jl */
/* only rows with at least one off-diagonal entry are linked; array[1] is read as the first
   column after the diagonal, which assumes array[0] holds column k - TODO confirm */
3050: if (nzk-1 > 0){
3051: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
3052: jl[k] = jl[i]; jl[i] = k;
3053: il[k] = ui[k] + 1;
3054: }
/* remember where row k's indices live in the free-space chunk, then consume nzk slots */
3055: ui_ptr[k] = current_space->array;
3056: current_space->array += nzk;
3057: current_space->local_used += nzk;
3058: current_space->local_remaining -= nzk;
3060: ui[k+1] = ui[k] + nzk;
3061: }
3063: #if defined(PETSC_USE_INFO)
/* report the fill ratio that was actually needed, so the caller can tune -pc_factor_fill */
3064: if (ai[am] != 0) {
3065: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
3066: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
3067: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
3068: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
3069: } else {
3070: PetscInfo(A,"Empty matrix.\n");
3071: }
3072: #endif
3074: ISRestoreIndices(perm,&rip);
3075: ISRestoreIndices(iperm,&riip);
3076: PetscFree4(ui_ptr,jl,il,cols);
3078: /* destroy list of free space and other temporary array(s) */
/* uj receives the column indices gathered in the free-space list - presumably
   PetscFreeSpaceContiguous() copies the chunks into uj and releases them; confirm */
3079: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
3080: PetscFreeSpaceContiguous(&free_space,uj);
3081: PetscLLDestroy(lnk,lnkbt);
3083: /* put together the new matrix in MATSEQSBAIJ format */
3085: b = (Mat_SeqSBAIJ*)fact->data;
3086: b->singlemalloc = PETSC_FALSE;
3087: b->free_a = PETSC_TRUE;
3088: b->free_ij = PETSC_TRUE;
/* values are only allocated here; nothing in this routine writes to b->a */
3089: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
3090: b->j = uj;
3091: b->i = ui;
3092: b->diag = 0;
3093: b->ilen = 0;
3094: b->imax = 0;
/* perm is stored twice (row and column), hence the two reference increments below */
3095: b->row = perm;
3096: b->col = perm;
3097: PetscObjectReference((PetscObject)perm);
3098: PetscObjectReference((PetscObject)perm);
3099: b->icol = iperm;
3100: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
3101: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
3102: PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
3103: b->maxnz = b->nz = ui[am];
3104:
3105: fact->info.factor_mallocs = reallocs;
3106: fact->info.fill_ratio_given = fill;
/* guard against dividing by zero when A has no stored entries */
3107: if (ai[am] != 0) {
3108: fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
3109: } else {
3110: fact->info.fill_ratio_needed = 0.0;
3111: }
3112: fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace;
3113: return(0);
3114: }
3118: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
3119: {
3120: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3121: PetscErrorCode ierr;
3122: PetscInt n = A->rmap->n;
3123: const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi;
3124: PetscScalar *x,sum;
3125: const PetscScalar *b;
3126: const MatScalar *aa = a->a,*v;
3127: PetscInt i,nz;
3130: if (!n) return(0);
3132: VecGetArray(bb,(PetscScalar**)&b);
3133: VecGetArray(xx,&x);
3135: /* forward solve the lower triangular */
3136: x[0] = b[0];
3137: v = aa;
3138: vi = aj;
3139: for (i=1; i<n; i++) {
3140: nz = ai[i+1] - ai[i];
3141: sum = b[i];
3142: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3143: v += nz;
3144: vi += nz;
3145: x[i] = sum;
3146: }
3147:
3148: /* backward solve the upper triangular */
3149: for (i=n-1; i>=0; i--){
3150: v = aa + adiag[i+1] + 1;
3151: vi = aj + adiag[i+1] + 1;
3152: nz = adiag[i] - adiag[i+1]-1;
3153: sum = x[i];
3154: PetscSparseDenseMinusDot(sum,x,v,vi,nz);
3155: x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */
3156: }
3157:
3158: PetscLogFlops(2.0*a->nz - A->cmap->n);
3159: VecRestoreArray(bb,(PetscScalar**)&b);
3160: VecRestoreArray(xx,&x);
3161: return(0);
3162: }
3166: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
3167: {
3168: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
3169: IS iscol = a->col,isrow = a->row;
3170: PetscErrorCode ierr;
3171: PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz;
3172: const PetscInt *rout,*cout,*r,*c;
3173: PetscScalar *x,*tmp,sum;
3174: const PetscScalar *b;
3175: const MatScalar *aa = a->a,*v;
3178: if (!n) return(0);
3180: VecGetArray(bb,(PetscScalar**)&b);
3181: VecGetArray(xx,&x);
3182: tmp = a->solve_work;
3184: ISGetIndices(isrow,&rout); r = rout;
3185: ISGetIndices(iscol,&cout); c = cout;
3187: /* forward solve the lower triangular */
3188: tmp[0] = b[r[0]];
3189: v = aa;
3190: vi = aj;
3191: for (i=1; i<n; i++) {
3192: nz = ai[i+1] - ai[i];
3193: sum = b[r[i]];
3194: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3195: tmp[i] = sum;
3196: v += nz; vi += nz;
3197: }
3199: /* backward solve the upper triangular */
3200: for (i=n-1; i>=0; i--){
3201: v = aa + adiag[i+1]+1;
3202: vi = aj + adiag[i+1]+1;
3203: nz = adiag[i]-adiag[i+1]-1;
3204: sum = tmp[i];
3205: PetscSparseDenseMinusDot(sum,tmp,v,vi,nz);
3206: x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */
3207: }
3209: ISRestoreIndices(isrow,&rout);
3210: ISRestoreIndices(iscol,&cout);
3211: VecRestoreArray(bb,(PetscScalar**)&b);
3212: VecRestoreArray(xx,&x);
3213: PetscLogFlops(2*a->nz - A->cmap->n);
3214: return(0);
3215: }
3219: /*
3220: This will get a new name and become a variant of MatILUFactor_SeqAIJ(); there are no longer separate functions in the matrix function table for dt factors
3221: */
/*
   MatILUDTFactor_SeqAIJ - drop-tolerance ILU factorization of the SeqAIJ matrix A; the symbolic
   set-up and the numeric factorization are done in one pass and the result is stored in *fact.

   Input:
     A     - matrix to factor; every diagonal entry must be present and no row may be empty
     isrow - row permutation
     iscol - column permutation (its inverse is computed here and kept in b->icol)
     info  - options; dt, dtcol, dtcount and shiftamount are read.  PETSC_DEFAULT values are
             replaced by dt = 0.005, dtcol = 0.01 and dtcount = 1.5*a->rmax
     fact  - *fact is the matrix that receives the factors

   Storage of the factor, as established by the assignments below:
     - bj/ba are single arrays of nnz_max+1 entries shared by L and U
     - row i of L occupies bi[i] .. bi[i+1]-1 and grows forward from index 0
     - U grows backward from the end: bdiag[0] = nnz_max-1, the diagonal of row i sits at
       bdiag[i] and its off-diagonal entries at bdiag[i+1]+1 .. bdiag[i]-1
     - diagonal entries are stored inverted (1.0/pivot); a pivot that is exactly zero is
       replaced by dt+shift before inversion

   Errors raised directly here: PETSC_ERR_ARG_WRONGSTATE (missing diagonal),
   PETSC_ERR_MAT_LU_ZRPVT (empty row), PETSC_ERR_ARG_SIZ (L and U regions overlap).

   NOTE(review): in this listing the return codes of the PETSc calls are not captured (ierr is
   declared but never assigned).  bdiag_rev and both_identity are written but never read, and
   the initial "nzi = 0" inside the row loop is overwritten by the next statement.
*/
3222: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact)
3223: {
3224: Mat B = *fact;
3225: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b;
3226: IS isicol;
3227: PetscErrorCode ierr;
3228: const PetscInt *r,*ic;
3229: PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag;
3230: PetscInt *bi,*bj,*bdiag,*bdiag_rev;
3231: PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au;
3232: PetscInt nlnk,*lnk;
3233: PetscBT lnkbt;
3234: PetscTruth row_identity,icol_identity,both_identity;
3235: MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp;
3236: const PetscInt *ics;
3237: PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp;
3238: PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount;
3239: PetscInt dtcount=(PetscInt)info->dtcount,nnz_max;
3240: PetscTruth missing;
/* substitute built-in defaults for options the caller left at PETSC_DEFAULT */
3244: if (dt == PETSC_DEFAULT) dt = 0.005;
3245: if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */
3246: if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax);
3248: /* ------- symbolic factorization, can be reused ---------*/
3249: MatMissingDiagonal(A,&missing,&i);
3250: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
3251: adiag=a->diag;
3253: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
3255: /* bdiag is location of diagonal in factor */
3256: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag); /* becomes b->diag */
3257: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev); /* temporary */
3259: /* allocate row pointers bi */
/* 2n+2 entries: bi[0..n] are the L row pointers, bi[n+1..2n+1] are filled from the top down
   with the starts of the U rows (see the assignments to bi[2*n+1] and bi[2*n - i] below) */
3260: PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);
3262: /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */
3263: if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */
3264: nnz_max = ai[n]+2*n*dtcount+2;
3266: PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);
3267: PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);
3269: /* put together the new matrix */
3270: MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);
3271: PetscLogObjectParent(B,isicol);
3272: b = (Mat_SeqAIJ*)B->data;
3273: b->free_a = PETSC_TRUE;
3274: b->free_ij = PETSC_TRUE;
3275: b->singlemalloc = PETSC_FALSE;
3276: b->a = ba;
3277: b->j = bj;
3278: b->i = bi;
3279: b->diag = bdiag;
3280: b->ilen = 0;
3281: b->imax = 0;
3282: b->row = isrow;
3283: b->col = iscol;
/* B keeps isrow and iscol, so a reference is taken on each */
3284: PetscObjectReference((PetscObject)isrow);
3285: PetscObjectReference((PetscObject)iscol);
3286: b->icol = isicol;
3287: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
3289: PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));
3290: b->maxnz = nnz_max;
3292: B->factor = MAT_FACTOR_ILUDT;
3293: B->info.factor_mallocs = 0;
/* NOTE(review): divides by ai[n] without a zero check - confirm A cannot reach this point
   with no stored entries */
3294: B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]);
3295: CHKMEMQ;
3296: /* ------- end of symbolic factorization ---------*/
3298: ISGetIndices(isrow,&r);
3299: ISGetIndices(isicol,&ic);
3300: ics = ic;
3302: /* linked list for storing column indices of the active row */
3303: nlnk = n + 1;
3304: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
3306: /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */
3307: PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);
3308: /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */
3309: PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);
/* rtmp must start out all zero; each row re-zeroes the slots it used when it copies them out */
3310: PetscMemzero(rtmp,n*sizeof(MatScalar));
3312: bi[0] = 0;
3313: bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */
3314: bdiag_rev[n] = bdiag[0];
3315: bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */
3316: for (i=0; i<n; i++) {
3317: /* copy initial fill into linked list */
3318: nzi = 0; /* nonzeros for active row i */
3319: nzi = ai[r[i]+1] - ai[r[i]];
3320: if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
/* counts of entries of the original row stored before / after its diagonal position;
   they set the base size of the L and U parts kept by the count-based dropping below */
3321: nzi_al = adiag[r[i]] - ai[r[i]];
3322: nzi_au = ai[r[i]+1] - adiag[r[i]] -1;
3323: ajtmp = aj + ai[r[i]];
3324: PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);
3325:
3326: /* load in initial (unfactored row) */
3327: aatmp = a->a + ai[r[i]];
3328: for (j=0; j<nzi; j++) {
3329: rtmp[ics[*ajtmp++]] = *aatmp++;
3330: }
3331:
3332: /* add pivot rows into linked list */
/* lnk[n] is the head of the sorted list; rows below i are pivot rows whose U pattern is
   merged in (presumably nlnk returns the number of indices added - confirm in the macro) */
3333: row = lnk[n];
3334: while (row < i ) {
3335: nzi_bl = bi[row+1] - bi[row] + 1;
3336: bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */
3337: PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);
3338: nzi += nlnk;
3339: row = lnk[row];
3340: }
3341:
3342: /* copy data from lnk into jtmp, then initialize lnk */
3343: PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);
3345: /* numerical factorization */
3346: bjtmp = jtmp;
3347: row = *bjtmp++; /* 1st pivot row */
3348: while ( row < i ) {
3349: pc = rtmp + row;
3350: pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */
3351: multiplier = (*pc) * (*pv);
3352: *pc = multiplier;
/* the multiplier is always stored, but the pivot row is only subtracted when the
   multiplier exceeds the drop tolerance in magnitude */
3353: if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */
3354: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3355: pv = ba + bdiag[row+1] + 1;
3356: /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */
3357: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3358: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3359: PetscLogFlops(2.0*nz);
3360: }
3361: row = *bjtmp++;
3362: }
3364: /* copy sparse rtmp into contiguous vtmp; separate L and U part */
3365: diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */
3366: nzi_bl = 0; j = 0;
/* NOTE(review): this loop has no bound on j; it relies on jtmp containing an index >= i
   (presumably the diagonal i itself) - confirm */
3367: while (jtmp[j] < i){ /* Note: jtmp is sorted */
3368: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3369: nzi_bl++; j++;
3370: }
/* whatever is neither in the L part nor the diagonal belongs to the U part */
3371: nzi_bu = nzi - nzi_bl -1;
3372: while (j < nzi){
3373: vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0;
3374: j++;
3375: }
3376:
3377: bjtmp = bj + bi[i];
3378: batmp = ba + bi[i];
3379: /* apply level dropping rule to L part */
/* keep at most nzi_al + dtcount entries; presumably PetscSortSplit() moves the ncut entries
   to keep to the front, after which they are re-sorted by column index - confirm */
3380: ncut = nzi_al + dtcount;
3381: if (ncut < nzi_bl){
3382: PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);
3383: PetscSortIntWithScalarArray(ncut,jtmp,vtmp);
3384: } else {
3385: ncut = nzi_bl;
3386: }
3387: for (j=0; j<ncut; j++){
3388: bjtmp[j] = jtmp[j];
3389: batmp[j] = vtmp[j];
3390: /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */
3391: }
3392: bi[i+1] = bi[i] + ncut;
/* nzi now counts what is kept for row i: L entries plus the diagonal (U is added below) */
3393: nzi = ncut + 1;
3394:
3395: /* apply level dropping rule to U part */
/* the U part starts right after the diagonal, i.e. at offset nzi_bl+1 in jtmp/vtmp */
3396: ncut = nzi_au + dtcount;
3397: if (ncut < nzi_bu){
3398: PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);
3399: PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);
3400: } else {
3401: ncut = nzi_bu;
3402: }
3403: nzi += ncut;
3405: /* mark bdiagonal */
/* U rows are laid out backwards: row i+1's diagonal sits ncut+1 slots below row i's */
3406: bdiag[i+1] = bdiag[i] - (ncut + 1);
3407: bdiag_rev[n-i-1] = bdiag[i+1];
3408: bi[2*n - i] = bi[2*n - i +1] - (ncut + 1);
3409: bjtmp = bj + bdiag[i];
3410: batmp = ba + bdiag[i];
3411: *bjtmp = i;
3412: *batmp = diag_tmp; /* rtmp[i]; */
/* an exactly zero pivot is replaced by dt+shift so that it can be inverted */
3413: if (*batmp == 0.0) {
3414: *batmp = dt+shift;
3415: /* printf(" row %d add shift %g\n",i,shift); */
3416: }
3417: *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */
3418: /* printf(" (%d,%g),",*bjtmp,*batmp); */
3419:
/* store the kept off-diagonal U entries of row i just after the next row's diagonal slot */
3420: bjtmp = bj + bdiag[i+1]+1;
3421: batmp = ba + bdiag[i+1]+1;
3422: for (k=0; k<ncut; k++){
3423: bjtmp[k] = jtmp[nzi_bl+1+k];
3424: batmp[k] = vtmp[nzi_bl+1+k];
3425: /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */
3426: }
3427: /* printf("\n"); */
3428:
3429: im[i] = nzi; /* used by PetscLLAddSortedLU() */
3430: /*
3431: printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]);
3432: printf(" ----------------------------\n");
3433: */
3434: } /* for (i=0; i<n; i++) */
3435: /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */
/* L grew upward and U downward inside the same arrays; they must not have met.
   NOTE(review): this message prints PetscInt values with %d while the other messages in this
   function use %D */
3436: if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]);
3438: ISRestoreIndices(isrow,&r);
3439: ISRestoreIndices(isicol,&ic);
3441: PetscLLDestroy(lnk,lnkbt);
3442: PetscFree2(im,jtmp);
3443: PetscFree2(rtmp,vtmp);
3444: PetscFree(bdiag_rev);
3446: PetscLogFlops(B->cmap->n);
/* entries actually used: the L part plus the U part including the diagonals */
3447: b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n];
/* choose the solve routine: the natural-ordering variant is used only when both the row
   permutation and the inverted column permutation are the identity */
3449: ISIdentity(isrow,&row_identity);
3450: ISIdentity(isicol,&icol_identity);
3451: both_identity = (PetscTruth) (row_identity && icol_identity);
3452: if (row_identity && icol_identity) {
3453: B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3454: } else {
3455: B->ops->solve = MatSolve_SeqAIJ;
3456: }
3457:
/* the remaining solve variants are cleared (set to 0) for this factor */
3458: B->ops->solveadd = 0;
3459: B->ops->solvetranspose = 0;
3460: B->ops->solvetransposeadd = 0;
3461: B->ops->matsolve = 0;
3462: B->assembled = PETSC_TRUE;
3463: B->preallocated = PETSC_TRUE;
3464: return(0);
3465: }
3467: /* a wrapper around MatILUDTFactor_SeqAIJ() */
3470: /*
3471: This will get a new name and become a variant of MatILUFactor_SeqAIJ(); there are no longer separate functions in the matrix function table for dt factors
3472: */
3474: PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info)
3475: {
3476: PetscErrorCode ierr;
3479: MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);
3480: return(0);
3481: }
3483: /*
3484: Same as MatLUFactorNumeric_SeqAIJ(), except that it uses contiguous-array matrix factors
3485: - intended to replace the existing MatLUFactorNumeric_SeqAIJ()
3486: */
3489: /*
3490: This will get a new name and become a variant of MatILUFactor_SeqAIJ(); there are no longer separate functions in the matrix function table for dt factors
3491: */
3493: PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info)
3494: {
3495: Mat C=fact;
3496: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
3497: IS isrow = b->row,isicol = b->icol;
3499: const PetscInt *r,*ic,*ics;
3500: PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
3501: PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj;
3502: MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a;
3503: PetscReal dt=info->dt,shift=info->shiftamount;
3504: PetscTruth row_identity, col_identity;
3507: ISGetIndices(isrow,&r);
3508: ISGetIndices(isicol,&ic);
3509: PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);
3510: ics = ic;
3512: for (i=0; i<n; i++){
3513: /* initialize rtmp array */
3514: nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */
3515: bjtmp = bj + bi[i];
3516: for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0;
3517: rtmp[i] = 0.0;
3518: nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */
3519: bjtmp = bj + bdiag[i+1] + 1;
3520: for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0;
3522: /* load in initial unfactored row of A */
3523: /* printf("row %d\n",i); */
3524: nz = ai[r[i]+1] - ai[r[i]];
3525: ajtmp = aj + ai[r[i]];
3526: v = aa + ai[r[i]];
3527: for (j=0; j<nz; j++) {
3528: rtmp[ics[*ajtmp++]] = v[j];
3529: /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */
3530: }
3531: /* printf("\n"); */
3533: /* numerical factorization */
3534: bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */
3535: nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */
3536: k = 0;
3537: while (k < nzl){
3538: row = *bjtmp++;
3539: /* printf(" prow %d\n",row); */
3540: pc = rtmp + row;
3541: pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */
3542: multiplier = (*pc) * (*pv);
3543: *pc = multiplier;
3544: if (PetscAbsScalar(multiplier) > dt){
3545: pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */
3546: pv = b->a + bdiag[row+1] + 1;
3547: nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */
3548: for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++);
3549: /* PetscLogFlops(2.0*nz); */
3550: }
3551: k++;
3552: }
3553:
3554: /* finished row so stick it into b->a */
3555: /* L-part */
3556: pv = b->a + bi[i] ;
3557: pj = bj + bi[i] ;
3558: nzl = bi[i+1] - bi[i];
3559: for (j=0; j<nzl; j++) {
3560: pv[j] = rtmp[pj[j]];
3561: /* printf(" (%d,%g),",pj[j],pv[j]); */
3562: }
3564: /* diagonal: invert diagonal entries for simplier triangular solves */
3565: if (rtmp[i] == 0.0) rtmp[i] = dt+shift;
3566: b->a[bdiag[i]] = 1.0/rtmp[i];
3567: /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */
3569: /* U-part */
3570: pv = b->a + bdiag[i+1] + 1;
3571: pj = bj + bdiag[i+1] + 1;
3572: nzu = bdiag[i] - bdiag[i+1] - 1;
3573: for (j=0; j<nzu; j++) {
3574: pv[j] = rtmp[pj[j]];
3575: /* printf(" (%d,%g),",pj[j],pv[j]); */
3576: }
3577: /* printf("\n"); */
3578: }
3580: PetscFree(rtmp);
3581: ISRestoreIndices(isicol,&ic);
3582: ISRestoreIndices(isrow,&r);
3583:
3584: ISIdentity(isrow,&row_identity);
3585: ISIdentity(isicol,&col_identity);
3586: if (row_identity && col_identity) {
3587: C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
3588: } else {
3589: C->ops->solve = MatSolve_SeqAIJ;
3590: }
3591: C->ops->solveadd = 0;
3592: C->ops->solvetranspose = 0;
3593: C->ops->solvetransposeadd = 0;
3594: C->ops->matsolve = 0;
3595: C->assembled = PETSC_TRUE;
3596: C->preallocated = PETSC_TRUE;
3597: PetscLogFlops(C->cmap->n);
3598: return(0);
3599: }