Actual source code: baijfact3.c
1: #define PETSCMAT_DLL
3: /*
4: Factorization code for BAIJ format.
5: */
6: #include ../src/mat/impls/baij/seq/baij.h
7: #include ../src/mat/blockinvert.h
11: /*
12: This is used to set the numeric factorization for both LU and ILU symbolic factorization
13: */
14: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscTruth natural)
15: {
17: if(natural){
18: switch (fact->rmap->bs){
19: case 2:
20: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
21: break;
22: case 3:
23: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
24: break;
25: case 4:
26: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
27: break;
28: case 5:
29: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
30: break;
31: case 6:
32: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
33: break;
34: case 7:
35: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
36: break;
37: case 15:
38: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
39: break;
40: default:
41: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
42: break;
43: }
44: }
45: else{
46: switch (fact->rmap->bs){
47: case 2:
48: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
49: break;
50: case 3:
51: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
52: break;
53: case 4:
54: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
55: break;
56: case 5:
57: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
58: break;
59: case 6:
60: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
61: break;
62: case 7:
63: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
64: break;
65: default:
66: fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
67: break;
68: }
69: }
70: return(0);
71: }
73: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscTruth natural)
74: {
76: if (natural) {
77: switch (inA->rmap->bs) {
78: case 1:
79: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
80: break;
81: case 2:
82: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
83: break;
84: case 3:
85: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
86: break;
87: case 4:
88: #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
89: {
90: PetscTruth sse_enabled_local;
92: PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);
93: if (sse_enabled_local) {
94: # if defined(PETSC_HAVE_SSE)
95: int i,*AJ=a->j,nz=a->nz,n=a->mbs;
96: if (n==(unsigned short)n) {
97: unsigned short *aj=(unsigned short *)AJ;
98: for (i=0;i<nz;i++) {
99: aj[i] = (unsigned short)AJ[i];
100: }
101: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
102: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
103: PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
104: } else {
105: /* Scale the column indices for easier indexing in MatSolve. */
106: /* for (i=0;i<nz;i++) { */
107: /* AJ[i] = AJ[i]*4; */
108: /* } */
109: inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
110: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
111: PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
112: }
113: # else
114: /* This should never be reached. If so, problem in PetscSSEIsEnabled. */
115: SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable");
116: # endif
117: } else {
118: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
119: }
120: }
121: #else
122: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
123: #endif
124: break;
125: case 5:
126: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
127: break;
128: case 6:
129: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
130: break;
131: case 7:
132: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
133: break;
134: default:
135: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
136: break;
137: }
138: } else {
139: switch (inA->rmap->bs) {
140: case 1:
141: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
142: break;
143: case 2:
144: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
145: break;
146: case 3:
147: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
148: break;
149: case 4:
150: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
151: break;
152: case 5:
153: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
154: break;
155: case 6:
156: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
157: break;
158: case 7:
159: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
160: break;
161: default:
162: inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
163: break;
164: }
165: }
166: return(0);
167: }
169: /*
170: The symbolic factorization code is identical to that for AIJ format,
171: except for very small changes since this is now a SeqBAIJ datastructure.
172: NOT good code reuse.
173: */
174: #include petscbt.h
175: #include ../src/mat/utils/freespace.h
179: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
180: {
181: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
182: PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
183: PetscTruth row_identity,col_identity,both_identity;
184: IS isicol;
185: PetscErrorCode ierr;
186: const PetscInt *r,*ic;
187: PetscInt i,*ai=a->i,*aj=a->j;
188: PetscInt *bi,*bj,*ajtmp;
189: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
190: PetscReal f;
191: PetscInt nlnk,*lnk,k,**bi_ptr;
192: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
193: PetscBT lnkbt;
196: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
197: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
198: ISGetIndices(isrow,&r);
199: ISGetIndices(isicol,&ic);
201: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
202: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
203: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
204: bi[0] = bdiag[0] = 0;
206: /* linked list for storing column indices of the active row */
207: nlnk = n + 1;
208: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
210: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
212: /* initial FreeSpace size is f*(ai[n]+1) */
213: f = info->fill;
214: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
215: current_space = free_space;
217: for (i=0; i<n; i++) {
218: /* copy previous fill into linked list */
219: nzi = 0;
220: nnz = ai[r[i]+1] - ai[r[i]];
221: if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
222: ajtmp = aj + ai[r[i]];
223: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
224: nzi += nlnk;
226: /* add pivot rows into linked list */
227: row = lnk[n];
228: while (row < i) {
229: nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */
230: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
231: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
232: nzi += nlnk;
233: row = lnk[row];
234: }
235: bi[i+1] = bi[i] + nzi;
236: im[i] = nzi;
238: /* mark bdiag */
239: nzbd = 0;
240: nnz = nzi;
241: k = lnk[n];
242: while (nnz-- && k < i){
243: nzbd++;
244: k = lnk[k];
245: }
246: bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
248: /* if free space is not available, make more free space */
249: if (current_space->local_remaining<nzi) {
250: nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
251: PetscFreeSpaceGet(nnz,¤t_space);
252: reallocs++;
253: }
255: /* copy data into free space, then initialize lnk */
256: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
257: bi_ptr[i] = current_space->array;
258: current_space->array += nzi;
259: current_space->local_used += nzi;
260: current_space->local_remaining -= nzi;
261: }
262: #if defined(PETSC_USE_INFO)
263: if (ai[n] != 0) {
264: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
265: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
266: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
267: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
268: PetscInfo(A,"for best performance.\n");
269: } else {
270: PetscInfo(A,"Empty matrix\n");
271: }
272: #endif
274: ISRestoreIndices(isrow,&r);
275: ISRestoreIndices(isicol,&ic);
277: /* destroy list of free space and other temporary array(s) */
278: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
279: PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
280: PetscLLDestroy(lnk,lnkbt);
281: PetscFree2(bi_ptr,im);
283: /* put together the new matrix */
284: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
285: PetscLogObjectParent(B,isicol);
286: b = (Mat_SeqBAIJ*)(B)->data;
287: b->free_a = PETSC_TRUE;
288: b->free_ij = PETSC_TRUE;
289: b->singlemalloc = PETSC_FALSE;
290: PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);
291: b->j = bj;
292: b->i = bi;
293: b->diag = bdiag;
294: b->free_diag = PETSC_TRUE;
295: b->ilen = 0;
296: b->imax = 0;
297: b->row = isrow;
298: b->col = iscol;
299: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
300: PetscObjectReference((PetscObject)isrow);
301: PetscObjectReference((PetscObject)iscol);
302: b->icol = isicol;
303: PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
304: PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
305:
306: b->maxnz = b->nz = bdiag[0]+1;
307: B->factor = MAT_FACTOR_LU;
308: B->info.factor_mallocs = reallocs;
309: B->info.fill_ratio_given = f;
311: if (ai[n] != 0) {
312: B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
313: } else {
314: B->info.fill_ratio_needed = 0.0;
315: }
316:
317: ISIdentity(isrow,&row_identity);
318: ISIdentity(iscol,&col_identity);
319: both_identity = (PetscTruth) (row_identity && col_identity);
320: MatSeqBAIJSetNumericFactorization(B,both_identity);
321: return(0);
322: }
326: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
327: {
328: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b;
329: PetscInt n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
330: PetscTruth row_identity,col_identity,both_identity;
331: IS isicol;
332: PetscErrorCode ierr;
333: const PetscInt *r,*ic;
334: PetscInt i,*ai=a->i,*aj=a->j;
335: PetscInt *bi,*bj,*ajtmp;
336: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
337: PetscReal f;
338: PetscInt nlnk,*lnk,k,**bi_ptr;
339: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
340: PetscBT lnkbt;
343: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
344: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
345: ISGetIndices(isrow,&r);
346: ISGetIndices(isicol,&ic);
348: /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
349: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
350: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
352: bi[0] = bdiag[0] = 0;
354: /* linked list for storing column indices of the active row */
355: nlnk = n + 1;
356: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
358: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
360: /* initial FreeSpace size is f*(ai[n]+1) */
361: f = info->fill;
362: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
363: current_space = free_space;
365: for (i=0; i<n; i++) {
366: /* copy previous fill into linked list */
367: nzi = 0;
368: nnz = ai[r[i]+1] - ai[r[i]];
369: if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
370: ajtmp = aj + ai[r[i]];
371: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
372: nzi += nlnk;
374: /* add pivot rows into linked list */
375: row = lnk[n];
376: while (row < i) {
377: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
378: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
379: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
380: nzi += nlnk;
381: row = lnk[row];
382: }
383: bi[i+1] = bi[i] + nzi;
384: im[i] = nzi;
386: /* mark bdiag */
387: nzbd = 0;
388: nnz = nzi;
389: k = lnk[n];
390: while (nnz-- && k < i){
391: nzbd++;
392: k = lnk[k];
393: }
394: bdiag[i] = bi[i] + nzbd;
396: /* if free space is not available, make more free space */
397: if (current_space->local_remaining<nzi) {
398: nnz = (n - i)*nzi; /* estimated and max additional space needed */
399: PetscFreeSpaceGet(nnz,¤t_space);
400: reallocs++;
401: }
403: /* copy data into free space, then initialize lnk */
404: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
405: bi_ptr[i] = current_space->array;
406: current_space->array += nzi;
407: current_space->local_used += nzi;
408: current_space->local_remaining -= nzi;
409: }
410: #if defined(PETSC_USE_INFO)
411: if (ai[n] != 0) {
412: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
413: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
414: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
415: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
416: PetscInfo(A,"for best performance.\n");
417: } else {
418: PetscInfo(A,"Empty matrix\n");
419: }
420: #endif
422: ISRestoreIndices(isrow,&r);
423: ISRestoreIndices(isicol,&ic);
425: /* destroy list of free space and other temporary array(s) */
426: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
427: PetscFreeSpaceContiguous(&free_space,bj);
428: PetscLLDestroy(lnk,lnkbt);
429: PetscFree2(bi_ptr,im);
431: /* put together the new matrix */
432: MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
433: PetscLogObjectParent(B,isicol);
434: b = (Mat_SeqBAIJ*)(B)->data;
435: b->free_a = PETSC_TRUE;
436: b->free_ij = PETSC_TRUE;
437: b->singlemalloc = PETSC_FALSE;
438: PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);
439: b->j = bj;
440: b->i = bi;
441: b->diag = bdiag;
442: b->free_diag = PETSC_TRUE;
443: b->ilen = 0;
444: b->imax = 0;
445: b->row = isrow;
446: b->col = iscol;
447: b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
448: PetscObjectReference((PetscObject)isrow);
449: PetscObjectReference((PetscObject)iscol);
450: b->icol = isicol;
451: PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
452: PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
453:
454: b->maxnz = b->nz = bi[n] ;
455: (B)->factor = MAT_FACTOR_LU;
456: (B)->info.factor_mallocs = reallocs;
457: (B)->info.fill_ratio_given = f;
459: if (ai[n] != 0) {
460: (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
461: } else {
462: (B)->info.fill_ratio_needed = 0.0;
463: }
465: ISIdentity(isrow,&row_identity);
466: ISIdentity(iscol,&col_identity);
467: both_identity = (PetscTruth) (row_identity && col_identity);
468: MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
469: return(0);
470: }