Actual source code: baijfact3.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Factorization code for BAIJ format. 
  5: */
 6:  #include ../src/mat/impls/baij/seq/baij.h
 7:  #include ../src/mat/blockinvert.h

 11: /*
 12:    This is used to set the numeric factorization for both LU and ILU symbolic factorization
 13: */
 14: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscTruth natural)
 15: {
 17:   if(natural){
 18:     switch (fact->rmap->bs){
 19:     case 2:
 20:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
 21:       break;
 22:     case 3:
 23:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
 24:       break;
 25:     case 4:
 26:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 27:       break;
 28:     case 5:
 29:        fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
 30:        break;
 31:     case 6:
 32:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
 33:       break;
 34:     case 7:
 35:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
 36:       break;
 37:     case 15:
 38:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
 39:       break;
 40:     default:
 41:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 42:       break;
 43:     }
 44:   }
 45:   else{
 46:     switch (fact->rmap->bs){
 47:     case 2:
 48:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
 49:       break;
 50:     case 3:
 51:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
 52:       break;
 53:     case 4:
 54:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
 55:       break;
 56:     case 5:
 57:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
 58:       break;
 59:     case 6:
 60:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
 61:       break;
 62:     case 7:
 63:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
 64:       break;
 65:     default:
 66:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 67:       break;
 68:     }
 69:   }
 70:   return(0);
 71: }

 73: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscTruth natural)
 74: {
 76:   if (natural) {
 77:     switch (inA->rmap->bs) {
 78:     case 1:
 79:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
 80:       break;
 81:     case 2:
 82:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
 83:       break;
 84:     case 3:
 85:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
 86:       break;
 87:     case 4:
 88: #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
 89:       {
 90:         PetscTruth  sse_enabled_local;
 92:         PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);
 93:         if (sse_enabled_local) {
 94: #  if defined(PETSC_HAVE_SSE)
 95:           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
 96:           if (n==(unsigned short)n) {
 97:             unsigned short *aj=(unsigned short *)AJ;
 98:             for (i=0;i<nz;i++) {
 99:               aj[i] = (unsigned short)AJ[i];
100:             }
101:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
102:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
103:             PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
104:           } else {
105:         /* Scale the column indices for easier indexing in MatSolve. */
106: /*            for (i=0;i<nz;i++) { */
107: /*              AJ[i] = AJ[i]*4; */
108: /*            } */
109:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
110:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
111:             PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
112:           }
113: #  else
114:         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
115:           SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable");
116: #  endif
117:         } else {
118:           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
119:         }
120:       }
121: #else
122:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
123: #endif
124:       break;
125:     case 5:
126:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
127:       break;
128:     case 6:
129:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
130:       break;
131:     case 7:
132:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
133:       break;
134:     default:
135:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
136:       break;
137:     }
138:   } else {
139:     switch (inA->rmap->bs) {
140:     case 1:
141:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
142:       break;
143:     case 2:
144:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
145:       break;
146:     case 3:
147:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
148:       break;
149:     case 4:
150:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
151:       break;
152:     case 5:
153:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
154:       break;
155:     case 6:
156:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
157:       break;
158:     case 7:
159:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
160:       break;
161:     default:
162:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
163:       break;
164:     }
165:   }
166:   return(0);
167: }

169: /*
170:     The symbolic factorization code is identical to that for AIJ format,
171:   except for very small changes since this is now a SeqBAIJ datastructure.
172:   NOT good code reuse.
173: */
174:  #include petscbt.h
175:  #include ../src/mat/utils/freespace.h

179: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
180: {
181:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
182:   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
183:   PetscTruth         row_identity,col_identity,both_identity;
184:   IS                 isicol;
185:   PetscErrorCode     ierr;
186:   const PetscInt     *r,*ic;
187:   PetscInt           i,*ai=a->i,*aj=a->j;
188:   PetscInt           *bi,*bj,*ajtmp;
189:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
190:   PetscReal          f;
191:   PetscInt           nlnk,*lnk,k,**bi_ptr;
192:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
193:   PetscBT            lnkbt;

196:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
197:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
198:   ISGetIndices(isrow,&r);
199:   ISGetIndices(isicol,&ic);

201:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
202:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
203:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
204:   bi[0] = bdiag[0] = 0;

206:   /* linked list for storing column indices of the active row */
207:   nlnk = n + 1;
208:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

210:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

212:   /* initial FreeSpace size is f*(ai[n]+1) */
213:   f = info->fill;
214:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
215:   current_space = free_space;

217:   for (i=0; i<n; i++) {
218:     /* copy previous fill into linked list */
219:     nzi = 0;
220:     nnz = ai[r[i]+1] - ai[r[i]];
221:     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
222:     ajtmp = aj + ai[r[i]];
223:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
224:     nzi += nlnk;

226:     /* add pivot rows into linked list */
227:     row = lnk[n];
228:     while (row < i) {
229:       nzbd    = bdiag[row] + 1; /* num of entries in the row with column index <= row */
230:       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
231:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
232:       nzi += nlnk;
233:       row  = lnk[row];
234:     }
235:     bi[i+1] = bi[i] + nzi;
236:     im[i]   = nzi;

238:     /* mark bdiag */
239:     nzbd = 0;
240:     nnz  = nzi;
241:     k    = lnk[n];
242:     while (nnz-- && k < i){
243:       nzbd++;
244:       k = lnk[k];
245:     }
246:     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

248:     /* if free space is not available, make more free space */
249:     if (current_space->local_remaining<nzi) {
250:       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
251:       PetscFreeSpaceGet(nnz,&current_space);
252:       reallocs++;
253:     }

255:     /* copy data into free space, then initialize lnk */
256:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
257:     bi_ptr[i] = current_space->array;
258:     current_space->array           += nzi;
259:     current_space->local_used      += nzi;
260:     current_space->local_remaining -= nzi;
261:   }
262: #if defined(PETSC_USE_INFO)
263:   if (ai[n] != 0) {
264:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
265:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
266:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
267:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
268:     PetscInfo(A,"for best performance.\n");
269:   } else {
270:     PetscInfo(A,"Empty matrix\n");
271:   }
272: #endif

274:   ISRestoreIndices(isrow,&r);
275:   ISRestoreIndices(isicol,&ic);

277:   /* destroy list of free space and other temporary array(s) */
278:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
279:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
280:   PetscLLDestroy(lnk,lnkbt);
281:   PetscFree2(bi_ptr,im);

283:   /* put together the new matrix */
284:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
285:   PetscLogObjectParent(B,isicol);
286:   b    = (Mat_SeqBAIJ*)(B)->data;
287:   b->free_a       = PETSC_TRUE;
288:   b->free_ij      = PETSC_TRUE;
289:   b->singlemalloc = PETSC_FALSE;
290:   PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);
291:   b->j          = bj;
292:   b->i          = bi;
293:   b->diag       = bdiag;
294:   b->free_diag  = PETSC_TRUE;
295:   b->ilen       = 0;
296:   b->imax       = 0;
297:   b->row        = isrow;
298:   b->col        = iscol;
299:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
300:   PetscObjectReference((PetscObject)isrow);
301:   PetscObjectReference((PetscObject)iscol);
302:   b->icol       = isicol;
303:   PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
304:   PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
305: 
306:   b->maxnz = b->nz = bdiag[0]+1;
307:   B->factor                =  MAT_FACTOR_LU;
308:   B->info.factor_mallocs   = reallocs;
309:   B->info.fill_ratio_given = f;

311:   if (ai[n] != 0) {
312:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
313:   } else {
314:     B->info.fill_ratio_needed = 0.0;
315:   }
316: 
317:   ISIdentity(isrow,&row_identity);
318:   ISIdentity(iscol,&col_identity);
319:   both_identity = (PetscTruth) (row_identity && col_identity);
320:   MatSeqBAIJSetNumericFactorization(B,both_identity);
321:   return(0);
322:  }

326: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
327: {
328:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
329:   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
330:   PetscTruth         row_identity,col_identity,both_identity;
331:   IS                 isicol;
332:   PetscErrorCode     ierr;
333:   const PetscInt     *r,*ic;
334:   PetscInt           i,*ai=a->i,*aj=a->j;
335:   PetscInt           *bi,*bj,*ajtmp;
336:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
337:   PetscReal          f;
338:   PetscInt           nlnk,*lnk,k,**bi_ptr;
339:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
340:   PetscBT            lnkbt;

343:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
344:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
345:   ISGetIndices(isrow,&r);
346:   ISGetIndices(isicol,&ic);

348:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
349:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
350:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);

352:   bi[0] = bdiag[0] = 0;

354:   /* linked list for storing column indices of the active row */
355:   nlnk = n + 1;
356:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

358:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

360:   /* initial FreeSpace size is f*(ai[n]+1) */
361:   f = info->fill;
362:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
363:   current_space = free_space;

365:   for (i=0; i<n; i++) {
366:     /* copy previous fill into linked list */
367:     nzi = 0;
368:     nnz = ai[r[i]+1] - ai[r[i]];
369:     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
370:     ajtmp = aj + ai[r[i]];
371:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
372:     nzi += nlnk;

374:     /* add pivot rows into linked list */
375:     row = lnk[n];
376:     while (row < i) {
377:       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
378:       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
379:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
380:       nzi += nlnk;
381:       row  = lnk[row];
382:     }
383:     bi[i+1] = bi[i] + nzi;
384:     im[i]   = nzi;

386:     /* mark bdiag */
387:     nzbd = 0;
388:     nnz  = nzi;
389:     k    = lnk[n];
390:     while (nnz-- && k < i){
391:       nzbd++;
392:       k = lnk[k];
393:     }
394:     bdiag[i] = bi[i] + nzbd;

396:     /* if free space is not available, make more free space */
397:     if (current_space->local_remaining<nzi) {
398:       nnz = (n - i)*nzi; /* estimated and max additional space needed */
399:       PetscFreeSpaceGet(nnz,&current_space);
400:       reallocs++;
401:     }

403:     /* copy data into free space, then initialize lnk */
404:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
405:     bi_ptr[i] = current_space->array;
406:     current_space->array           += nzi;
407:     current_space->local_used      += nzi;
408:     current_space->local_remaining -= nzi;
409:   }
410: #if defined(PETSC_USE_INFO)
411:   if (ai[n] != 0) {
412:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
413:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
414:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
415:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
416:     PetscInfo(A,"for best performance.\n");
417:   } else {
418:     PetscInfo(A,"Empty matrix\n");
419:   }
420: #endif

422:   ISRestoreIndices(isrow,&r);
423:   ISRestoreIndices(isicol,&ic);

425:   /* destroy list of free space and other temporary array(s) */
426:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
427:   PetscFreeSpaceContiguous(&free_space,bj);
428:   PetscLLDestroy(lnk,lnkbt);
429:   PetscFree2(bi_ptr,im);

431:   /* put together the new matrix */
432:   MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);
433:   PetscLogObjectParent(B,isicol);
434:   b    = (Mat_SeqBAIJ*)(B)->data;
435:   b->free_a       = PETSC_TRUE;
436:   b->free_ij      = PETSC_TRUE;
437:   b->singlemalloc = PETSC_FALSE;
438:   PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);
439:   b->j          = bj;
440:   b->i          = bi;
441:   b->diag       = bdiag;
442:   b->free_diag  = PETSC_TRUE;
443:   b->ilen       = 0;
444:   b->imax       = 0;
445:   b->row        = isrow;
446:   b->col        = iscol;
447:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
448:   PetscObjectReference((PetscObject)isrow);
449:   PetscObjectReference((PetscObject)iscol);
450:   b->icol       = isicol;
451:   PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);
452:   PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));
453: 
454:   b->maxnz = b->nz = bi[n] ;
455:   (B)->factor                =  MAT_FACTOR_LU;
456:   (B)->info.factor_mallocs   = reallocs;
457:   (B)->info.fill_ratio_given = f;

459:   if (ai[n] != 0) {
460:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
461:   } else {
462:     (B)->info.fill_ratio_needed = 0.0;
463:   }

465:   ISIdentity(isrow,&row_identity);
466:   ISIdentity(iscol,&col_identity);
467:   both_identity = (PetscTruth) (row_identity && col_identity);
468:   MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
469:   return(0);
470:  }