Actual source code: mpidense.c

  1: #define PETSCMAT_DLL

  3: /*
  4:    Basic functions for parallel dense matrices.
  5: */

  7: 
 8:  #include "../src/mat/impls/dense/mpi/mpidense.h"
  9: #if defined(PETSC_HAVE_PLAPACK)
 10: static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
 11: static MPI_Comm Plapack_comm_2d;
 13: #include "PLA.h"

 16: typedef struct {
 17:   PLA_Obj        A,pivots;
 18:   PLA_Template   templ;
 19:   MPI_Datatype   datatype;
 20:   PetscInt       nb,rstart;
 21:   VecScatter     ctx;
 22:   IS             is_pla,is_petsc;
 23:   PetscTruth     pla_solved;
 24:   MatStructure   mstruct;
 25: } Mat_Plapack;
 26: #endif

 30: /*@

 32:       MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential matrix that
 33:               stores the locally owned rows of the operator. For a MATSEQDENSE matrix, it returns the matrix itself.

 35:     Input Parameter:
 36: .      A - the Seq or MPI dense matrix

 38:     Output Parameter:
 39: .      B - the inner matrix

 41:     Level: intermediate

 43: @*/
 44: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 45: {
 46:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 48:   PetscTruth     flg;

 51:   PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 52:   if (flg) {
 53:     *B = mat->A;
 54:   } else {
 55:     *B = A;
 56:   }
 57:   return(0);
 58: }
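
/*
   Usage sketch for MatDenseGetLocalMatrix(): obtaining the per-process MATSEQDENSE block and its
   column-major storage.  This is only an outline; the names A, Aloc and array are illustrative,
   A is assumed to be an assembled parallel dense matrix, and error handling follows the usual
   ierr/CHKERRQ convention.

       Mat            A,Aloc;
       PetscScalar    *array;
       PetscInt       mlocal,nlocal;
       PetscErrorCode ierr;

       ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,PETSC_NULL,&A);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

       ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);         Aloc holds the locally owned rows
       ierr = MatGetLocalSize(Aloc,&mlocal,&nlocal);CHKERRQ(ierr);
       ierr = MatGetArray(Aloc,&array);CHKERRQ(ierr);                array[i + j*mlocal] is local entry (i,j)
       ierr = MatRestoreArray(Aloc,&array);CHKERRQ(ierr);
       ierr = MatDestroy(A);CHKERRQ(ierr);
*/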

 62: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 63: {
 64:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 66:   PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

 69:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
 70:   lrow = row - rstart;
 71:   MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
 72:   return(0);
 73: }

 77: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 78: {

 82:   if (idx) {PetscFree(*idx);}
 83:   if (v) {PetscFree(*v);}
 84:   return(0);
 85: }

 90: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
 91: {
 92:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
 94:   PetscInt       m = A->rmap->n,rstart = A->rmap->rstart;
 95:   PetscScalar    *array;
 96:   MPI_Comm       comm;

 99:   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

101:   /* The reuse aspect is not implemented efficiently */
102:   if (reuse) { MatDestroy(*B);}

104:   PetscObjectGetComm((PetscObject)(mdn->A),&comm);
105:   MatGetArray(mdn->A,&array);
106:   MatCreate(comm,B);
107:   MatSetSizes(*B,m,m,m,m);
108:   MatSetType(*B,((PetscObject)mdn->A)->type_name);
109:   MatSeqDenseSetPreallocation(*B,array+m*rstart);
110:   MatRestoreArray(mdn->A,&array);
111:   MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
112:   MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
113: 
114:   *iscopy = PETSC_TRUE;
115:   return(0);
116: }

121: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
122: {
123:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
125:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
126:   PetscTruth     roworiented = A->roworiented;

130:   for (i=0; i<m; i++) {
131:     if (idxm[i] < 0) continue;
132:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
133:     if (idxm[i] >= rstart && idxm[i] < rend) {
134:       row = idxm[i] - rstart;
135:       if (roworiented) {
136:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
137:       } else {
138:         for (j=0; j<n; j++) {
139:           if (idxn[j] < 0) continue;
140:           if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
141:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
142:         }
143:       }
144:     } else {
145:       if (!A->donotstash) {
146:         if (roworiented) {
147:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
148:         } else {
149:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
150:         }
151:       }
152:     }
153:   }
154:   return(0);
155: }
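
/*
   Sketch of how the roworiented flag above affects callers of MatSetValues(): with
   MAT_ROW_ORIENTED set to PETSC_FALSE the logically dense block of values is read column by
   column (Fortran ordering), which matches the storage of dense matrices.  The fragment below
   is illustrative only; A is assumed to be a MATMPIDENSE matrix that has already been created
   and preallocated.

       Mat            A;                           created and preallocated elsewhere (assumed)
       PetscErrorCode ierr;
       PetscInt       rows[2] = {0,1},cols[3] = {0,1,2};
       PetscScalar    vals[6] = {1,2,3,4,5,6};     column-major with 2 rows: column 0 is {1,2}, column 1 {3,4}, column 2 {5,6}

       ierr = MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
       ierr = MatSetValues(A,2,rows,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/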

159: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
160: {
161:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
163:   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

166:   for (i=0; i<m; i++) {
167:     if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
168:     if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
169:     if (idxm[i] >= rstart && idxm[i] < rend) {
170:       row = idxm[i] - rstart;
171:       for (j=0; j<n; j++) {
172:         if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
173:         if (idxn[j] >= mat->cmap->N) {
174:           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
175:         }
176:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
177:       }
178:     } else {
179:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
180:     }
181:   }
182:   return(0);
183: }

187: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
188: {
189:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

193:   MatGetArray(a->A,array);
194:   return(0);
195: }

199: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
200: {
201:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
202:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
204:   PetscInt       i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
205:   const PetscInt *irow,*icol;
206:   PetscScalar    *av,*bv,*v = lmat->v;
207:   Mat            newmat;
208:   IS             iscol_local;

211:   ISAllGather(iscol,&iscol_local);
212:   ISGetIndices(isrow,&irow);
213:   ISGetIndices(iscol_local,&icol);
214:   ISGetLocalSize(isrow,&nrows);
215:   ISGetLocalSize(iscol,&ncols);
216:   ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

218:   /* No parallel redistribution currently supported! Should really check each index set
219:      to confirm that it is OK.  ... Currently supports only a submatrix with the same
220:      partitioning as the original matrix! */

222:   MatGetLocalSize(A,&nlrows,&nlcols);
223:   MatGetOwnershipRange(A,&rstart,&rend);
224: 
225:   /* Check submatrix call */
226:   if (scall == MAT_REUSE_MATRIX) {
227:     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
228:     /* Really need to test rows and column sizes! */
229:     newmat = *B;
230:   } else {
231:     /* Create and fill new matrix */
232:     MatCreate(((PetscObject)A)->comm,&newmat);
233:     MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
234:     MatSetType(newmat,((PetscObject)A)->type_name);
235:     MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
236:   }

238:   /* Now extract the data pointers and do the copy, column at a time */
239:   newmatd = (Mat_MPIDense*)newmat->data;
240:   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
241: 
242:   for (i=0; i<Ncols; i++) {
243:     av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
244:     for (j=0; j<nrows; j++) {
245:       *bv++ = av[irow[j] - rstart];
246:     }
247:   }

249:   /* Assemble the matrices so that the correct flags are set */
250:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
251:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

253:   /* Free work space */
254:   ISRestoreIndices(isrow,&irow);
255:   ISRestoreIndices(iscol,&icol);
256:   *B = newmat;
257:   return(0);
258: }

262: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
263: {
265:   return(0);
266: }

270: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
271: {
272:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
273:   MPI_Comm       comm = ((PetscObject)mat)->comm;
275:   PetscInt       nstash,reallocs;
276:   InsertMode     addv;

279:   /* make sure all processors are either in INSERTMODE or ADDMODE */
280:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
281:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
282:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
283:   }
284:   mat->insertmode = addv; /* in case this processor had no cache */

286:   MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
287:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
288:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
289:   return(0);
290: }

294: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
295: {
296:   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
297:   PetscErrorCode  ierr;
298:   PetscInt        i,*row,*col,flg,j,rstart,ncols;
299:   PetscMPIInt     n;
300:   PetscScalar     *val;
301:   InsertMode      addv=mat->insertmode;

304:   /*  wait on receives */
305:   while (1) {
306:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
307:     if (!flg) break;
308: 
309:     for (i=0; i<n;) {
310:       /* Now identify the consecutive vals belonging to the same row */
311:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
312:       if (j < n) ncols = j-i;
313:       else       ncols = n-i;
314:       /* Now assemble all these values with a single function call */
315:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
316:       i = j;
317:     }
318:   }
319:   MatStashScatterEnd_Private(&mat->stash);
320: 
321:   MatAssemblyBegin(mdn->A,mode);
322:   MatAssemblyEnd(mdn->A,mode);

324:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
325:     MatSetUpMultiply_MPIDense(mat);
326:   }
327:   return(0);
328: }

332: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
333: {
335:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

338:   MatZeroEntries(l->A);
339:   return(0);
340: }

342: /* the code does not handle the diagonal entries correctly unless the
343:    matrix is square and the row and column ownerships are identical.
344:    This is a BUG. The only way to fix it seems to be to access 
345:    mdn->A and mdn->B directly and not through the MatZeroRows() 
346:    routine. 
347: */
350: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
351: {
352:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
354:   PetscInt       i,*owners = A->rmap->range;
355:   PetscInt       *nprocs,j,idx,nsends;
356:   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
357:   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
358:   PetscInt       *lens,*lrows,*values;
359:   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
360:   MPI_Comm       comm = ((PetscObject)A)->comm;
361:   MPI_Request    *send_waits,*recv_waits;
362:   MPI_Status     recv_status,*send_status;
363:   PetscTruth     found;

366:   /*  first count number of contributors to each processor */
367:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
368:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
369:   PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
370:   for (i=0; i<N; i++) {
371:     idx = rows[i];
372:     found = PETSC_FALSE;
373:     for (j=0; j<size; j++) {
374:       if (idx >= owners[j] && idx < owners[j+1]) {
375:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
376:       }
377:     }
378:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
379:   }
380:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

382:   /* inform other processors of number of messages and max length*/
383:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

385:   /* post receives:   */
386:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
387:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
388:   for (i=0; i<nrecvs; i++) {
389:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
390:   }

392:   /* do sends:
393:       1) starts[i] gives the starting index in svalues for stuff going to 
394:          the ith processor
395:   */
396:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
397:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
398:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
399:   starts[0]  = 0;
400:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
401:   for (i=0; i<N; i++) {
402:     svalues[starts[owner[i]]++] = rows[i];
403:   }

405:   starts[0] = 0;
406:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407:   count = 0;
408:   for (i=0; i<size; i++) {
409:     if (nprocs[2*i+1]) {
410:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
411:     }
412:   }
413:   PetscFree(starts);

415:   base = owners[rank];

417:   /*  wait on receives */
418:   PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
419:   count  = nrecvs;
420:   slen   = 0;
421:   while (count) {
422:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
423:     /* unpack receives into our local space */
424:     MPI_Get_count(&recv_status,MPIU_INT,&n);
425:     source[imdex]  = recv_status.MPI_SOURCE;
426:     lens[imdex]    = n;
427:     slen += n;
428:     count--;
429:   }
430:   PetscFree(recv_waits);
431: 
432:   /* move the data into the send scatter */
433:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
434:   count = 0;
435:   for (i=0; i<nrecvs; i++) {
436:     values = rvalues + i*nmax;
437:     for (j=0; j<lens[i]; j++) {
438:       lrows[count++] = values[j] - base;
439:     }
440:   }
441:   PetscFree(rvalues);
442:   PetscFree2(lens,source);
443:   PetscFree(owner);
444:   PetscFree(nprocs);
445: 
446:   /* actually zap the local rows */
447:   MatZeroRows(l->A,slen,lrows,diag);
448:   PetscFree(lrows);

450:   /* wait on sends */
451:   if (nsends) {
452:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
453:     MPI_Waitall(nsends,send_waits,send_status);
454:     PetscFree(send_status);
455:   }
456:   PetscFree(send_waits);
457:   PetscFree(svalues);

459:   return(0);
460: }

464: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
465: {
466:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

470:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
471:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
472:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
473:   return(0);
474: }

478: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
479: {
480:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

484:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
485:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
486:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
487:   return(0);
488: }

492: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
493: {
494:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
496:   PetscScalar    zero = 0.0;

499:   VecSet(yy,zero);
500:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
501:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
502:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
503:   return(0);
504: }

508: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
509: {
510:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

514:   VecCopy(yy,zz);
515:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
516:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
517:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
518:   return(0);
519: }

523: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
524: {
525:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
526:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
528:   PetscInt       len,i,n,m = A->rmap->n,radd;
529:   PetscScalar    *x,zero = 0.0;
530: 
532:   VecSet(v,zero);
533:   VecGetArray(v,&x);
534:   VecGetSize(v,&n);
535:   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
536:   len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
537:   radd = A->rmap->rstart*m;
538:   for (i=0; i<len; i++) {
539:     x[i] = aloc->v[radd + i*m + i];
540:   }
541:   VecRestoreArray(v,&x);
542:   return(0);
543: }

547: PetscErrorCode MatDestroy_MPIDense(Mat mat)
548: {
549:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
551: #if defined(PETSC_HAVE_PLAPACK)
552:   Mat_Plapack   *lu=(Mat_Plapack*)(mat->spptr);
553: #endif


557: #if defined(PETSC_USE_LOG)
558:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
559: #endif
560:   MatStashDestroy_Private(&mat->stash);
561:   MatDestroy(mdn->A);
562:   if (mdn->lvec)   {VecDestroy(mdn->lvec);}
563:   if (mdn->Mvctx)  {VecScatterDestroy(mdn->Mvctx);}
564: #if defined(PETSC_HAVE_PLAPACK)
565:   if (lu) {
566:     PLA_Obj_free(&lu->A);
567:     PLA_Obj_free (&lu->pivots);
568:     PLA_Temp_free(&lu->templ);

570:     if (lu->is_pla) {
571:       ISDestroy(lu->is_pla);
572:       ISDestroy(lu->is_petsc);
573:       VecScatterDestroy(lu->ctx);
574:     }
575:   }
576: #endif

578:   PetscFree(mdn);
579:   PetscObjectChangeTypeName((PetscObject)mat,0);
580:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
581:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
582:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);
583:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);
584:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);
585:   return(0);
586: }

590: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
591: {
592:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
593:   PetscErrorCode    ierr;
594:   PetscViewerFormat format;
595:   int               fd;
596:   PetscInt          header[4],mmax,N = mat->cmap->N,i,j,m,k;
597:   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
598:   PetscScalar       *work,*v,*vv;
599:   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
600:   MPI_Status        status;

603:   if (mdn->size == 1) {
604:     MatView(mdn->A,viewer);
605:   } else {
606:     PetscViewerBinaryGetDescriptor(viewer,&fd);
607:     MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
608:     MPI_Comm_size(((PetscObject)mat)->comm,&size);

610:     PetscViewerGetFormat(viewer,&format);
611:     if (format == PETSC_VIEWER_NATIVE) {

613:       if (!rank) {
614:         /* store the matrix as a dense matrix */
615:         header[0] = MAT_FILE_COOKIE;
616:         header[1] = mat->rmap->N;
617:         header[2] = N;
618:         header[3] = MATRIX_BINARY_FORMAT_DENSE;
619:         PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);

621:         /* get largest work array needed for transposing array */
622:         mmax = mat->rmap->n;
623:         for (i=1; i<size; i++) {
624:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
625:         }
626:         PetscMalloc(mmax*N*sizeof(PetscScalar),&work);

628:         /* write out local array, by rows */
629:         m    = mat->rmap->n;
630:         v    = a->v;
631:         for (j=0; j<N; j++) {
632:           for (i=0; i<m; i++) {
633:             work[j + i*N] = *v++;
634:           }
635:         }
636:         PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
637:         /* get largest work array to receive messages from other processes, excludes process zero */
638:         mmax = 0;
639:         for (i=1; i<size; i++) {
640:           mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
641:         }
642:         PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);
643:         for(k = 1; k < size; k++) {
644:           v    = vv;
645:           m    = mat->rmap->range[k+1] - mat->rmap->range[k];
646:           MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);

648:           for(j = 0; j < N; j++) {
649:             for(i = 0; i < m; i++) {
650:               work[j + i*N] = *v++;
651:             }
652:           }
653:           PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
654:         }
655:         PetscFree(work);
656:         PetscFree(vv);
657:       } else {
658:         MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
659:       }
660:     } else {
661:       SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
662:     }
663:   }
664:   return(0);
665: }

669: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
670: {
671:   Mat_MPIDense          *mdn = (Mat_MPIDense*)mat->data;
672:   PetscErrorCode        ierr;
673:   PetscMPIInt           size = mdn->size,rank = mdn->rank;
674:   const PetscViewerType vtype;
675:   PetscTruth            iascii,isdraw;
676:   PetscViewer           sviewer;
677:   PetscViewerFormat     format;
678: #if defined(PETSC_HAVE_PLAPACK)
679:   Mat_Plapack           *lu;
680: #endif

683:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
684:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
685:   if (iascii) {
686:     PetscViewerGetType(viewer,&vtype);
687:     PetscViewerGetFormat(viewer,&format);
688:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
689:       MatInfo info;
690:       MatGetInfo(mat,MAT_LOCAL,&info);
691:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
692:                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
693:       PetscViewerFlush(viewer);
694: #if defined(PETSC_HAVE_PLAPACK)
695:       PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");
696:       PetscViewerASCIIPrintf(viewer,"  Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);
697:       PetscViewerASCIIPrintf(viewer,"  Error checking: %d\n",Plapack_ierror);
698:       PetscViewerASCIIPrintf(viewer,"  Algorithmic block size: %d\n",Plapack_nb_alg);
699:       if (mat->factor){
700:         lu=(Mat_Plapack*)(mat->spptr);
701:         PetscViewerASCIIPrintf(viewer,"  Distr. block size nb: %d \n",lu->nb);
702:       }
703: #else
704:       VecScatterView(mdn->Mvctx,viewer);
705: #endif
706:       return(0);
707:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
708:       return(0);
709:     }
710:   } else if (isdraw) {
711:     PetscDraw  draw;
712:     PetscTruth isnull;

714:     PetscViewerDrawGetDraw(viewer,0,&draw);
715:     PetscDrawIsNull(draw,&isnull);
716:     if (isnull) return(0);
717:   }

719:   if (size == 1) {
720:     MatView(mdn->A,viewer);
721:   } else {
722:     /* assemble the entire matrix onto first processor. */
723:     Mat         A;
724:     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
725:     PetscInt    *cols;
726:     PetscScalar *vals;

728:     MatCreate(((PetscObject)mat)->comm,&A);
729:     if (!rank) {
730:       MatSetSizes(A,M,N,M,N);
731:     } else {
732:       MatSetSizes(A,0,0,M,N);
733:     }
734:     /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
735:     MatSetType(A,MATMPIDENSE);
736:     MatMPIDenseSetPreallocation(A,PETSC_NULL);
737:     PetscLogObjectParent(mat,A);

739:     /* Copy the matrix ... This isn't the most efficient means,
740:        but it's quick for now */
741:     A->insertmode = INSERT_VALUES;
742:     row = mat->rmap->rstart; m = mdn->A->rmap->n;
743:     for (i=0; i<m; i++) {
744:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
745:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
746:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
747:       row++;
748:     }

750:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
751:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
752:     PetscViewerGetSingleton(viewer,&sviewer);
753:     if (!rank) {
754:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
755:     }
756:     PetscViewerRestoreSingleton(viewer,&sviewer);
757:     PetscViewerFlush(viewer);
758:     MatDestroy(A);
759:   }
760:   return(0);
761: }

765: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
766: {
768:   PetscTruth     iascii,isbinary,isdraw,issocket;
769: 
771: 
772:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
773:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
774:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
775:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);

777:   if (iascii || issocket || isdraw) {
778:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
779:   } else if (isbinary) {
780:     MatView_MPIDense_Binary(mat,viewer);
781:   } else {
782:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
783:   }
784:   return(0);
785: }

789: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
790: {
791:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
792:   Mat            mdn = mat->A;
794:   PetscReal      isend[5],irecv[5];

797:   info->block_size     = 1.0;
798:   MatGetInfo(mdn,MAT_LOCAL,info);
799:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
800:   isend[3] = info->memory;  isend[4] = info->mallocs;
801:   if (flag == MAT_LOCAL) {
802:     info->nz_used      = isend[0];
803:     info->nz_allocated = isend[1];
804:     info->nz_unneeded  = isend[2];
805:     info->memory       = isend[3];
806:     info->mallocs      = isend[4];
807:   } else if (flag == MAT_GLOBAL_MAX) {
808:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);
809:     info->nz_used      = irecv[0];
810:     info->nz_allocated = irecv[1];
811:     info->nz_unneeded  = irecv[2];
812:     info->memory       = irecv[3];
813:     info->mallocs      = irecv[4];
814:   } else if (flag == MAT_GLOBAL_SUM) {
815:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
816:     info->nz_used      = irecv[0];
817:     info->nz_allocated = irecv[1];
818:     info->nz_unneeded  = irecv[2];
819:     info->memory       = irecv[3];
820:     info->mallocs      = irecv[4];
821:   }
822:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
823:   info->fill_ratio_needed = 0;
824:   info->factor_mallocs    = 0;
825:   return(0);
826: }

830: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
831: {
832:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

836:   switch (op) {
837:   case MAT_NEW_NONZERO_LOCATIONS:
838:   case MAT_NEW_NONZERO_LOCATION_ERR:
839:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
840:     MatSetOption(a->A,op,flg);
841:     break;
842:   case MAT_ROW_ORIENTED:
843:     a->roworiented = flg;
844:     MatSetOption(a->A,op,flg);
845:     break;
846:   case MAT_NEW_DIAGONALS:
847:   case MAT_USE_HASH_TABLE:
848:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
849:     break;
850:   case MAT_IGNORE_OFF_PROC_ENTRIES:
851:     a->donotstash = flg;
852:     break;
853:   case MAT_SYMMETRIC:
854:   case MAT_STRUCTURALLY_SYMMETRIC:
855:   case MAT_HERMITIAN:
856:   case MAT_SYMMETRY_ETERNAL:
857:   case MAT_IGNORE_LOWER_TRIANGULAR:
858:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
859:     break;
860:   default:
861:     SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
862:   }
863:   return(0);
864: }


869: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
870: {
871:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
872:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
873:   PetscScalar    *l,*r,x,*v;
875:   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;

878:   MatGetLocalSize(A,&s2,&s3);
879:   if (ll) {
880:     VecGetLocalSize(ll,&s2a);
881:     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
882:     VecGetArray(ll,&l);
883:     for (i=0; i<m; i++) {
884:       x = l[i];
885:       v = mat->v + i;
886:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
887:     }
888:     VecRestoreArray(ll,&l);
889:     PetscLogFlops(n*m);
890:   }
891:   if (rr) {
892:     VecGetLocalSize(rr,&s3a);
893:     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
894:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
895:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
896:     VecGetArray(mdn->lvec,&r);
897:     for (i=0; i<n; i++) {
898:       x = r[i];
899:       v = mat->v + i*m;
900:       for (j=0; j<m; j++) { (*v++) *= x;}
901:     }
902:     VecRestoreArray(mdn->lvec,&r);
903:     PetscLogFlops(n*m);
904:   }
905:   return(0);
906: }

910: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
911: {
912:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
913:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
915:   PetscInt       i,j;
916:   PetscReal      sum = 0.0;
917:   PetscScalar    *v = mat->v;

920:   if (mdn->size == 1) {
921:      MatNorm(mdn->A,type,nrm);
922:   } else {
923:     if (type == NORM_FROBENIUS) {
924:       for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
925: #if defined(PETSC_USE_COMPLEX)
926:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
927: #else
928:         sum += (*v)*(*v); v++;
929: #endif
930:       }
931:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
932:       *nrm = sqrt(*nrm);
933:       PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
934:     } else if (type == NORM_1) {
935:       PetscReal *tmp,*tmp2;
936:       PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);
937:       PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
938:       PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
939:       *nrm = 0.0;
940:       v = mat->v;
941:       for (j=0; j<mdn->A->cmap->n; j++) {
942:         for (i=0; i<mdn->A->rmap->n; i++) {
943:           tmp[j] += PetscAbsScalar(*v);  v++;
944:         }
945:       }
946:       MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
947:       for (j=0; j<A->cmap->N; j++) {
948:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
949:       }
950:       PetscFree2(tmp,tmp2);
951:       PetscLogFlops(A->cmap->n*A->rmap->n);
952:     } else if (type == NORM_INFINITY) { /* max row norm */
953:       PetscReal ntemp;
954:       MatNorm(mdn->A,type,&ntemp);
955:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);
956:     } else {
957:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
958:     }
959:   }
960:   return(0);
961: }

965: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
966: {
967:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
968:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
969:   Mat            B;
970:   PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
972:   PetscInt       j,i;
973:   PetscScalar    *v;

976:   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
977:   if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
978:     MatCreate(((PetscObject)A)->comm,&B);
979:     MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
980:     MatSetType(B,((PetscObject)A)->type_name);
981:     MatMPIDenseSetPreallocation(B,PETSC_NULL);
982:   } else {
983:     B = *matout;
984:   }

986:   m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
987:   PetscMalloc(m*sizeof(PetscInt),&rwork);
988:   for (i=0; i<m; i++) rwork[i] = rstart + i;
989:   for (j=0; j<n; j++) {
990:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
991:     v   += m;
992:   }
993:   PetscFree(rwork);
994:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
995:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
996:   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
997:     *matout = B;
998:   } else {
999:     MatHeaderCopy(A,B);
1000:   }
1001:   return(0);
1002: }


1005: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

1010: PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1011: {

1015:    MatMPIDenseSetPreallocation(A,0);
1016:   return(0);
1017: }

1019: #if defined(PETSC_HAVE_PLAPACK)

1023: PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1024: {
1025:   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1027:   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1028:   PetscScalar    *array;
1029:   PetscReal      one = 1.0;

1032:   /* Copy A into F->lu->A */
1033:   PLA_Obj_set_to_zero(lu->A);
1034:   PLA_API_begin();
1035:   PLA_Obj_API_open(lu->A);
1036:   MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1037:   MatGetArray(A,&array);
1038:   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1039:   MatRestoreArray(A,&array);
1040:   PLA_Obj_API_close(lu->A);
1041:   PLA_API_end();
1042:   lu->rstart = rstart;
1043:   return(0);
1044: }

1048: PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1049: {
1050:   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1052:   PetscInt       M=A->cmap->N,m=A->rmap->n,rstart;
1053:   PetscScalar    *array;
1054:   PetscReal      one = 1.0;

1057:   /* Copy F into A->lu->A */
1058:   MatZeroEntries(A);
1059:   PLA_API_begin();
1060:   PLA_Obj_API_open(lu->A);
1061:   MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1062:   MatGetArray(A,&array);
1063:   PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);
1064:   MatRestoreArray(A,&array);
1065:   PLA_Obj_API_close(lu->A);
1066:   PLA_API_end();
1067:   lu->rstart = rstart;
1068:   return(0);
1069: }

1073: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1074: {
1076:   Mat_Plapack    *luA = (Mat_Plapack*)A->spptr;
1077:   Mat_Plapack    *luB = (Mat_Plapack*)B->spptr;
1078:   Mat_Plapack    *luC = (Mat_Plapack*)C->spptr;
1079:   PLA_Obj        alpha = NULL,beta = NULL;

1082:   MatMPIDenseCopyToPlapack(A,A);
1083:   MatMPIDenseCopyToPlapack(B,B);

1085:   /* 
1086:   PLA_Global_show("A = ",luA->A,"%g ","");
1087:   PLA_Global_show("B = ",luB->A,"%g ","");
1088:   */

1090:   /* do the multiply in PLA  */
1091:   PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);
1092:   PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);
1093:   CHKMEMQ;

1095:   PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /*  */
1096:   CHKMEMQ;
1097:   PLA_Obj_free(&alpha);
1098:   PLA_Obj_free(&beta);

1100:   /*
1101:   PLA_Global_show("C = ",luC->A,"%g ","");
1102:   */
1103:   MatMPIDenseCopyFromPlapack(C,C);
1104:   return(0);
1105: }

1109: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1110: {
1112:   PetscInt       m=A->rmap->n,n=B->cmap->n;
1113:   Mat            Cmat;

1116:   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1117:   SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported");
1118:   MatCreate(((PetscObject)B)->comm,&Cmat);
1119:   MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);
1120:   MatSetType(Cmat,MATMPIDENSE);
1121:   MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
1122:   MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);

1124:   *C = Cmat;
1125:   return(0);
1126: }

1130: PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1131: {

1135:   if (scall == MAT_INITIAL_MATRIX){
1136:     MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1137:   }
1138:   MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1139:   return(0);
1140: }

1144: PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1145: {
1146:   MPI_Comm       comm = ((PetscObject)A)->comm;
1147:   Mat_Plapack    *lu = (Mat_Plapack*)A->spptr;
1149:   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1150:   PetscScalar    *array;
1151:   PetscReal      one = 1.0;
1152:   PetscMPIInt    size,rank,r_rank,r_nproc,c_rank,c_nproc;
1153:   PLA_Obj        v_pla = NULL;
1154:   PetscScalar    *loc_buf;
1155:   Vec            loc_x;
1156: 
1158:   MPI_Comm_size(comm,&size);
1159:   MPI_Comm_rank(comm,&rank);

1161:   /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1162:   PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1163:   PLA_Obj_set_to_zero(v_pla);

1165:   /* Copy b into rhs_pla */
1166:   PLA_API_begin();
1167:   PLA_Obj_API_open(v_pla);
1168:   VecGetArray(b,&array);
1169:   PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1170:   VecRestoreArray(b,&array);
1171:   PLA_Obj_API_close(v_pla);
1172:   PLA_API_end();

1174:   if (A->factor == MAT_FACTOR_LU){
1175:     /* Apply the permutations to the right hand sides */
1176:     PLA_Apply_pivots_to_rows (v_pla,lu->pivots);

1178:     /* Solve L y = b, overwriting b with y */
1179:     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );

1181:     /* Solve U x = y (=b), overwriting b with x */
1182:     PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1183:   } else { /* MAT_FACTOR_CHOLESKY */
1184:     PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1185:     PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1186:                                     PLA_NONUNIT_DIAG,lu->A,v_pla);
1187:   }

1189:   /* Copy PLAPACK x into Petsc vector x  */
1190:   PLA_Obj_local_length(v_pla, &loc_m);
1191:   PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1192:   PLA_Obj_local_stride(v_pla, &loc_stride);
1193:   /*
1194:     PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb); 
1195:   */
1196:   VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);
1197:   if (!lu->pla_solved){
1198: 
1199:     PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1200:     PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);

1202:     /* Create IS and cts for VecScatterring */
1203:     PLA_Obj_local_length(v_pla, &loc_m);
1204:     PLA_Obj_local_stride(v_pla, &loc_stride);
1205:     PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);

1207:     rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1208:     for (i=0; i<loc_m; i+=lu->nb){
1209:       j = 0;
1210:       while (j < lu->nb && i+j < loc_m){
1211:         idx_petsc[i+j] = rstart + j; j++;
1212:       }
1213:       rstart += size*lu->nb;
1214:     }

1216:     for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;

1218:     ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);
1219:     ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);
1220:     PetscFree2(idx_pla,idx_petsc);
1221:     VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);
1222:   }
1223:   VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1224:   VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1225: 
1226:   /* Free data */
1227:   VecDestroy(loc_x);
1228:   PLA_Obj_free(&v_pla);

1230:   lu->pla_solved = PETSC_TRUE;
1231:   return(0);
1232: }

1236: PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1237: {
1238:   Mat_Plapack    *lu = (Mat_Plapack*)(F)->spptr;
1240:   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1241:   PetscInt       info_pla=0;
1242:   PetscScalar    *array,one = 1.0;

1245:   if (lu->mstruct == SAME_NONZERO_PATTERN){
1246:     PLA_Obj_free(&lu->A);
1247:     PLA_Obj_free (&lu->pivots);
1248:   }
1249:   /* Create PLAPACK matrix object */
1250:   lu->A = NULL; lu->pivots = NULL;
1251:   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1252:   PLA_Obj_set_to_zero(lu->A);
1253:   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);

1255:   /* Copy A into lu->A */
1256:   PLA_API_begin();
1257:   PLA_Obj_API_open(lu->A);
1258:   MatGetOwnershipRange(A,&rstart,&rend);
1259:   MatGetArray(A,&array);
1260:   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1261:   MatRestoreArray(A,&array);
1262:   PLA_Obj_API_close(lu->A);
1263:   PLA_API_end();

1265:   /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1266:   info_pla = PLA_LU(lu->A,lu->pivots);
1267:   if (info_pla != 0)
1268:     SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);

1270:   lu->rstart         = rstart;
1271:   lu->mstruct        = SAME_NONZERO_PATTERN;
1272:   F->ops->solve      = MatSolve_MPIDense;
1273:   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1274:   return(0);
1275: }

1279: PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1280: {
1281:   Mat_Plapack    *lu = (Mat_Plapack*)F->spptr;
1283:   PetscInt       M=A->rmap->N,m=A->rmap->n,rstart,rend;
1284:   PetscInt       info_pla=0;
1285:   PetscScalar    *array,one = 1.0;

1288:   if (lu->mstruct == SAME_NONZERO_PATTERN){
1289:     PLA_Obj_free(&lu->A);
1290:   }
1291:   /* Create PLAPACK matrix object */
1292:   lu->A      = NULL;
1293:   lu->pivots = NULL;
1294:   PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);

1296:   /* Copy A into lu->A */
1297:   PLA_API_begin();
1298:   PLA_Obj_API_open(lu->A);
1299:   MatGetOwnershipRange(A,&rstart,&rend);
1300:   MatGetArray(A,&array);
1301:   PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1302:   MatRestoreArray(A,&array);
1303:   PLA_Obj_API_close(lu->A);
1304:   PLA_API_end();

1306:   /* Factor P A -> Chol */
1307:   info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1308:   if (info_pla != 0)
1309:     SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);

1311:   lu->rstart         = rstart;
1312:   lu->mstruct        = SAME_NONZERO_PATTERN;
1313:   F->ops->solve      = MatSolve_MPIDense;
1314:   F->assembled       = PETSC_TRUE;  /* required by -ksp_view */
1315:   return(0);
1316: }

1318: /* Note the Petsc perm permutation is ignored */
1321: PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1322: {
1324:   PetscTruth     issymmetric,set;

1327:   MatIsSymmetricKnown(A,&set,&issymmetric);
1328:   if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1329:   F->ops->choleskyfactornumeric  = MatCholeskyFactorNumeric_MPIDense;
1330:   return(0);
1331: }

1333: /* Note the Petsc r and c permutations are ignored */
1336: PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1337: {
1339:   PetscInt       M = A->rmap->N;
1340:   Mat_Plapack    *lu;

1343:   lu = (Mat_Plapack*)F->spptr;
1344:   PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1345:   F->ops->lufactornumeric  = MatLUFactorNumeric_MPIDense;
1346:   return(0);
1347: }

1352: PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1353: {
1355:   *type = MAT_SOLVER_PLAPACK;
1356:   return(0);
1357: }

1363: PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1364: {
1366:   Mat_Plapack    *lu;
1367:   PetscMPIInt    size;
1368:   PetscInt       M=A->rmap->N;

1371:   /* Create the factorization matrix */
1372:   MatCreate(((PetscObject)A)->comm,F);
1373:   MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1374:   MatSetType(*F,((PetscObject)A)->type_name);
1375:   PetscNewLog(*F,Mat_Plapack,&lu);
1376:   (*F)->spptr = (void*)lu;

1378:   /* Set default Plapack parameters */
1379:   MPI_Comm_size(((PetscObject)A)->comm,&size);
1380:   lu->nb = M/size;
1381:   if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1382: 
1383:   /* Set runtime options */
1384:   PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");
1385:     PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);
1386:   PetscOptionsEnd();

1388:   /* Create object distribution template */
1389:   lu->templ = NULL;
1390:   PLA_Temp_create(lu->nb, 0, &lu->templ);

1392:   /* Set the datatype */
1393: #if defined(PETSC_USE_COMPLEX)
1394:   lu->datatype = MPI_DOUBLE_COMPLEX;
1395: #else
1396:   lu->datatype = MPI_DOUBLE;
1397: #endif

1399:   PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);


1402:   lu->pla_solved     = PETSC_FALSE; /* MatSolve_MPIDense() has not been called yet */
1403:   lu->mstruct        = DIFFERENT_NONZERO_PATTERN;

1405:   if (ftype == MAT_FACTOR_LU) {
1406:     (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1407:   } else if (ftype == MAT_FACTOR_CHOLESKY) {
1408:     (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1409:   } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1410:   (*F)->factor = ftype;
1411:   PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);
1412:   return(0);
1413: }
1415: #endif

1419: PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1420: {
1421: #if defined(PETSC_HAVE_PLAPACK)
1423: #endif

1426: #if defined(PETSC_HAVE_PLAPACK)
1427:   MatGetFactor_mpidense_plapack(A,ftype,F);
1428: #else
1429:   SETERRQ1(PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1430: #endif
1431:   return(0);
1432: }

1436: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1437: {
1439:   Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

1442:   MatAXPY(A->A,alpha,B->A,str);
1443:   return(0);
1444: }

1448: PetscErrorCode  MatConjugate_MPIDense(Mat mat)
1449: {
1450:   Mat_MPIDense   *a = (Mat_MPIDense *)mat->data;

1454:   MatConjugate(a->A);
1455:   return(0);
1456: }

1460: PetscErrorCode MatRealPart_MPIDense(Mat A)
1461: {
1462:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1466:   MatRealPart(a->A);
1467:   return(0);
1468: }

1472: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1473: {
1474:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

1478:   MatImaginaryPart(a->A);
1479:   return(0);
1480: }

1482: /* -------------------------------------------------------------------*/
1483: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1484:        MatGetRow_MPIDense,
1485:        MatRestoreRow_MPIDense,
1486:        MatMult_MPIDense,
1487: /* 4*/ MatMultAdd_MPIDense,
1488:        MatMultTranspose_MPIDense,
1489:        MatMultTransposeAdd_MPIDense,
1490:        0,
1491:        0,
1492:        0,
1493: /*10*/ 0,
1494:        0,
1495:        0,
1496:        0,
1497:        MatTranspose_MPIDense,
1498: /*15*/ MatGetInfo_MPIDense,
1499:        MatEqual_MPIDense,
1500:        MatGetDiagonal_MPIDense,
1501:        MatDiagonalScale_MPIDense,
1502:        MatNorm_MPIDense,
1503: /*20*/ MatAssemblyBegin_MPIDense,
1504:        MatAssemblyEnd_MPIDense,
1505:        MatSetOption_MPIDense,
1506:        MatZeroEntries_MPIDense,
1507: /*24*/ MatZeroRows_MPIDense,
1508:        0,
1509:        0,
1510:        0,
1511:        0,
1512: /*29*/ MatSetUpPreallocation_MPIDense,
1513:        0,
1514:        0,
1515:        MatGetArray_MPIDense,
1516:        MatRestoreArray_MPIDense,
1517: /*34*/ MatDuplicate_MPIDense,
1518:        0,
1519:        0,
1520:        0,
1521:        0,
1522: /*39*/ MatAXPY_MPIDense,
1523:        MatGetSubMatrices_MPIDense,
1524:        0,
1525:        MatGetValues_MPIDense,
1526:        0,
1527: /*44*/ 0,
1528:        MatScale_MPIDense,
1529:        0,
1530:        0,
1531:        0,
1532: /*49*/ 0,
1533:        0,
1534:        0,
1535:        0,
1536:        0,
1537: /*54*/ 0,
1538:        0,
1539:        0,
1540:        0,
1541:        0,
1542: /*59*/ MatGetSubMatrix_MPIDense,
1543:        MatDestroy_MPIDense,
1544:        MatView_MPIDense,
1545:        0,
1546:        0,
1547: /*64*/ 0,
1548:        0,
1549:        0,
1550:        0,
1551:        0,
1552: /*69*/ 0,
1553:        0,
1554:        0,
1555:        0,
1556:        0,
1557: /*74*/ 0,
1558:        0,
1559:        0,
1560:        0,
1561:        0,
1562: /*79*/ 0,
1563:        0,
1564:        0,
1565:        0,
1566: /*83*/ MatLoad_MPIDense,
1567:        0,
1568:        0,
1569:        0,
1570:        0,
1571:        0,
1572: /*89*/
1573: #if defined(PETSC_HAVE_PLAPACK)
1574:        MatMatMult_MPIDense_MPIDense,
1575:        MatMatMultSymbolic_MPIDense_MPIDense,
1576:        MatMatMultNumeric_MPIDense_MPIDense,
1577: #else
1578:        0,
1579:        0,
1580:        0,
1581: #endif
1582:        0,
1583:        0,
1584: /*94*/ 0,
1585:        0,
1586:        0,
1587:        0,
1588:        0,
1589: /*99*/ 0,
1590:        0,
1591:        0,
1592:        MatConjugate_MPIDense,
1593:        0,
1594: /*104*/0,
1595:        MatRealPart_MPIDense,
1596:        MatImaginaryPart_MPIDense,
1597:        0
1598: };

1603: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1604: {
1605:   Mat_MPIDense   *a;

1609:   mat->preallocated = PETSC_TRUE;
1610:   /* Note:  For now, when data is specified above, this assumes the user correctly
1611:    allocates the local dense storage space.  We should add error checking. */

1613:   a    = (Mat_MPIDense*)mat->data;
1614:   MatCreate(PETSC_COMM_SELF,&a->A);
1615:   MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1616:   MatSetType(a->A,MATSEQDENSE);
1617:   MatSeqDenseSetPreallocation(a->A,data);
1618:   PetscLogObjectParent(mat,a->A);
1619:   return(0);
1620: }

1623: /*MC
1624:    MAT_SOLVER_PLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices

1626:   Run config/configure.py with the option --download-plapack to use this solver.


1629:   Options Database Keys:
1630: + -mat_plapack_nprows <n> - number of rows in processor partition
1631: . -mat_plapack_npcols <n> - number of columns in processor partition
1632: . -mat_plapack_nb <n> - block size of template vector
1633: . -mat_plapack_nb_alg <n> - algorithmic block size
1634: - -mat_plapack_ckerror <n> - error checking flag

1636:    Level: intermediate

1638: .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage

1640: M*/
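
/*
   Usage sketch: one way to reach the PLAPACK factorization is through KSP with a direct-solver
   preconditioner, letting the options database pick the package.  This is a minimal outline
   that assumes PETSc was configured with --download-plapack and that A, b and x are an
   assembled MATMPIDENSE matrix and conforming vectors created elsewhere.

       KSP            ksp;
       PetscErrorCode ierr;

       ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr);
       ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
       ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
       ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
       ierr = KSPDestroy(ksp);CHKERRQ(ierr);

   run, for example, with
       -ksp_type preonly -pc_type lu -pc_factor_mat_solver_package plapack -mat_plapack_nb 64
   where -pc_factor_mat_solver_package is the usual PCFactor option for selecting the package
   registered by this file.
*/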

1645: PetscErrorCode  MatCreate_MPIDense(Mat mat)
1646: {
1647:   Mat_MPIDense   *a;

1651:   PetscNewLog(mat,Mat_MPIDense,&a);
1652:   mat->data         = (void*)a;
1653:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1654:   mat->mapping      = 0;

1656:   mat->insertmode = NOT_SET_VALUES;
1657:   MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);
1658:   MPI_Comm_size(((PetscObject)mat)->comm,&a->size);

1660:   PetscLayoutSetBlockSize(mat->rmap,1);
1661:   PetscLayoutSetBlockSize(mat->cmap,1);
1662:   PetscLayoutSetUp(mat->rmap);
1663:   PetscLayoutSetUp(mat->cmap);
1664:   a->nvec = mat->cmap->n;

1666:   /* build cache for off array entries formed */
1667:   a->donotstash = PETSC_FALSE;
1668:   MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);

1670:   /* stuff used for matrix vector multiply */
1671:   a->lvec        = 0;
1672:   a->Mvctx       = 0;
1673:   a->roworiented = PETSC_TRUE;

1675:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1676:                                      "MatGetDiagonalBlock_MPIDense",
1677:                                      MatGetDiagonalBlock_MPIDense);
1678:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1679:                                      "MatMPIDenseSetPreallocation_MPIDense",
1680:                                      MatMPIDenseSetPreallocation_MPIDense);
1681:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1682:                                      "MatMatMult_MPIAIJ_MPIDense",
1683:                                       MatMatMult_MPIAIJ_MPIDense);
1684:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1685:                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1686:                                       MatMatMultSymbolic_MPIAIJ_MPIDense);
1687:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1688:                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1689:                                       MatMatMultNumeric_MPIAIJ_MPIDense);
1690:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1691:                                      "MatGetFactor_mpidense_petsc",
1692:                                       MatGetFactor_mpidense_petsc);
1693: #if defined(PETSC_HAVE_PLAPACK)
1694:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1695:                                      "MatGetFactor_mpidense_plapack",
1696:                                       MatGetFactor_mpidense_plapack);
1697:   PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);
1698: #endif
1699:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);

1701:   return(0);
1702: }

1705: /*MC
1706:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1708:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1709:    and MATMPIDENSE otherwise.

1711:    Options Database Keys:
1712: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1714:   Level: beginner


1717: .seealso: MatCreateMPIDense(), MATSEQDENSE, MATMPIDENSE
1718: M*/
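
/*
   Usage sketch for MATDENSE: the type resolves to MATSEQDENSE or MATMPIDENSE depending on the
   communicator size (see MatCreate_Dense() below).  A minimal outline with illustrative sizes;
   calling both preallocation routines is safe because each is a no-op when its matching type
   is not the one selected.

       Mat            A;
       PetscErrorCode ierr;

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
       ierr = MatSetType(A,MATDENSE);CHKERRQ(ierr);
       ierr = MatSeqDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/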

1723: PetscErrorCode  MatCreate_Dense(Mat A)
1724: {
1726:   PetscMPIInt    size;

1729:   MPI_Comm_size(((PetscObject)A)->comm,&size);
1730:   if (size == 1) {
1731:     MatSetType(A,MATSEQDENSE);
1732:   } else {
1733:     MatSetType(A,MATMPIDENSE);
1734:   }
1735:   return(0);
1736: }

1741: /*@C
1742:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1744:    Not collective

1746:    Input Parameters:
1747: +  A - the matrix
1748: -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1749:    to control all matrix memory allocation.

1751:    Notes:
1752:    The dense format is fully compatible with standard Fortran 77
1753:    storage by columns.

1755:    The data input variable is intended primarily for Fortran programmers
1756:    who wish to allocate their own matrix memory space.  Most users should
1757:    set data=PETSC_NULL.

1759:    Level: intermediate

1761: .keywords: matrix,dense, parallel

1763: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1764: @*/
1765: PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1766: {
1767:   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

1770:   PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1771:   if (f) {
1772:     (*f)(mat,data);
1773:   }
1774:   return(0);
1775: }
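
/*
   Usage sketch for the data argument of MatMPIDenseSetPreallocation(): supplying user-allocated
   storage for the local block.  The local block is stored column-major with mlocal rows and N
   (global) columns, so at least mlocal*N scalars must be provided.  The names below are
   illustrative and the sizes arbitrary.

       Mat            A;
       PetscScalar    *mydata;
       PetscInt       mlocal,nlocal,N = 100;
       PetscErrorCode ierr;

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
       ierr = MatGetLocalSize(A,&mlocal,&nlocal);CHKERRQ(ierr);
       ierr = PetscMalloc(mlocal*N*sizeof(PetscScalar),&mydata);CHKERRQ(ierr);
       ierr = MatMPIDenseSetPreallocation(A,mydata);CHKERRQ(ierr);
*/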

1779: /*@C
1780:    MatCreateMPIDense - Creates a parallel matrix in dense format.

1782:    Collective on MPI_Comm

1784:    Input Parameters:
1785: +  comm - MPI communicator
1786: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1787: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1788: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1789: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1790: -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1791:    to control all matrix memory allocation.

1793:    Output Parameter:
1794: .  A - the matrix

1796:    Notes:
1797:    The dense format is fully compatible with standard Fortran 77
1798:    storage by columns.

1800:    The data input variable is intended primarily for Fortran programmers
1801:    who wish to allocate their own matrix memory space.  Most users should
1802:    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

1804:    The user MUST specify either the local or global matrix dimensions
1805:    (possibly both).

1807:    Level: intermediate

1809: .keywords: matrix,dense, parallel

1811: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1812: @*/
1813: PetscErrorCode  MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1814: {
1816:   PetscMPIInt    size;

1819:   MatCreate(comm,A);
1820:   MatSetSizes(*A,m,n,M,N);
1821:   MPI_Comm_size(comm,&size);
1822:   if (size > 1) {
1823:     MatSetType(*A,MATMPIDENSE);
1824:     MatMPIDenseSetPreallocation(*A,data);
1825:   } else {
1826:     MatSetType(*A,MATSEQDENSE);
1827:     MatSeqDenseSetPreallocation(*A,data);
1828:   }
1829:   return(0);
1830: }
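
/*
   Editor's note -- an illustrative sketch, not part of the original source.  The usual call
   lets PETSc allocate the storage and split the rows and columns across the processes; the
   global sizes are arbitrary and error checking is omitted.

      Mat A;

      MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,50,PETSC_NULL,&A);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(A);

   Values would normally be inserted with MatSetValues() between the creation and the
   assembly calls.
*/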

1834: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1835: {
1836:   Mat            mat;
1837:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1841:   *newmat       = 0;
1842:   MatCreate(((PetscObject)A)->comm,&mat);
1843:   MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1844:   MatSetType(mat,((PetscObject)A)->type_name);
1845:   a                 = (Mat_MPIDense*)mat->data;
1846:   PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1847:   mat->factor       = A->factor;
1848:   mat->assembled    = PETSC_TRUE;
1849:   mat->preallocated = PETSC_TRUE;

1851:   mat->rmap->rstart     = A->rmap->rstart;
1852:   mat->rmap->rend       = A->rmap->rend;
1853:   a->size              = oldmat->size;
1854:   a->rank              = oldmat->rank;
1855:   mat->insertmode      = NOT_SET_VALUES;
1856:   a->nvec              = oldmat->nvec;
1857:   a->donotstash        = oldmat->donotstash;
1858: 
1859:   PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));
1860:   PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));

1862:   MatSetUpMultiply_MPIDense(mat);
1863:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1864:   PetscLogObjectParent(mat,a->A);

1866:   *newmat = mat;
1867:   return(0);
1868: }

1872: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1873: {
1875:   PetscMPIInt    rank,size;
1876:   PetscInt       *rowners,i,m,nz,j;
1877:   PetscScalar    *array,*vals,*vals_ptr;
1878:   MPI_Status     status;

1881:   MPI_Comm_rank(comm,&rank);
1882:   MPI_Comm_size(comm,&size);

1884:   /* determine ownership of all rows */
1885:   m          = M/size + ((M % size) > rank);
1886:   PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1887:   MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1888:   rowners[0] = 0;
1889:   for (i=2; i<=size; i++) {
1890:     rowners[i] += rowners[i-1];
1891:   }

1893:   MatCreate(comm,newmat);
1894:   MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1895:   MatSetType(*newmat,type);
1896:   MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1897:   MatGetArray(*newmat,&array);

1899:   if (!rank) {
1900:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1902:     /* read in my part of the matrix numerical values  */
1903:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1904: 
1905:     /* insert into the matrix row by row (this is why we cannot read directly into the array; see the layout note after this routine) */
1906:     vals_ptr = vals;
1907:     for (i=0; i<m; i++) {
1908:       for (j=0; j<N; j++) {
1909:         array[i + j*m] = *vals_ptr++;
1910:       }
1911:     }

1913:     /* read in the other processors' values and ship them out */
1914:     for (i=1; i<size; i++) {
1915:       nz   = (rowners[i+1] - rowners[i])*N;
1916:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1917:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);
1918:     }
1919:   } else {
1920:     /* receive numeric values */
1921:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1923:     /* receive message of values */
1924:     MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);

1926:     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1927:     vals_ptr = vals;
1928:     for (i=0; i<m; i++) {
1929:       for (j=0; j<N; j++) {
1930:         array[i + j*m] = *vals_ptr++;
1931:       }
1932:     }
1933:   }
1934:   PetscFree(rowners);
1935:   PetscFree(vals);
1936:   MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1937:   MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1938:   return(0);
1939: }
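
/*
   Editor's note -- layout sketch, not part of the original source.  The binary file stores
   each process's block of the matrix row by row (row-major), while the dense storage above
   is by columns (Fortran order) with leading dimension m; hence the values are staged in
   vals and copied with array[i + j*m].  For example, with m = 2 local rows and N = 3
   columns the file order a00 a01 a02 a10 a11 a12 lands in
   array[0], array[2], array[4], array[1], array[3], array[5].
*/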

1943: PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1944: {
1945:   Mat            A;
1946:   PetscScalar    *vals,*svals;
1947:   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1948:   MPI_Status     status;
1949:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1950:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1951:   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1952:   PetscInt       i,nz,j,rstart,rend;
1953:   int            fd;

1957:   MPI_Comm_size(comm,&size);
1958:   MPI_Comm_rank(comm,&rank);
1959:   if (!rank) {
1960:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1961:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1962:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1963:   }

1965:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1966:   M = header[1]; N = header[2]; nz = header[3];

1968:   /*
1969:        Handle case where matrix is stored on disk as a dense matrix 
1970:   */
1971:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1972:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1973:     return(0);
1974:   }

1976:   /* determine ownership of all rows */
1977:   m          = PetscMPIIntCast(M/size + ((M % size) > rank));
1978:   PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);
1979:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1980:   rowners[0] = 0;
1981:   for (i=2; i<=size; i++) {
1982:     rowners[i] += rowners[i-1];
1983:   }
1984:   rstart = rowners[rank];
1985:   rend   = rowners[rank+1];

1987:   /* distribute row lengths to all processors */
1988:   PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);
1989:   if (!rank) {
1990:     PetscMalloc(M*sizeof(PetscInt),&rowlengths);
1991:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1992:     PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1993:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1994:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1995:     PetscFree(sndcounts);
1996:   } else {
1997:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1998:   }

2000:   if (!rank) {
2001:     /* calculate the number of nonzeros on each processor */
2002:     PetscMalloc(size*sizeof(PetscInt),&procsnz);
2003:     PetscMemzero(procsnz,size*sizeof(PetscInt));
2004:     for (i=0; i<size; i++) {
2005:       for (j=rowners[i]; j< rowners[i+1]; j++) {
2006:         procsnz[i] += rowlengths[j];
2007:       }
2008:     }
2009:     PetscFree(rowlengths);

2011:     /* determine max buffer needed and allocate it */
2012:     maxnz = 0;
2013:     for (i=0; i<size; i++) {
2014:       maxnz = PetscMax(maxnz,procsnz[i]);
2015:     }
2016:     PetscMalloc(maxnz*sizeof(PetscInt),&cols);

2018:     /* read in my part of the matrix column indices  */
2019:     nz = procsnz[0];
2020:     PetscMalloc(nz*sizeof(PetscInt),&mycols);
2021:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

2023:     /* read in everyone else's and ship them off */
2024:     for (i=1; i<size; i++) {
2025:       nz   = procsnz[i];
2026:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
2027:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2028:     }
2029:     PetscFree(cols);
2030:   } else {
2031:     /* determine buffer space needed for message */
2032:     nz = 0;
2033:     for (i=0; i<m; i++) {
2034:       nz += ourlens[i];
2035:     }
2036:     PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);

2038:     /* receive message of column indices */
2039:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2040:     MPI_Get_count(&status,MPIU_INT,&maxnz);
2041:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2042:   }

2044:   /* loop over local rows, determining number of off-diagonal entries */
2045:   PetscMemzero(offlens,m*sizeof(PetscInt));
2046:   jj = 0;
2047:   for (i=0; i<m; i++) {
2048:     for (j=0; j<ourlens[i]; j++) {
2049:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2050:       jj++;
2051:     }
2052:   }

2054:   /* create our matrix */
2055:   for (i=0; i<m; i++) {
2056:     ourlens[i] -= offlens[i];
2057:   }
2058:   MatCreate(comm,newmat);
2059:   MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
2060:   MatSetType(*newmat,type);
2061:   MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
2062:   A = *newmat;
2063:   for (i=0; i<m; i++) {
2064:     ourlens[i] += offlens[i];
2065:   }

2067:   if (!rank) {
2068:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

2070:     /* read in my part of the matrix numerical values  */
2071:     nz = procsnz[0];
2072:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2073: 
2074:     /* insert into matrix */
2075:     jj      = rstart;
2076:     smycols = mycols;
2077:     svals   = vals;
2078:     for (i=0; i<m; i++) {
2079:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2080:       smycols += ourlens[i];
2081:       svals   += ourlens[i];
2082:       jj++;
2083:     }

2085:     /* read in the other processors' values and ship them out */
2086:     for (i=1; i<size; i++) {
2087:       nz   = procsnz[i];
2088:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2089:       MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);
2090:     }
2091:     PetscFree(procsnz);
2092:   } else {
2093:     /* receive numeric values */
2094:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

2096:     /* receive message of values */
2097:     MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);
2098:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2099:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

2101:     /* insert into matrix */
2102:     jj      = rstart;
2103:     smycols = mycols;
2104:     svals   = vals;
2105:     for (i=0; i<m; i++) {
2106:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2107:       smycols += ourlens[i];
2108:       svals   += ourlens[i];
2109:       jj++;
2110:     }
2111:   }
2112:   PetscFree2(ourlens,offlens);
2113:   PetscFree(vals);
2114:   PetscFree(mycols);
2115:   PetscFree(rowners);

2117:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2118:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2119:   return(0);
2120: }
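
/*
   Editor's note -- an illustrative sketch, not part of the original source.  Loading an MPI
   dense matrix from a PETSc binary file with the MatLoad() interface of this PETSc version
   (viewer, type, output matrix).  The file name is hypothetical and error checking is
   omitted.

      Mat         A;
      PetscViewer viewer;

      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
      MatLoad(viewer,MATMPIDENSE,&A);
      PetscViewerDestroy(viewer);
      MatDestroy(A);
*/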

2124: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2125: {
2126:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2127:   Mat            a,b;
2128:   PetscTruth     flg;

2132:   a = matA->A;
2133:   b = matB->A;
2134:   MatEqual(a,b,&flg);
2135:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2136:   return(0);
2137: }

2139: #if defined(PETSC_HAVE_PLAPACK)

2143: /*@C
2144:   PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2145:   Level: developer

2147: .keywords: Petsc, destroy, package, PLAPACK
2148: .seealso: PetscFinalize()
2149: @*/
2150: PetscErrorCode  PetscPLAPACKFinalizePackage(void)
2151: {

2155:   PLA_Finalize();
2156:   return(0);
2157: }

2161: /*@C
2162:   PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2163:   called from MatCreate_MPIDense() the first time an MPI dense matrix is created.

2165:   Input Parameter:
2166: .   comm - the communicator the matrix lives on

2168:   Level: developer

2170:    Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2171:   same communicator (because there is global state that is initialized once and used for all matrices). In addition, if
2172:   PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD, these subcommunicators
2173:   cannot overlap.

2175: .keywords: Petsc, initialize, package, PLAPACK
2176: .seealso: PetscInitializePackage(), PetscInitialize()
2177: @*/
2178: PetscErrorCode  PetscPLAPACKInitializePackage(MPI_Comm comm)
2179: {
2180:   PetscMPIInt    size;

2184:   if (!PLA_Initialized(PETSC_NULL)) {

2186:     MPI_Comm_size(comm,&size);
2187:     Plapack_nprows = 1;
2188:     Plapack_npcols = size;
2189: 
2190:     PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");
2191:       PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);
2192:       PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);
2193: #if defined(PETSC_USE_DEBUG)
2194:       Plapack_ierror = 3;
2195: #else
2196:       Plapack_ierror = 0;
2197: #endif
2198:       PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);
2199:       if (Plapack_ierror){
2200:         PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );
2201:       } else {
2202:         PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );
2203:       }
2204: 
2205:       Plapack_nb_alg = 0;
2206:       PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);
2207:       if (Plapack_nb_alg) {
2208:         pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);
2209:       }
2210:     PetscOptionsEnd();

2212:     PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);
2213:     PLA_Init(Plapack_comm_2d);
2214:     PetscRegisterFinalize(PetscPLAPACKFinalizePackage);
2215:   }
2216:   return(0);
2217: }
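
/*
   Editor's note -- an illustrative sketch, not part of the original source.  The options read
   above can be supplied on the command line; the executable name below is hypothetical and
   the values are arbitrary (nprows*npcols should match the number of processes).

      mpiexec -n 4 ./myapp -mat_plapack_nprows 2 -mat_plapack_npcols 2 \
                           -mat_plapack_nb_alg 64 -mat_plapack_ckerror 0

   Because PLAPACK is initialized only once, these options take effect when the first MPI
   dense matrix is created on the communicator.
*/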

2219: #endif