Actual source code: mpidense.c
1: #define PETSCMAT_DLL
3: /*
4: Basic functions for parallel dense matrices.
5: */
7:
8: #include "../src/mat/impls/dense/mpi/mpidense.h"
9: #if defined(PETSC_HAVE_PLAPACK)
10: static PetscMPIInt Plapack_nprows,Plapack_npcols,Plapack_ierror,Plapack_nb_alg;
11: static MPI_Comm Plapack_comm_2d;
13: #include "PLA.h"
16: typedef struct {
17: PLA_Obj A,pivots;
18: PLA_Template templ;
19: MPI_Datatype datatype;
20: PetscInt nb,rstart;
21: VecScatter ctx;
22: IS is_pla,is_petsc;
23: PetscTruth pla_solved;
24: MatStructure mstruct;
25: } Mat_Plapack;
26: #endif
30: /*@
32: MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
33: matrix that represents the operator. For a sequential matrix it returns the matrix itself.
35: Input Parameter:
36: . A - the Seq or MPI dense matrix
38: Output Parameter:
39: . B - the inner matrix
41: Level: intermediate
43: @*/
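/*
   Illustrative usage sketch (not part of the original source): obtaining the local
   sequential block of a dense matrix and accessing its column-major array.  Assumes
   an assembled matrix A and the standard PETSc error-checking macro CHKERRQ.

      Mat         Aloc;
      PetscScalar *array;

      ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);
      ierr = MatGetArray(Aloc,&array);CHKERRQ(ierr);      // local rows, stored by columns
      // ... read or modify the locally owned entries here ...
      ierr = MatRestoreArray(Aloc,&array);CHKERRQ(ierr);
*/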
44: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
45: {
46: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
48: PetscTruth flg;
51: PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
52: if (flg) {
53: *B = mat->A;
54: } else {
55: *B = A;
56: }
57: return(0);
58: }
62: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
63: {
64: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
66: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
69: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
70: lrow = row - rstart;
71: MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
72: return(0);
73: }
77: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
78: {
82: if (idx) {PetscFree(*idx);}
83: if (v) {PetscFree(*v);}
84: return(0);
85: }
90: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
91: {
92: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
94: PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
95: PetscScalar *array;
96: MPI_Comm comm;
99: if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
101: /* The reuse aspect is not implemented efficiently */
102: if (reuse) { MatDestroy(*B);}
104: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
105: MatGetArray(mdn->A,&array);
106: MatCreate(comm,B);
107: MatSetSizes(*B,m,m,m,m);
108: MatSetType(*B,((PetscObject)mdn->A)->type_name);
109: MatSeqDenseSetPreallocation(*B,array+m*rstart);
110: MatRestoreArray(mdn->A,&array);
111: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
112: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
113:
114: *iscopy = PETSC_TRUE;
115: return(0);
116: }
121: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
122: {
123: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
125: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
126: PetscTruth roworiented = A->roworiented;
130: for (i=0; i<m; i++) {
131: if (idxm[i] < 0) continue;
132: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
133: if (idxm[i] >= rstart && idxm[i] < rend) {
134: row = idxm[i] - rstart;
135: if (roworiented) {
136: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
137: } else {
138: for (j=0; j<n; j++) {
139: if (idxn[j] < 0) continue;
140: if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
141: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
142: }
143: }
144: } else {
145: if (!A->donotstash) {
146: if (roworiented) {
147: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
148: } else {
149: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
150: }
151: }
152: }
153: }
154: return(0);
155: }
159: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
160: {
161: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
163: PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
166: for (i=0; i<m; i++) {
167: if (idxm[i] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
168: if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
169: if (idxm[i] >= rstart && idxm[i] < rend) {
170: row = idxm[i] - rstart;
171: for (j=0; j<n; j++) {
172: if (idxn[j] < 0) continue; /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
173: if (idxn[j] >= mat->cmap->N) {
174: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
175: }
176: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
177: }
178: } else {
179: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
180: }
181: }
182: return(0);
183: }
187: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
188: {
189: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
193: MatGetArray(a->A,array);
194: return(0);
195: }
199: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
200: {
201: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
202: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
204: PetscInt i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
205: const PetscInt *irow,*icol;
206: PetscScalar *av,*bv,*v = lmat->v;
207: Mat newmat;
208: IS iscol_local;
211: ISAllGather(iscol,&iscol_local);
212: ISGetIndices(isrow,&irow);
213: ISGetIndices(iscol_local,&icol);
214: ISGetLocalSize(isrow,&nrows);
215: ISGetLocalSize(iscol,&ncols);
216: ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */
218: /* No parallel redistribution is currently supported! Should really check each index set
219: to confirm that it is OK. ... Currently only a submatrix with the same partitioning as the
220: original matrix is supported! */
222: MatGetLocalSize(A,&nlrows,&nlcols);
223: MatGetOwnershipRange(A,&rstart,&rend);
224:
225: /* Check submatrix call */
226: if (scall == MAT_REUSE_MATRIX) {
227: /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
228: /* Really need to test the row and column sizes! */
229: newmat = *B;
230: } else {
231: /* Create and fill new matrix */
232: MatCreate(((PetscObject)A)->comm,&newmat);
233: MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
234: MatSetType(newmat,((PetscObject)A)->type_name);
235: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
236: }
238: /* Now extract the data pointers and do the copy, column at a time */
239: newmatd = (Mat_MPIDense*)newmat->data;
240: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
241:
242: for (i=0; i<Ncols; i++) {
243: av = v + ((Mat_SeqDense *)mat->A->data)->lda*icol[i];
244: for (j=0; j<nrows; j++) {
245: *bv++ = av[irow[j] - rstart];
246: }
247: }
249: /* Assemble the matrices so that the correct flags are set */
250: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
251: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
253: /* Free work space */
254: ISRestoreIndices(isrow,&irow);
255: ISRestoreIndices(iscol_local,&icol); ISDestroy(iscol_local);
256: *B = newmat;
257: return(0);
258: }
262: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
263: {
265: return(0);
266: }
270: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
271: {
272: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
273: MPI_Comm comm = ((PetscObject)mat)->comm;
275: PetscInt nstash,reallocs;
276: InsertMode addv;
279: /* make sure all processors are either in INSERTMODE or ADDMODE */
280: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
281: if (addv == (ADD_VALUES|INSERT_VALUES)) {
282: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
283: }
284: mat->insertmode = addv; /* in case this processor had no cache */
286: MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
287: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
288: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
289: return(0);
290: }
294: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
295: {
296: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
297: PetscErrorCode ierr;
298: PetscInt i,*row,*col,flg,j,rstart,ncols;
299: PetscMPIInt n;
300: PetscScalar *val;
301: InsertMode addv=mat->insertmode;
304: /* wait on receives */
305: while (1) {
306: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
307: if (!flg) break;
308:
309: for (i=0; i<n;) {
310: /* Now identify the consecutive vals belonging to the same row */
311: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
312: if (j < n) ncols = j-i;
313: else ncols = n-i;
314: /* Now assemble all these values with a single function call */
315: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
316: i = j;
317: }
318: }
319: MatStashScatterEnd_Private(&mat->stash);
320:
321: MatAssemblyBegin(mdn->A,mode);
322: MatAssemblyEnd(mdn->A,mode);
324: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
325: MatSetUpMultiply_MPIDense(mat);
326: }
327: return(0);
328: }
332: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
333: {
335: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
338: MatZeroEntries(l->A);
339: return(0);
340: }
342: /* The code does not handle the diagonal entries correctly unless the
343: matrix is square and the column and row ownerships are identical.
344: This is a BUG. The only way to fix it seems to be to access
345: mdn->A and mdn->B directly and not through the MatZeroRows()
346: routine.
347: */
350: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
351: {
352: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
354: PetscInt i,*owners = A->rmap->range;
355: PetscInt *nprocs,j,idx,nsends;
356: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
357: PetscInt *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source;
358: PetscInt *lens,*lrows,*values;
359: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
360: MPI_Comm comm = ((PetscObject)A)->comm;
361: MPI_Request *send_waits,*recv_waits;
362: MPI_Status recv_status,*send_status;
363: PetscTruth found;
366: /* first count number of contributors to each processor */
367: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
368: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
369: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
370: for (i=0; i<N; i++) {
371: idx = rows[i];
372: found = PETSC_FALSE;
373: for (j=0; j<size; j++) {
374: if (idx >= owners[j] && idx < owners[j+1]) {
375: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
376: }
377: }
378: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
379: }
380: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
382: /* inform other processors of number of messages and max length*/
383: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
385: /* post receives: */
386: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
387: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
388: for (i=0; i<nrecvs; i++) {
389: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
390: }
392: /* do sends:
393: 1) starts[i] gives the starting index in svalues for stuff going to
394: the ith processor
395: */
396: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
397: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
398: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
399: starts[0] = 0;
400: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
401: for (i=0; i<N; i++) {
402: svalues[starts[owner[i]]++] = rows[i];
403: }
405: starts[0] = 0;
406: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407: count = 0;
408: for (i=0; i<size; i++) {
409: if (nprocs[2*i+1]) {
410: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
411: }
412: }
413: PetscFree(starts);
415: base = owners[rank];
417: /* wait on receives */
418: PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);
419: count = nrecvs;
420: slen = 0;
421: while (count) {
422: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
423: /* unpack receives into our local space */
424: MPI_Get_count(&recv_status,MPIU_INT,&n);
425: source[imdex] = recv_status.MPI_SOURCE;
426: lens[imdex] = n;
427: slen += n;
428: count--;
429: }
430: PetscFree(recv_waits);
431:
432: /* move the data into the send scatter */
433: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
434: count = 0;
435: for (i=0; i<nrecvs; i++) {
436: values = rvalues + i*nmax;
437: for (j=0; j<lens[i]; j++) {
438: lrows[count++] = values[j] - base;
439: }
440: }
441: PetscFree(rvalues);
442: PetscFree2(lens,source);
443: PetscFree(owner);
444: PetscFree(nprocs);
445:
446: /* actually zap the local rows */
447: MatZeroRows(l->A,slen,lrows,diag);
448: PetscFree(lrows);
450: /* wait on sends */
451: if (nsends) {
452: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
453: MPI_Waitall(nsends,send_waits,send_status);
454: PetscFree(send_status);
455: }
456: PetscFree(send_waits);
457: PetscFree(svalues);
459: return(0);
460: }
464: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
465: {
466: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
470: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
471: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
472: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
473: return(0);
474: }
478: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
479: {
480: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
484: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
485: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
486: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
487: return(0);
488: }
492: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
493: {
494: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
496: PetscScalar zero = 0.0;
499: VecSet(yy,zero);
500: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
501: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
502: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
503: return(0);
504: }
508: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
509: {
510: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
514: VecCopy(yy,zz);
515: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
516: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
517: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
518: return(0);
519: }
523: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
524: {
525: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
526: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
528: PetscInt len,i,n,m = A->rmap->n,radd;
529: PetscScalar *x,zero = 0.0;
530:
532: VecSet(v,zero);
533: VecGetArray(v,&x);
534: VecGetSize(v,&n);
535: if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
536: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
537: radd = A->rmap->rstart*m;
538: for (i=0; i<len; i++) {
539: x[i] = aloc->v[radd + i*m + i];
540: }
541: VecRestoreArray(v,&x);
542: return(0);
543: }
547: PetscErrorCode MatDestroy_MPIDense(Mat mat)
548: {
549: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
551: #if defined(PETSC_HAVE_PLAPACK)
552: Mat_Plapack *lu=(Mat_Plapack*)(mat->spptr);
553: #endif
557: #if defined(PETSC_USE_LOG)
558: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
559: #endif
560: MatStashDestroy_Private(&mat->stash);
561: MatDestroy(mdn->A);
562: if (mdn->lvec) {VecDestroy(mdn->lvec);}
563: if (mdn->Mvctx) {VecScatterDestroy(mdn->Mvctx);}
564: #if defined(PETSC_HAVE_PLAPACK)
565: if (lu) {
566: PLA_Obj_free(&lu->A);
567: PLA_Obj_free (&lu->pivots);
568: PLA_Temp_free(&lu->templ);
570: if (lu->is_pla) {
571: ISDestroy(lu->is_pla);
572: ISDestroy(lu->is_petsc);
573: VecScatterDestroy(lu->ctx);
574: }
575: }
576: #endif
578: PetscFree(mdn);
579: PetscObjectChangeTypeName((PetscObject)mat,0);
580: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
581: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
582: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);
583: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);
584: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);
585: return(0);
586: }
590: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
591: {
592: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
593: PetscErrorCode ierr;
594: PetscViewerFormat format;
595: int fd;
596: PetscInt header[4],mmax,N = mat->cmap->N,i,j,m,k;
597: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
598: PetscScalar *work,*v,*vv;
599: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
600: MPI_Status status;
603: if (mdn->size == 1) {
604: MatView(mdn->A,viewer);
605: } else {
606: PetscViewerBinaryGetDescriptor(viewer,&fd);
607: MPI_Comm_rank(((PetscObject)mat)->comm,&rank);
608: MPI_Comm_size(((PetscObject)mat)->comm,&size);
610: PetscViewerGetFormat(viewer,&format);
611: if (format == PETSC_VIEWER_NATIVE) {
613: if (!rank) {
614: /* store the matrix as a dense matrix */
615: header[0] = MAT_FILE_COOKIE;
616: header[1] = mat->rmap->N;
617: header[2] = N;
618: header[3] = MATRIX_BINARY_FORMAT_DENSE;
619: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
621: /* get largest work array needed for transposing array */
622: mmax = mat->rmap->n;
623: for (i=1; i<size; i++) {
624: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
625: }
626: PetscMalloc(mmax*N*sizeof(PetscScalar),&work);
628: /* write out local array, by rows */
629: m = mat->rmap->n;
630: v = a->v;
631: for (j=0; j<N; j++) {
632: for (i=0; i<m; i++) {
633: work[j + i*N] = *v++;
634: }
635: }
636: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
637: /* get largest work array to receive messages from other processes, excludes process zero */
638: mmax = 0;
639: for (i=1; i<size; i++) {
640: mmax = PetscMax(mmax,mat->rmap->range[i+1] - mat->rmap->range[i]);
641: }
642: PetscMalloc(mmax*N*sizeof(PetscScalar),&vv);
643: for(k = 1; k < size; k++) {
644: v = vv;
645: m = mat->rmap->range[k+1] - mat->rmap->range[k];
646: MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,((PetscObject)mat)->comm,&status);
648: for(j = 0; j < N; j++) {
649: for(i = 0; i < m; i++) {
650: work[j + i*N] = *v++;
651: }
652: }
653: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
654: }
655: PetscFree(work);
656: PetscFree(vv);
657: } else {
658: MPI_Send(a->v,mat->rmap->n*mat->cmap->N,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);
659: }
660: } else {
661: SETERRQ(PETSC_ERR_SUP,"To store a parallel dense matrix you must first call PetscViewerSetFormat(viewer,PETSC_VIEWER_NATIVE)");
662: }
663: }
664: return(0);
665: }
669: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
670: {
671: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
672: PetscErrorCode ierr;
673: PetscMPIInt size = mdn->size,rank = mdn->rank;
674: const PetscViewerType vtype;
675: PetscTruth iascii,isdraw;
676: PetscViewer sviewer;
677: PetscViewerFormat format;
678: #if defined(PETSC_HAVE_PLAPACK)
679: Mat_Plapack *lu;
680: #endif
683: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
684: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
685: if (iascii) {
686: PetscViewerGetType(viewer,&vtype);
687: PetscViewerGetFormat(viewer,&format);
688: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
689: MatInfo info;
690: MatGetInfo(mat,MAT_LOCAL,&info);
691: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,
692: (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
693: PetscViewerFlush(viewer);
694: #if defined(PETSC_HAVE_PLAPACK)
695: PetscViewerASCIIPrintf(viewer,"PLAPACK run parameters:\n");
696: PetscViewerASCIIPrintf(viewer," Processor mesh: nprows %d, npcols %d\n",Plapack_nprows, Plapack_npcols);
697: PetscViewerASCIIPrintf(viewer," Error checking: %d\n",Plapack_ierror);
698: PetscViewerASCIIPrintf(viewer," Algorithmic block size: %d\n",Plapack_nb_alg);
699: if (mat->factor){
700: lu=(Mat_Plapack*)(mat->spptr);
701: PetscViewerASCIIPrintf(viewer," Distr. block size nb: %d \n",lu->nb);
702: }
703: #else
704: VecScatterView(mdn->Mvctx,viewer);
705: #endif
706: return(0);
707: } else if (format == PETSC_VIEWER_ASCII_INFO) {
708: return(0);
709: }
710: } else if (isdraw) {
711: PetscDraw draw;
712: PetscTruth isnull;
714: PetscViewerDrawGetDraw(viewer,0,&draw);
715: PetscDrawIsNull(draw,&isnull);
716: if (isnull) return(0);
717: }
719: if (size == 1) {
720: MatView(mdn->A,viewer);
721: } else {
722: /* assemble the entire matrix onto first processor. */
723: Mat A;
724: PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
725: PetscInt *cols;
726: PetscScalar *vals;
728: MatCreate(((PetscObject)mat)->comm,&A);
729: if (!rank) {
730: MatSetSizes(A,M,N,M,N);
731: } else {
732: MatSetSizes(A,0,0,M,N);
733: }
734: /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
735: MatSetType(A,MATMPIDENSE);
736: MatMPIDenseSetPreallocation(A,PETSC_NULL);
737: PetscLogObjectParent(mat,A);
739: /* Copy the matrix ... This isn't the most efficient means,
740: but it's quick for now */
741: A->insertmode = INSERT_VALUES;
742: row = mat->rmap->rstart; m = mdn->A->rmap->n;
743: for (i=0; i<m; i++) {
744: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
745: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
746: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
747: row++;
748: }
750: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
751: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
752: PetscViewerGetSingleton(viewer,&sviewer);
753: if (!rank) {
754: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
755: }
756: PetscViewerRestoreSingleton(viewer,&sviewer);
757: PetscViewerFlush(viewer);
758: MatDestroy(A);
759: }
760: return(0);
761: }
765: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
766: {
768: PetscTruth iascii,isbinary,isdraw,issocket;
769:
771:
772: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
773: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
774: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
775: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
777: if (iascii || issocket || isdraw) {
778: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
779: } else if (isbinary) {
780: MatView_MPIDense_Binary(mat,viewer);
781: } else {
782: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
783: }
784: return(0);
785: }
789: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
790: {
791: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
792: Mat mdn = mat->A;
794: PetscReal isend[5],irecv[5];
797: info->block_size = 1.0;
798: MatGetInfo(mdn,MAT_LOCAL,info);
799: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
800: isend[3] = info->memory; isend[4] = info->mallocs;
801: if (flag == MAT_LOCAL) {
802: info->nz_used = isend[0];
803: info->nz_allocated = isend[1];
804: info->nz_unneeded = isend[2];
805: info->memory = isend[3];
806: info->mallocs = isend[4];
807: } else if (flag == MAT_GLOBAL_MAX) {
808: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);
809: info->nz_used = irecv[0];
810: info->nz_allocated = irecv[1];
811: info->nz_unneeded = irecv[2];
812: info->memory = irecv[3];
813: info->mallocs = irecv[4];
814: } else if (flag == MAT_GLOBAL_SUM) {
815: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
816: info->nz_used = irecv[0];
817: info->nz_allocated = irecv[1];
818: info->nz_unneeded = irecv[2];
819: info->memory = irecv[3];
820: info->mallocs = irecv[4];
821: }
822: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
823: info->fill_ratio_needed = 0;
824: info->factor_mallocs = 0;
825: return(0);
826: }
830: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscTruth flg)
831: {
832: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
836: switch (op) {
837: case MAT_NEW_NONZERO_LOCATIONS:
838: case MAT_NEW_NONZERO_LOCATION_ERR:
839: case MAT_NEW_NONZERO_ALLOCATION_ERR:
840: MatSetOption(a->A,op,flg);
841: break;
842: case MAT_ROW_ORIENTED:
843: a->roworiented = flg;
844: MatSetOption(a->A,op,flg);
845: break;
846: case MAT_NEW_DIAGONALS:
847: case MAT_USE_HASH_TABLE:
848: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
849: break;
850: case MAT_IGNORE_OFF_PROC_ENTRIES:
851: a->donotstash = flg;
852: break;
853: case MAT_SYMMETRIC:
854: case MAT_STRUCTURALLY_SYMMETRIC:
855: case MAT_HERMITIAN:
856: case MAT_SYMMETRY_ETERNAL:
857: case MAT_IGNORE_LOWER_TRIANGULAR:
858: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
859: break;
860: default:
861: SETERRQ1(PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
862: }
863: return(0);
864: }
869: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
870: {
871: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
872: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
873: PetscScalar *l,*r,x,*v;
875: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n;
878: MatGetLocalSize(A,&s2,&s3);
879: if (ll) {
880: VecGetLocalSize(ll,&s2a);
881: if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
882: VecGetArray(ll,&l);
883: for (i=0; i<m; i++) {
884: x = l[i];
885: v = mat->v + i;
886: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
887: }
888: VecRestoreArray(ll,&l);
889: PetscLogFlops(n*m);
890: }
891: if (rr) {
892: VecGetLocalSize(rr,&s3a);
893: if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
894: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
895: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
896: VecGetArray(mdn->lvec,&r);
897: for (i=0; i<n; i++) {
898: x = r[i];
899: v = mat->v + i*m;
900: for (j=0; j<m; j++) { (*v++) *= x;}
901: }
902: VecRestoreArray(mdn->lvec,&r);
903: PetscLogFlops(n*m);
904: }
905: return(0);
906: }
910: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
911: {
912: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
913: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
915: PetscInt i,j;
916: PetscReal sum = 0.0;
917: PetscScalar *v = mat->v;
920: if (mdn->size == 1) {
921: MatNorm(mdn->A,type,nrm);
922: } else {
923: if (type == NORM_FROBENIUS) {
924: for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
925: #if defined(PETSC_USE_COMPLEX)
926: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
927: #else
928: sum += (*v)*(*v); v++;
929: #endif
930: }
931: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
932: *nrm = sqrt(*nrm);
933: PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
934: } else if (type == NORM_1) {
935: PetscReal *tmp,*tmp2;
936: PetscMalloc2(A->cmap->N,PetscReal,&tmp,A->cmap->N,PetscReal,&tmp2);
937: PetscMemzero(tmp,A->cmap->N*sizeof(PetscReal));
938: PetscMemzero(tmp2,A->cmap->N*sizeof(PetscReal));
939: *nrm = 0.0;
940: v = mat->v;
941: for (j=0; j<mdn->A->cmap->n; j++) {
942: for (i=0; i<mdn->A->rmap->n; i++) {
943: tmp[j] += PetscAbsScalar(*v); v++;
944: }
945: }
946: MPI_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)A)->comm);
947: for (j=0; j<A->cmap->N; j++) {
948: if (tmp2[j] > *nrm) *nrm = tmp2[j];
949: }
950: PetscFree2(tmp,tmp2);
951: PetscLogFlops(A->cmap->n*A->rmap->n);
952: } else if (type == NORM_INFINITY) { /* max row norm */
953: PetscReal ntemp;
954: MatNorm(mdn->A,type,&ntemp);
955: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)A)->comm);
956: } else {
957: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
958: }
959: }
960: return(0);
961: }
965: PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
966: {
967: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
968: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
969: Mat B;
970: PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
972: PetscInt j,i;
973: PetscScalar *v;
976: if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
977: if (reuse == MAT_INITIAL_MATRIX || A == *matout) {
978: MatCreate(((PetscObject)A)->comm,&B);
979: MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
980: MatSetType(B,((PetscObject)A)->type_name);
981: MatMPIDenseSetPreallocation(B,PETSC_NULL);
982: } else {
983: B = *matout;
984: }
986: m = a->A->rmap->n; n = a->A->cmap->n; v = Aloc->v;
987: PetscMalloc(m*sizeof(PetscInt),&rwork);
988: for (i=0; i<m; i++) rwork[i] = rstart + i;
989: for (j=0; j<n; j++) {
990: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
991: v += m;
992: }
993: PetscFree(rwork);
994: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
995: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
996: if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
997: *matout = B;
998: } else {
999: MatHeaderCopy(A,B);
1000: }
1001: return(0);
1002: }
1005: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1010: PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1011: {
1015: MatMPIDenseSetPreallocation(A,0);
1016: return(0);
1017: }
1019: #if defined(PETSC_HAVE_PLAPACK)
1023: PetscErrorCode MatMPIDenseCopyToPlapack(Mat A,Mat F)
1024: {
1025: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1027: PetscInt M=A->cmap->N,m=A->rmap->n,rstart;
1028: PetscScalar *array;
1029: PetscReal one = 1.0;
1032: /* Copy A into F->lu->A */
1033: PLA_Obj_set_to_zero(lu->A);
1034: PLA_API_begin();
1035: PLA_Obj_API_open(lu->A);
1036: MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1037: MatGetArray(A,&array);
1038: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1039: MatRestoreArray(A,&array);
1040: PLA_Obj_API_close(lu->A);
1041: PLA_API_end();
1042: lu->rstart = rstart;
1043: return(0);
1044: }
1048: PetscErrorCode MatMPIDenseCopyFromPlapack(Mat F,Mat A)
1049: {
1050: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1052: PetscInt M=A->cmap->N,m=A->rmap->n,rstart;
1053: PetscScalar *array;
1054: PetscReal one = 1.0;
1057: /* Copy F into A->lu->A */
1058: MatZeroEntries(A);
1059: PLA_API_begin();
1060: PLA_Obj_API_open(lu->A);
1061: MatGetOwnershipRange(A,&rstart,PETSC_NULL);
1062: MatGetArray(A,&array);
1063: PLA_API_axpy_global_to_matrix(m,M, &one,lu->A,rstart,0,(void *)array,m);
1064: MatRestoreArray(A,&array);
1065: PLA_Obj_API_close(lu->A);
1066: PLA_API_end();
1067: lu->rstart = rstart;
1068: return(0);
1069: }
1073: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
1074: {
1076: Mat_Plapack *luA = (Mat_Plapack*)A->spptr;
1077: Mat_Plapack *luB = (Mat_Plapack*)B->spptr;
1078: Mat_Plapack *luC = (Mat_Plapack*)C->spptr;
1079: PLA_Obj alpha = NULL,beta = NULL;
1082: MatMPIDenseCopyToPlapack(A,A);
1083: MatMPIDenseCopyToPlapack(B,B);
1085: /*
1086: PLA_Global_show("A = ",luA->A,"%g ","");
1087: PLA_Global_show("B = ",luB->A,"%g ","");
1088: */
1090: /* do the multiply in PLA */
1091: PLA_Create_constants_conf_to(luA->A,NULL,NULL,&alpha);
1092: PLA_Create_constants_conf_to(luC->A,NULL,&beta,NULL);
1093: CHKMEMQ;
1095: PLA_Gemm(PLA_NO_TRANSPOSE,PLA_NO_TRANSPOSE,alpha,luA->A,luB->A,beta,luC->A); /* */
1096: CHKMEMQ;
1097: PLA_Obj_free(&alpha);
1098: PLA_Obj_free(&beta);
1100: /*
1101: PLA_Global_show("C = ",luC->A,"%g ","");
1102: */
1103: MatMPIDenseCopyFromPlapack(C,C);
1104: return(0);
1105: }
1109: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1110: {
1112: PetscInt m=A->rmap->n,n=B->cmap->n;
1113: Mat Cmat;
1116: if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
1117: SETERRQ(PETSC_ERR_LIB,"Due to apparent bugs in PLAPACK,this is not currently supported");
1118: MatCreate(((PetscObject)B)->comm,&Cmat);
1119: MatSetSizes(Cmat,m,n,A->rmap->N,B->cmap->N);
1120: MatSetType(Cmat,MATMPIDENSE);
1121: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
1122: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
1124: *C = Cmat;
1125: return(0);
1126: }
1130: PetscErrorCode MatMatMult_MPIDense_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
1131: {
1135: if (scall == MAT_INITIAL_MATRIX){
1136: MatMatMultSymbolic_MPIDense_MPIDense(A,B,fill,C);
1137: }
1138: MatMatMultNumeric_MPIDense_MPIDense(A,B,*C);
1139: return(0);
1140: }
1144: PetscErrorCode MatSolve_MPIDense(Mat A,Vec b,Vec x)
1145: {
1146: MPI_Comm comm = ((PetscObject)A)->comm;
1147: Mat_Plapack *lu = (Mat_Plapack*)A->spptr;
1149: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,i,j,*idx_pla,*idx_petsc,loc_m,loc_stride;
1150: PetscScalar *array;
1151: PetscReal one = 1.0;
1152: PetscMPIInt size,rank,r_rank,r_nproc,c_rank,c_nproc;
1153: PLA_Obj v_pla = NULL;
1154: PetscScalar *loc_buf;
1155: Vec loc_x;
1156:
1158: MPI_Comm_size(comm,&size);
1159: MPI_Comm_rank(comm,&rank);
1161: /* Create PLAPACK vector objects, then copy b into PLAPACK b */
1162: PLA_Mvector_create(lu->datatype,M,1,lu->templ,PLA_ALIGN_FIRST,&v_pla);
1163: PLA_Obj_set_to_zero(v_pla);
1165: /* Copy b into rhs_pla */
1166: PLA_API_begin();
1167: PLA_Obj_API_open(v_pla);
1168: VecGetArray(b,&array);
1169: PLA_API_axpy_vector_to_global(m,&one,(void *)array,1,v_pla,lu->rstart);
1170: VecRestoreArray(b,&array);
1171: PLA_Obj_API_close(v_pla);
1172: PLA_API_end();
1174: if (A->factor == MAT_FACTOR_LU){
1175: /* Apply the permutations to the right hand sides */
1176: PLA_Apply_pivots_to_rows (v_pla,lu->pivots);
1178: /* Solve L y = b, overwriting b with y */
1179: PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_UNIT_DIAG,lu->A,v_pla );
1181: /* Solve U x = y (=b), overwriting b with x */
1182: PLA_Trsv( PLA_UPPER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla );
1183: } else { /* MAT_FACTOR_CHOLESKY */
1184: PLA_Trsv( PLA_LOWER_TRIANGULAR,PLA_NO_TRANSPOSE,PLA_NONUNIT_DIAG,lu->A,v_pla);
1185: PLA_Trsv( PLA_LOWER_TRIANGULAR,(lu->datatype == MPI_DOUBLE ? PLA_TRANSPOSE : PLA_CONJUGATE_TRANSPOSE),
1186: PLA_NONUNIT_DIAG,lu->A,v_pla);
1187: }
1189: /* Copy PLAPACK x into Petsc vector x */
1190: PLA_Obj_local_length(v_pla, &loc_m);
1191: PLA_Obj_local_buffer(v_pla, (void**)&loc_buf);
1192: PLA_Obj_local_stride(v_pla, &loc_stride);
1193: /*
1194: PetscPrintf(PETSC_COMM_SELF," [%d] b - local_m %d local_stride %d, loc_buf: %g %g, nb: %d\n",rank,loc_m,loc_stride,loc_buf[0],loc_buf[(loc_m-1)*loc_stride],lu->nb);
1195: */
1196: VecCreateSeqWithArray(PETSC_COMM_SELF,loc_m*loc_stride,loc_buf,&loc_x);
1197: if (!lu->pla_solved){
1198:
1199: PLA_Temp_comm_row_info(lu->templ,&Plapack_comm_2d,&r_rank,&r_nproc);
1200: PLA_Temp_comm_col_info(lu->templ,&Plapack_comm_2d,&c_rank,&c_nproc);
1202: /* Create IS and cts for VecScatterring */
1203: PLA_Obj_local_length(v_pla, &loc_m);
1204: PLA_Obj_local_stride(v_pla, &loc_stride);
1205: PetscMalloc2(loc_m,PetscInt,&idx_pla,loc_m,PetscInt,&idx_petsc);
1207: rstart = (r_rank*c_nproc+c_rank)*lu->nb;
1208: for (i=0; i<loc_m; i+=lu->nb){
1209: j = 0;
1210: while (j < lu->nb && i+j < loc_m){
1211: idx_petsc[i+j] = rstart + j; j++;
1212: }
1213: rstart += size*lu->nb;
1214: }
1216: for (i=0; i<loc_m; i++) idx_pla[i] = i*loc_stride;
1218: ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_pla,&lu->is_pla);
1219: ISCreateGeneral(PETSC_COMM_SELF,loc_m,idx_petsc,&lu->is_petsc);
1220: PetscFree2(idx_pla,idx_petsc);
1221: VecScatterCreate(loc_x,lu->is_pla,x,lu->is_petsc,&lu->ctx);
1222: }
1223: VecScatterBegin(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1224: VecScatterEnd(lu->ctx,loc_x,x,INSERT_VALUES,SCATTER_FORWARD);
1225:
1226: /* Free data */
1227: VecDestroy(loc_x);
1228: PLA_Obj_free(&v_pla);
1230: lu->pla_solved = PETSC_TRUE;
1231: return(0);
1232: }
1236: PetscErrorCode MatLUFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1237: {
1238: Mat_Plapack *lu = (Mat_Plapack*)(F)->spptr;
1240: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend;
1241: PetscInt info_pla=0;
1242: PetscScalar *array,one = 1.0;
1245: if (lu->mstruct == SAME_NONZERO_PATTERN){
1246: PLA_Obj_free(&lu->A);
1247: PLA_Obj_free (&lu->pivots);
1248: }
1249: /* Create PLAPACK matrix object */
1250: lu->A = NULL; lu->pivots = NULL;
1251: PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1252: PLA_Obj_set_to_zero(lu->A);
1253: PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1255: /* Copy A into lu->A */
1256: PLA_API_begin();
1257: PLA_Obj_API_open(lu->A);
1258: MatGetOwnershipRange(A,&rstart,&rend);
1259: MatGetArray(A,&array);
1260: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1261: MatRestoreArray(A,&array);
1262: PLA_Obj_API_close(lu->A);
1263: PLA_API_end();
1265: /* Factor P A -> L U overwriting lower triangular portion of A with L, upper, U */
1266: info_pla = PLA_LU(lu->A,lu->pivots);
1267: if (info_pla != 0)
1268: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot encountered at row %d from PLA_LU()",info_pla);
1270: lu->rstart = rstart;
1271: lu->mstruct = SAME_NONZERO_PATTERN;
1272: F->ops->solve = MatSolve_MPIDense;
1273: F->assembled = PETSC_TRUE; /* required by -ksp_view */
1274: return(0);
1275: }
1279: PetscErrorCode MatCholeskyFactorNumeric_MPIDense(Mat F,Mat A,const MatFactorInfo *info)
1280: {
1281: Mat_Plapack *lu = (Mat_Plapack*)F->spptr;
1283: PetscInt M=A->rmap->N,m=A->rmap->n,rstart,rend;
1284: PetscInt info_pla=0;
1285: PetscScalar *array,one = 1.0;
1288: if (lu->mstruct == SAME_NONZERO_PATTERN){
1289: PLA_Obj_free(&lu->A);
1290: }
1291: /* Create PLAPACK matrix object */
1292: lu->A = NULL;
1293: lu->pivots = NULL;
1294: PLA_Matrix_create(lu->datatype,M,M,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1296: /* Copy A into lu->A */
1297: PLA_API_begin();
1298: PLA_Obj_API_open(lu->A);
1299: MatGetOwnershipRange(A,&rstart,&rend);
1300: MatGetArray(A,&array);
1301: PLA_API_axpy_matrix_to_global(m,M, &one,(void *)array,m,lu->A,rstart,0);
1302: MatRestoreArray(A,&array);
1303: PLA_Obj_API_close(lu->A);
1304: PLA_API_end();
1306: /* Factor P A -> Chol */
1307: info_pla = PLA_Chol(PLA_LOWER_TRIANGULAR,lu->A);
1308: if (info_pla != 0)
1309: SETERRQ1( PETSC_ERR_MAT_CH_ZRPVT,"Nonpositive definite matrix detected at row %d from PLA_Chol()",info_pla);
1311: lu->rstart = rstart;
1312: lu->mstruct = SAME_NONZERO_PATTERN;
1313: F->ops->solve = MatSolve_MPIDense;
1314: F->assembled = PETSC_TRUE; /* required by -ksp_view */
1315: return(0);
1316: }
1318: /* Note the Petsc perm permutation is ignored */
1321: PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat F,Mat A,IS perm,const MatFactorInfo *info)
1322: {
1324: PetscTruth issymmetric,set;
1327: MatIsSymmetricKnown(A,&set,&issymmetric);
1328: if (!set || !issymmetric) SETERRQ(PETSC_ERR_USER,"Matrix must be set as MAT_SYMMETRIC for CholeskyFactor()");
1329: F->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_MPIDense;
1330: return(0);
1331: }
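/*
   Illustrative sketch (not part of the original source): the symbolic Cholesky routine
   above requires MatIsSymmetricKnown() to return true, so a caller is expected to mark
   the matrix explicitly before factoring, e.g.

      ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatCholeskyFactorSymbolic(F,A,perm,&info);CHKERRQ(ierr);
      ierr = MatCholeskyFactorNumeric(F,A,&info);CHKERRQ(ierr);
*/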
1333: /* Note the Petsc r and c permutations are ignored */
1336: PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1337: {
1339: PetscInt M = A->rmap->N;
1340: Mat_Plapack *lu;
1343: lu = (Mat_Plapack*)F->spptr;
1344: PLA_Mvector_create(MPI_INT,M,1,lu->templ,PLA_ALIGN_FIRST,&lu->pivots);
1345: F->ops->lufactornumeric = MatLUFactorNumeric_MPIDense;
1346: return(0);
1347: }
1352: PetscErrorCode MatFactorGetSolverPackage_mpidense_plapack(Mat A,const MatSolverPackage *type)
1353: {
1355: *type = MAT_SOLVER_PLAPACK;
1356: return(0);
1357: }
1363: PetscErrorCode MatGetFactor_mpidense_plapack(Mat A,MatFactorType ftype,Mat *F)
1364: {
1366: Mat_Plapack *lu;
1367: PetscMPIInt size;
1368: PetscInt M=A->rmap->N;
1371: /* Create the factorization matrix */
1372: MatCreate(((PetscObject)A)->comm,F);
1373: MatSetSizes(*F,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1374: MatSetType(*F,((PetscObject)A)->type_name);
1375: PetscNewLog(*F,Mat_Plapack,&lu);
1376: (*F)->spptr = (void*)lu;
1378: /* Set default Plapack parameters */
1379: MPI_Comm_size(((PetscObject)A)->comm,&size);
1380: lu->nb = M/size;
1381: if (M - lu->nb*size) lu->nb++; /* without cyclic distribution */
1382:
1383: /* Set runtime options */
1384: PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"PLAPACK Options","Mat");
1385: PetscOptionsInt("-mat_plapack_nb","block size of template vector","None",lu->nb,&lu->nb,PETSC_NULL);
1386: PetscOptionsEnd();
1388: /* Create object distribution template */
1389: lu->templ = NULL;
1390: PLA_Temp_create(lu->nb, 0, &lu->templ);
1392: /* Set the datatype */
1393: #if defined(PETSC_USE_COMPLEX)
1394: lu->datatype = MPI_DOUBLE_COMPLEX;
1395: #else
1396: lu->datatype = MPI_DOUBLE;
1397: #endif
1399: PLA_Matrix_create(lu->datatype,M,A->cmap->N,lu->templ,PLA_ALIGN_FIRST,PLA_ALIGN_FIRST,&lu->A);
1402: lu->pla_solved = PETSC_FALSE; /* MatSolve_Plapack() has not been called yet */
1403: lu->mstruct = DIFFERENT_NONZERO_PATTERN;
1405: if (ftype == MAT_FACTOR_LU) {
1406: (*F)->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIDense;
1407: } else if (ftype == MAT_FACTOR_CHOLESKY) {
1408: (*F)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MPIDense;
1409: } else SETERRQ(PETSC_ERR_SUP,"No incomplete factorizations for dense matrices");
1410: (*F)->factor = ftype;
1411: PetscObjectComposeFunctionDynamic((PetscObject)(*F),"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_mpidense_plapack",MatFactorGetSolverPackage_mpidense_plapack);
1412: return(0);
1413: }
1415: #endif
1419: PetscErrorCode MatGetFactor_mpidense_petsc(Mat A,MatFactorType ftype,Mat *F)
1420: {
1421: #if defined(PETSC_HAVE_PLAPACK)
1423: #endif
1426: #if defined(PETSC_HAVE_PLAPACK)
1427: MatGetFactor_mpidense_plapack(A,ftype,F);
1428: #else
1429: SETERRQ1(PETSC_ERR_SUP,"Matrix format %s uses PLAPACK direct solver. Install PLAPACK",((PetscObject)A)->type_name);
1430: #endif
1431: return(0);
1432: }
1436: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
1437: {
1439: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
1442: MatAXPY(A->A,alpha,B->A,str);
1443: return(0);
1444: }
1448: PetscErrorCode MatConjugate_MPIDense(Mat mat)
1449: {
1450: Mat_MPIDense *a = (Mat_MPIDense *)mat->data;
1454: MatConjugate(a->A);
1455: return(0);
1456: }
1460: PetscErrorCode MatRealPart_MPIDense(Mat A)
1461: {
1462: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1466: MatRealPart(a->A);
1467: return(0);
1468: }
1472: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
1473: {
1474: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1478: MatImaginaryPart(a->A);
1479: return(0);
1480: }
1482: /* -------------------------------------------------------------------*/
1483: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1484: MatGetRow_MPIDense,
1485: MatRestoreRow_MPIDense,
1486: MatMult_MPIDense,
1487: /* 4*/ MatMultAdd_MPIDense,
1488: MatMultTranspose_MPIDense,
1489: MatMultTransposeAdd_MPIDense,
1490: 0,
1491: 0,
1492: 0,
1493: /*10*/ 0,
1494: 0,
1495: 0,
1496: 0,
1497: MatTranspose_MPIDense,
1498: /*15*/ MatGetInfo_MPIDense,
1499: MatEqual_MPIDense,
1500: MatGetDiagonal_MPIDense,
1501: MatDiagonalScale_MPIDense,
1502: MatNorm_MPIDense,
1503: /*20*/ MatAssemblyBegin_MPIDense,
1504: MatAssemblyEnd_MPIDense,
1505: MatSetOption_MPIDense,
1506: MatZeroEntries_MPIDense,
1507: /*24*/ MatZeroRows_MPIDense,
1508: 0,
1509: 0,
1510: 0,
1511: 0,
1512: /*29*/ MatSetUpPreallocation_MPIDense,
1513: 0,
1514: 0,
1515: MatGetArray_MPIDense,
1516: MatRestoreArray_MPIDense,
1517: /*34*/ MatDuplicate_MPIDense,
1518: 0,
1519: 0,
1520: 0,
1521: 0,
1522: /*39*/ MatAXPY_MPIDense,
1523: MatGetSubMatrices_MPIDense,
1524: 0,
1525: MatGetValues_MPIDense,
1526: 0,
1527: /*44*/ 0,
1528: MatScale_MPIDense,
1529: 0,
1530: 0,
1531: 0,
1532: /*49*/ 0,
1533: 0,
1534: 0,
1535: 0,
1536: 0,
1537: /*54*/ 0,
1538: 0,
1539: 0,
1540: 0,
1541: 0,
1542: /*59*/ MatGetSubMatrix_MPIDense,
1543: MatDestroy_MPIDense,
1544: MatView_MPIDense,
1545: 0,
1546: 0,
1547: /*64*/ 0,
1548: 0,
1549: 0,
1550: 0,
1551: 0,
1552: /*69*/ 0,
1553: 0,
1554: 0,
1555: 0,
1556: 0,
1557: /*74*/ 0,
1558: 0,
1559: 0,
1560: 0,
1561: 0,
1562: /*79*/ 0,
1563: 0,
1564: 0,
1565: 0,
1566: /*83*/ MatLoad_MPIDense,
1567: 0,
1568: 0,
1569: 0,
1570: 0,
1571: 0,
1572: /*89*/
1573: #if defined(PETSC_HAVE_PLAPACK)
1574: MatMatMult_MPIDense_MPIDense,
1575: MatMatMultSymbolic_MPIDense_MPIDense,
1576: MatMatMultNumeric_MPIDense_MPIDense,
1577: #else
1578: 0,
1579: 0,
1580: 0,
1581: #endif
1582: 0,
1583: 0,
1584: /*94*/ 0,
1585: 0,
1586: 0,
1587: 0,
1588: 0,
1589: /*99*/ 0,
1590: 0,
1591: 0,
1592: MatConjugate_MPIDense,
1593: 0,
1594: /*104*/0,
1595: MatRealPart_MPIDense,
1596: MatImaginaryPart_MPIDense,
1597: 0
1598: };
1603: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1604: {
1605: Mat_MPIDense *a;
1609: mat->preallocated = PETSC_TRUE;
1610: /* Note: For now, when data is specified above, this assumes the user correctly
1611: allocates the local dense storage space. We should add error checking. */
1613: a = (Mat_MPIDense*)mat->data;
1614: MatCreate(PETSC_COMM_SELF,&a->A);
1615: MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
1616: MatSetType(a->A,MATSEQDENSE);
1617: MatSeqDenseSetPreallocation(a->A,data);
1618: PetscLogObjectParent(mat,a->A);
1619: return(0);
1620: }
1623: /*MC
1624: MAT_SOLVER_PLAPACK = "plapack" - Parallel LU and Cholesky factorization for MATMPIDENSE matrices
1626: Run config/configure.py with the option --download-plapack to enable this solver package.
1629: Options Database Keys:
1630: . -mat_plapack_nprows <n> - number of rows in processor partition
1631: . -mat_plapack_npcols <n> - number of columns in processor partition
1632: . -mat_plapack_nb <n> - block size of template vector
1633: . -mat_plapack_nb_alg <n> - algorithmic block size
1634: - -mat_plapack_ckerror <n> - error checking flag
1636: Level: intermediate
1638: .seealso: MatCreateMPIDense(), MATDENSE, MATSEQDENSE, PCFactorSetSolverPackage(), MatSolverPackage
1640: M*/
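/*
   Illustrative sketch (not part of the original source): requesting the PLAPACK
   factorization from code via the standard factor interface; the symbolic and numeric
   factorization calls and MatSolve() then proceed as usual.

      Mat F;

      ierr = MatGetFactor(A,MAT_SOLVER_PLAPACK,MAT_FACTOR_LU,&F);CHKERRQ(ierr);

   From the command line a KSP-based code can select it with, for example,
      -pc_type lu -pc_factor_mat_solver_package plapack -mat_plapack_nb 64
*/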
1645: PetscErrorCode MatCreate_MPIDense(Mat mat)
1646: {
1647: Mat_MPIDense *a;
1651: PetscNewLog(mat,Mat_MPIDense,&a);
1652: mat->data = (void*)a;
1653: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1654: mat->mapping = 0;
1656: mat->insertmode = NOT_SET_VALUES;
1657: MPI_Comm_rank(((PetscObject)mat)->comm,&a->rank);
1658: MPI_Comm_size(((PetscObject)mat)->comm,&a->size);
1660: PetscLayoutSetBlockSize(mat->rmap,1);
1661: PetscLayoutSetBlockSize(mat->cmap,1);
1662: PetscLayoutSetUp(mat->rmap);
1663: PetscLayoutSetUp(mat->cmap);
1664: a->nvec = mat->cmap->n;
1666: /* build cache for off array entries formed */
1667: a->donotstash = PETSC_FALSE;
1668: MatStashCreate_Private(((PetscObject)mat)->comm,1,&mat->stash);
1670: /* stuff used for matrix vector multiply */
1671: a->lvec = 0;
1672: a->Mvctx = 0;
1673: a->roworiented = PETSC_TRUE;
1675: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1676: "MatGetDiagonalBlock_MPIDense",
1677: MatGetDiagonalBlock_MPIDense);
1678: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1679: "MatMPIDenseSetPreallocation_MPIDense",
1680: MatMPIDenseSetPreallocation_MPIDense);
1681: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1682: "MatMatMult_MPIAIJ_MPIDense",
1683: MatMatMult_MPIAIJ_MPIDense);
1684: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1685: "MatMatMultSymbolic_MPIAIJ_MPIDense",
1686: MatMatMultSymbolic_MPIAIJ_MPIDense);
1687: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1688: "MatMatMultNumeric_MPIAIJ_MPIDense",
1689: MatMatMultNumeric_MPIAIJ_MPIDense);
1690: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_petsc_C",
1691: "MatGetFactor_mpidense_petsc",
1692: MatGetFactor_mpidense_petsc);
1693: #if defined(PETSC_HAVE_PLAPACK)
1694: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetFactor_plapack_C",
1695: "MatGetFactor_mpidense_plapack",
1696: MatGetFactor_mpidense_plapack);
1697: PetscPLAPACKInitializePackage(((PetscObject)mat)->comm);
1698: #endif
1699: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1701: return(0);
1702: }
1705: /*MC
1706: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1708: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1709: and MATMPIDENSE otherwise.
1711: Options Database Keys:
1712: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1714: Level: beginner
1717: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1718: M*/
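/*
   Illustrative sketch (not part of the original source): creating a matrix whose type
   is chosen at run time; with -mat_type dense this resolves to MATSEQDENSE on one
   process and MATMPIDENSE otherwise, as described above.  M and N are placeholders.

      Mat A;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);           // picks up -mat_type dense
*/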
1723: PetscErrorCode MatCreate_Dense(Mat A)
1724: {
1726: PetscMPIInt size;
1729: MPI_Comm_size(((PetscObject)A)->comm,&size);
1730: if (size == 1) {
1731: MatSetType(A,MATSEQDENSE);
1732: } else {
1733: MatSetType(A,MATMPIDENSE);
1734: }
1735: return(0);
1736: }
1741: /*@C
1742: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1744: Not collective
1746: Input Parameters:
1747: . A - the matrix
1748: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1749: to control all matrix memory allocation.
1751: Notes:
1752: The dense format is fully compatible with standard Fortran 77
1753: storage by columns.
1755: The data input variable is intended primarily for Fortran programmers
1756: who wish to allocate their own matrix memory space. Most users should
1757: set data=PETSC_NULL.
1759: Level: intermediate
1761: .keywords: matrix,dense, parallel
1763: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1764: @*/
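/*
   Illustrative sketch (not part of the original source): typical preallocation of an
   MPI dense matrix, letting PETSc allocate the local storage (data = PETSC_NULL).
   The local/global sizes m, n, M, N are placeholders.

      Mat A;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
      ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
*/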
1765: PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1766: {
1767: PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1770: PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1771: if (f) {
1772: (*f)(mat,data);
1773: }
1774: return(0);
1775: }
1779: /*@C
1780: MatCreateMPIDense - Creates a parallel matrix in dense format.
1782: Collective on MPI_Comm
1784: Input Parameters:
1785: + comm - MPI communicator
1786: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1787: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1788: . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1789: . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1790: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1791: to control all matrix memory allocation.
1793: Output Parameter:
1794: . A - the matrix
1796: Notes:
1797: The dense format is fully compatible with standard Fortran 77
1798: storage by columns.
1800: The data input variable is intended primarily for Fortran programmers
1801: who wish to allocate their own matrix memory space. Most users should
1802: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1804: The user MUST specify either the local or global matrix dimensions
1805: (possibly both).
1807: Level: intermediate
1809: .keywords: matrix,dense, parallel
1811: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1812: @*/
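/*
   Illustrative sketch (not part of the original source): creating and filling a
   parallel dense matrix; the global sizes M, N and the value 1.0 are placeholders.

      Mat         A;
      PetscInt    i,j,rstart,rend;
      PetscScalar v = 1.0;

      ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        for (j=0; j<N; j++) {
          ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/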
1813: PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1814: {
1816: PetscMPIInt size;
1819: MatCreate(comm,A);
1820: MatSetSizes(*A,m,n,M,N);
1821: MPI_Comm_size(comm,&size);
1822: if (size > 1) {
1823: MatSetType(*A,MATMPIDENSE);
1824: MatMPIDenseSetPreallocation(*A,data);
1825: } else {
1826: MatSetType(*A,MATSEQDENSE);
1827: MatSeqDenseSetPreallocation(*A,data);
1828: }
1829: return(0);
1830: }
1834: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1835: {
1836: Mat mat;
1837: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1841: *newmat = 0;
1842: MatCreate(((PetscObject)A)->comm,&mat);
1843: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
1844: MatSetType(mat,((PetscObject)A)->type_name);
1845: a = (Mat_MPIDense*)mat->data;
1846: PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1847: mat->factor = A->factor;
1848: mat->assembled = PETSC_TRUE;
1849: mat->preallocated = PETSC_TRUE;
1851: mat->rmap->rstart = A->rmap->rstart;
1852: mat->rmap->rend = A->rmap->rend;
1853: a->size = oldmat->size;
1854: a->rank = oldmat->rank;
1855: mat->insertmode = NOT_SET_VALUES;
1856: a->nvec = oldmat->nvec;
1857: a->donotstash = oldmat->donotstash;
1858:
1859: PetscMemcpy(mat->rmap->range,A->rmap->range,(a->size+1)*sizeof(PetscInt));
1860: PetscMemcpy(mat->cmap->range,A->cmap->range,(a->size+1)*sizeof(PetscInt));
1862: MatSetUpMultiply_MPIDense(mat);
1863: MatDuplicate(oldmat->A,cpvalues,&a->A);
1864: PetscLogObjectParent(mat,a->A);
1866: *newmat = mat;
1867: return(0);
1868: }
1872: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, const MatType type,Mat *newmat)
1873: {
1875: PetscMPIInt rank,size;
1876: PetscInt *rowners,i,m,nz,j;
1877: PetscScalar *array,*vals,*vals_ptr;
1878: MPI_Status status;
1881: MPI_Comm_rank(comm,&rank);
1882: MPI_Comm_size(comm,&size);
1884: /* determine ownership of all rows */
1885: m = M/size + ((M % size) > rank);
1886: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1887: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1888: rowners[0] = 0;
1889: for (i=2; i<=size; i++) {
1890: rowners[i] += rowners[i-1];
1891: }
1893: MatCreate(comm,newmat);
1894: MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1895: MatSetType(*newmat,type);
1896: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1897: MatGetArray(*newmat,&array);
1899: if (!rank) {
1900: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1902: /* read in my part of the matrix numerical values */
1903: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1904:
1905: /* insert into matrix by row (this is why we cannot read directly into the array) */
1906: vals_ptr = vals;
1907: for (i=0; i<m; i++) {
1908: for (j=0; j<N; j++) {
1909: array[i + j*m] = *vals_ptr++;
1910: }
1911: }
1913: /* read in the other processors' parts and ship them out */
1914: for (i=1; i<size; i++) {
1915: nz = (rowners[i+1] - rowners[i])*N;
1916: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1917: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)(*newmat))->tag,comm);
1918: }
1919: } else {
1920: /* receive numeric values */
1921: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1923: /* receive message of values*/
1924: MPI_Recv(vals,m*N,MPIU_SCALAR,0,((PetscObject)(*newmat))->tag,comm,&status);
1926: /* insert into matrix by row (this is why we cannot read directly into the array) */
1927: vals_ptr = vals;
1928: for (i=0; i<m; i++) {
1929: for (j=0; j<N; j++) {
1930: array[i + j*m] = *vals_ptr++;
1931: }
1932: }
1933: }
1934: PetscFree(rowners);
1935: PetscFree(vals);
1936: MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1937: MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1938: return(0);
1939: }
1943: PetscErrorCode MatLoad_MPIDense(PetscViewer viewer,const MatType type,Mat *newmat)
1944: {
1945: Mat A;
1946: PetscScalar *vals,*svals;
1947: MPI_Comm comm = ((PetscObject)viewer)->comm;
1948: MPI_Status status;
1949: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1950: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1951: PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1952: PetscInt i,nz,j,rstart,rend;
1953: int fd;
1957: MPI_Comm_size(comm,&size);
1958: MPI_Comm_rank(comm,&rank);
1959: if (!rank) {
1960: PetscViewerBinaryGetDescriptor(viewer,&fd);
1961: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1962: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1963: }
1965: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1966: M = header[1]; N = header[2]; nz = header[3];
1968: /*
1969: Handle case where matrix is stored on disk as a dense matrix
1970: */
1971: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1972: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1973: return(0);
1974: }
1976: /* determine ownership of all rows */
1977: m = PetscMPIIntCast(M/size + ((M % size) > rank));
1978: PetscMalloc((size+2)*sizeof(PetscMPIInt),&rowners);
1979: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1980: rowners[0] = 0;
1981: for (i=2; i<=size; i++) {
1982: rowners[i] += rowners[i-1];
1983: }
1984: rstart = rowners[rank];
1985: rend = rowners[rank+1];
1987: /* distribute row lengths to all processors */
1988: PetscMalloc2(rend-rstart,PetscInt,&ourlens,rend-rstart,PetscInt,&offlens);
1989: if (!rank) {
1990: PetscMalloc(M*sizeof(PetscInt),&rowlengths);
1991: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1992: PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1993: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1994: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1995: PetscFree(sndcounts);
1996: } else {
1997: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1998: }
2000: if (!rank) {
2001: /* calculate the number of nonzeros on each processor */
2002: PetscMalloc(size*sizeof(PetscInt),&procsnz);
2003: PetscMemzero(procsnz,size*sizeof(PetscInt));
2004: for (i=0; i<size; i++) {
2005: for (j=rowners[i]; j< rowners[i+1]; j++) {
2006: procsnz[i] += rowlengths[j];
2007: }
2008: }
2009: PetscFree(rowlengths);
2011: /* determine max buffer needed and allocate it */
2012: maxnz = 0;
2013: for (i=0; i<size; i++) {
2014: maxnz = PetscMax(maxnz,procsnz[i]);
2015: }
2016: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
2018: /* read in my part of the matrix column indices */
2019: nz = procsnz[0];
2020: PetscMalloc(nz*sizeof(PetscInt),&mycols);
2021: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2023: /* read in everyone else's part and ship it off */
2024: for (i=1; i<size; i++) {
2025: nz = procsnz[i];
2026: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2027: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2028: }
2029: PetscFree(cols);
2030: } else {
2031: /* determine buffer space needed for message */
2032: nz = 0;
2033: for (i=0; i<m; i++) {
2034: nz += ourlens[i];
2035: }
2036: PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);
2038: /* receive message of column indices*/
2039: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2040: MPI_Get_count(&status,MPIU_INT,&maxnz);
2041: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2042: }
2044: /* loop over local rows, determining number of off diagonal entries */
2045: PetscMemzero(offlens,m*sizeof(PetscInt));
2046: jj = 0;
2047: for (i=0; i<m; i++) {
2048: for (j=0; j<ourlens[i]; j++) {
2049: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
2050: jj++;
2051: }
2052: }
2054: /* create our matrix */
2055: for (i=0; i<m; i++) {
2056: ourlens[i] -= offlens[i];
2057: }
2058: MatCreate(comm,newmat);
2059: MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
2060: MatSetType(*newmat,type);
2061: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
2062: A = *newmat;
2063: for (i=0; i<m; i++) {
2064: ourlens[i] += offlens[i];
2065: }
2067: if (!rank) {
2068: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
2070: /* read in my part of the matrix numerical values */
2071: nz = procsnz[0];
2072: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2073:
2074: /* insert into matrix */
2075: jj = rstart;
2076: smycols = mycols;
2077: svals = vals;
2078: for (i=0; i<m; i++) {
2079: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2080: smycols += ourlens[i];
2081: svals += ourlens[i];
2082: jj++;
2083: }
2085: /* read in the other processors' parts and ship them out */
2086: for (i=1; i<size; i++) {
2087: nz = procsnz[i];
2088: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2089: MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);
2090: }
2091: PetscFree(procsnz);
2092: } else {
2093: /* receive numeric values */
2094: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2096: /* receive message of values*/
2097: MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);
2098: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2099: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2101: /* insert into matrix */
2102: jj = rstart;
2103: smycols = mycols;
2104: svals = vals;
2105: for (i=0; i<m; i++) {
2106: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2107: smycols += ourlens[i];
2108: svals += ourlens[i];
2109: jj++;
2110: }
2111: }
2112: PetscFree2(ourlens,offlens);
2113: PetscFree(vals);
2114: PetscFree(mycols);
2115: PetscFree(rowners);
2117: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2118: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2119: return(0);
2120: }
2124: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
2125: {
2126: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2127: Mat a,b;
2128: PetscTruth flg;
2132: a = matA->A;
2133: b = matB->A;
2134: MatEqual(a,b,&flg);
2135: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);
2136: return(0);
2137: }
2139: #if defined(PETSC_HAVE_PLAPACK)
2143: /*@C
2144: PetscPLAPACKFinalizePackage - This function destroys everything in the Petsc interface to PLAPACK.
2145: Level: developer
2147: .keywords: Petsc, destroy, package, PLAPACK
2148: .seealso: PetscFinalize()
2149: @*/
2150: PetscErrorCode PetscPLAPACKFinalizePackage(void)
2151: {
2155: PLA_Finalize();
2156: return(0);
2157: }
2161: /*@C
2162: PetscPLAPACKInitializePackage - This function initializes everything in the Petsc interface to PLAPACK. It is
2163: called from MatCreate_MPIDense() the first time an MPI dense matrix is created.
2165: Input Parameter:
2166: . comm - the communicator the matrix lives on
2168: Level: developer
2170: Notes: PLAPACK does not fit well with MPI communicators; all (parallel) PLAPACK objects have to live in the
2171: same communicator (because there is some global state that is initialized and used for all matrices). In addition,
2172: if PLAPACK is initialized (that is, the initial matrices are created) on subcommunicators of MPI_COMM_WORLD, these
2173: subcommunicators cannot overlap.
2175: .keywords: Petsc, initialize, package, PLAPACK
2176: .seealso: PetscInitializePackage(), PetscInitialize()
2177: @*/
2178: PetscErrorCode PetscPLAPACKInitializePackage(MPI_Comm comm)
2179: {
2180: PetscMPIInt size;
2184: if (!PLA_Initialized(PETSC_NULL)) {
2186: MPI_Comm_size(comm,&size);
2187: Plapack_nprows = 1;
2188: Plapack_npcols = size;
2189:
2190: PetscOptionsBegin(comm,PETSC_NULL,"PLAPACK Options","Mat");
2191: PetscOptionsInt("-mat_plapack_nprows","row dimension of 2D processor mesh","None",Plapack_nprows,&Plapack_nprows,PETSC_NULL);
2192: PetscOptionsInt("-mat_plapack_npcols","column dimension of 2D processor mesh","None",Plapack_npcols,&Plapack_npcols,PETSC_NULL);
2193: #if defined(PETSC_USE_DEBUG)
2194: Plapack_ierror = 3;
2195: #else
2196: Plapack_ierror = 0;
2197: #endif
2198: PetscOptionsInt("-mat_plapack_ckerror","error checking flag","None",Plapack_ierror,&Plapack_ierror,PETSC_NULL);
2199: if (Plapack_ierror){
2200: PLA_Set_error_checking(Plapack_ierror,PETSC_TRUE,PETSC_TRUE,PETSC_FALSE );
2201: } else {
2202: PLA_Set_error_checking(Plapack_ierror,PETSC_FALSE,PETSC_FALSE,PETSC_FALSE );
2203: }
2204:
2205: Plapack_nb_alg = 0;
2206: PetscOptionsInt("-mat_plapack_nb_alg","algorithmic block size","None",Plapack_nb_alg,&Plapack_nb_alg,PETSC_NULL);
2207: if (Plapack_nb_alg) {
2208: pla_Environ_set_nb_alg (PLA_OP_ALL_ALG,Plapack_nb_alg);
2209: }
2210: PetscOptionsEnd();
2212: PLA_Comm_1D_to_2D(comm,Plapack_nprows,Plapack_npcols,&Plapack_comm_2d);
2213: PLA_Init(Plapack_comm_2d);
2214: PetscRegisterFinalize(PetscPLAPACKFinalizePackage);
2215: }
2216: return(0);
2217: }
2219: #endif