Actual source code: matstash.c

#define PETSCMAT_DLL

#include "private/matimpl.h"

#define DEFAULT_STASH_SIZE 10000

/*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash: it simply adds entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscInt   max,*opt,nopt,i;
  PetscTruth flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);
  PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;

  nopt = stash->size;
  PetscMalloc(nopt*sizeof(PetscInt),&opt);
  PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);
  if (bs <= 0) bs = 1;

  stash->bs          = bs;
  stash->nmax        = 0;
  stash->oldnmax     = 0;
  stash->n           = 0;
  stash->reallocs    = -1;
  stash->space_head  = 0;
  stash->space       = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  return(0);
}
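
/*
  A hedged sketch of how the -matstash_initial_size option handled above is
  interpreted (the executable name and values here are hypothetical). With 4
  ranks, either one value applies to all ranks, or one value is given per rank:

    mpiexec -n 4 ./app -matstash_initial_size 10000           every rank sets umax = 10000
    mpiexec -n 4 ./app -matstash_initial_size 500,500,2000,0  rank 2 sets umax = 2000; a 0 means "use the default"
*/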

/*
  MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  if (stash->space_head){
    PetscMatStashSpaceDestroy(stash->space_head);
    stash->space_head = 0;
    stash->space      = 0;
  }
  PetscFree(stash->flg_v);
  return(0);
}

/*
  MatStashScatterEnd_Private - This is called as the final stage of the
  scatter. The final stages of message passing are done here, and all the
  memory used for message passing is cleaned up. This routine also resets
  the stash, and deallocates the memory used for the stash. It also keeps
  track of the current memory usage so that the same value can be used the
  next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  PetscInt   nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status *send_status;

  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way the wastage of space is reduced the next time this stash is used.
     Update oldnmax only if it increases. */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;
  if (stash->space_head){
    PetscMatStashSpaceDestroy(stash->space_head);
    stash->space_head = 0;
    stash->space      = 0;
  }
  PetscFree(stash->send_waits);
  stash->send_waits = 0;
  PetscFree(stash->recv_waits);
  stash->recv_waits = 0;
  PetscFree2(stash->svalues,stash->sindices);
  stash->svalues = 0;
  PetscFree(stash->rvalues[0]);
  PetscFree(stash->rvalues);
  stash->rvalues = 0;
  PetscFree(stash->rindices[0]);
  PetscFree(stash->rindices);
  stash->rindices = 0;
  return(0);
}
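
/*
  Worked example of the resize heuristic above (a sketch): with bs = 2 (so
  bs2 = 4) and n = 1000 stashed entries, oldnmax = ((int)(1000*1.1)+5)*4 = 4420
  scalars, and it is kept only if it exceeds the previous oldnmax.
*/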

/*
  MatStashGetInfo_Private - Gets the relevant statistics of the stash

  Input Parameter:
  stash - the stash

  Output Parameters:
  nstash   - the size of the stash, i.e. the number of values stored
  reallocs - the number of additional mallocs incurred
*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return(0);
}

/*
  MatStashSetInitialSize_Private - Sets the initial size of the stash

  Input Parameters:
  stash - the stash
  max   - the value to use as the maximum size of the stash;
          this value is used when allocating memory
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  stash->umax = max;
  return(0);
}

/*
  MatStashExpand_Private - Expand the stash. This function is called
  when the space in the stash is not sufficient to add the new values
  being inserted into the stash.

  Input Parameters:
  stash - the stash
  incr  - the minimum increase requested

  Notes:
  This routine doubles the currently used memory.
*/
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscInt newnmax,bs2 = stash->bs*stash->bs;

  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs2;
    else             newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else newnmax = stash->nmax*2;
  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  PetscMatStashSpaceGet(bs2,newnmax,&stash->space);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  return(0);
}
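
/*
  Sizing sketch for the logic above: a brand-new stash with bs = 2 (bs2 = 4)
  and no user-specified size starts at newnmax = DEFAULT_STASH_SIZE/bs2 =
  10000/4 = 2500 entries; each later expansion doubles the capacity, adding
  2*incr more whenever doubling alone cannot fit the request.
*/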

/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
  n      - the number of elements inserted; all elements belong to the above row
  idxn   - the global column indices corresponding to each of the values
  values - the values inserted
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscTruth ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n){
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  return(0);
}
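
/*
  Hedged usage sketch: a parallel MatSetValues() implementation typically
  routes entries for rows it does not own through this function ('mat',
  'rstart', and 'rend' below are placeholders for a concrete implementation's
  ownership data, not part of this file):

    if (row < rstart || row >= rend) {
      MatStashValuesRow_Private(&mat->stash,row,n,cols,vals,PETSC_FALSE);
    }
*/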

/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the
  same row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted; all elements belong to the above row
  idxn    - the global column indices corresponding to each of the values
  values  - the values inserted
  stepval - consecutive values are separated by a distance of stepval;
            this happens because the input is column-oriented
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscTruth ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n){
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i*stepval];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  return(0);
}
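
/*
  stepval sketch: for a column-oriented m-by-n input array, the entries of one
  row sit m scalars apart, so with stepval = m the expression values[i*stepval]
  above walks along a single row of the logically two-dimensional input.
*/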

/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple blocks
  belonging to the same block-row can be inserted with a single call to this
  function. This function extracts the sub-block of values based on the
  dimensions of the original input block, and the row,col values corresponding
  to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of blocks inserted; all blocks belong to the above block-row
  idxn   - the global block-column indices corresponding to each of the blocks
           of values; each block is of size bs*bs
  values - the values inserted
  rmax   - the number of block-rows in the original block
  cmax   - the number of block-columns in the original block
  idx    - the index of the current block-row in the original block
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  if (!space || space->local_remaining < n){
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call. */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  return(0);
}
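
/*
  Copy sketch for the loop above with bs = 2: vals walks the input block one
  row at a time (advancing cmax*bs scalars per row, since the input is
  row-oriented), while array[k*bs] scatters each input row across the columns
  of the stashed block, so a block [[a00,a01],[a10,a11]] lands in space->val
  column-oriented as a00,a10,a01,a11.
*/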

/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple blocks
  belonging to the same block-row can be inserted with a single call to this
  function. This function extracts the sub-block of values based on the
  dimensions of the original input block, and the row,col values corresponding
  to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of blocks inserted; all blocks belong to the above block-row
  idxn   - the global block-column indices corresponding to each of the blocks
           of values; each block is of size bs*bs
  values - the values inserted
  rmax   - the number of block-rows in the original block
  cmax   - the number of block-columns in the original block
  idx    - the index of the current block-row in the original block
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  if (!space || space->local_remaining < n){
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call. */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) {array[k] = vals[k];}
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  return(0);
}

/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the owner of
  each stashed value, and sends the values off to the owner processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node

  Notes:
  In the case of the blocked stash, the 'owners' array gives the ranges in
  blocked global indices; for the regular stash, in the usual global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *nprocs,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);
  PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
  PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);
  PetscMemzero(nlengths,size*sizeof(PetscMPIInt));
  PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);

  i       = j = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space != PETSC_NULL){
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++){
      /* if indices are NOT locally sorted, need to start the search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }
  /* Now check which procs get messages - and compute nsends. */
  for (i=0,nsends=0; i<size; i++) {
    if (nlengths[i]) { nprocs[i] = 1; nsends++;}
  }

  {PetscMPIInt *onodes,*olengths;
  /* Determine the number of messages to expect, their lengths, and the from-ids */
  PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);
  PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
  /* since the row and column indices are sent together, the lengths are multiplied by 2 */
  for (i=0; i<nreceives; i++) olengths[i] *= 2;
  PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
  /* the values have length bs2 per entry (remove the earlier factor of 2) */
  for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
  PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
  PetscFree(onodes);
  PetscFree(olengths);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);
  PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);
  PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);
  /* use 2 sends: the first with all_a (the values), the next with all_i and all_j (the indices) */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space != PETSC_NULL){
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++){
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}

  for (i=0,count=0; i<size; i++) {
    if (nprocs[i]) {
      MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
      MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
    }
  }
#if defined(PETSC_USE_INFO)
  PetscInfo1(mat,"No of messages: %d \n",nsends);
  for (i=0; i<size; i++) {
    if (nprocs[i]) {
      PetscInfo2(mat,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(PetscScalar)+2*sizeof(PetscInt));
    }
  }
#endif
  PetscFree(nlengths);
  PetscFree(owner);
  PetscFree2(startv,starti);
  PetscFree(nprocs);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;
  PetscFree(recv_waits1);
  PetscFree(recv_waits2);

  stash->svalues    = svalues;
  stash->sindices   = sindices;
  stash->rvalues    = rvalues;
  stash->rindices   = rindices;
  stash->send_waits = send_waits;
  stash->nsends     = nsends;
  stash->nrecvs     = nreceives;
  return(0);
}
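
/*
  Protocol sketch: each destination rank j is sent exactly two messages above,
  one on tag1 with 2*nlengths[j] integers (all row indices, then all column
  indices) and one on tag2 with bs2*nlengths[j] scalars;
  MatStashScatterGetMesg_Private() below pairs the two messages up again by
  their MPI source rank.
*/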

/*
  MatStashScatterGetMesg_Private - This function waits on the receives posted
  in the function MatStashScatterBegin_Private() and returns one message at
  a time to the calling function. If no messages are left, it indicates this
  by setting flg = 0; else it sets flg = 1.

  Input Parameters:
  stash - the stash

  Output Parameters:
  nvals - the number of entries in the current message
  rows  - an array of row indices (or blocked indices) corresponding to the values
  cols  - an array of column indices (or blocked indices) corresponding to the values
  vals  - the values
  flg   - 0 indicates that no more messages are left, and the current call has no
          values associated with it; 1 indicates that the current call successfully
          received a message, and the other output parameters nvals, rows, cols,
          vals are set appropriately
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscMPIInt i,*flg_v = stash->flg_v,i1,i2;
  PetscInt    bs2;
  MPI_Status  recv_status;
  PetscTruth  match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) { return(0); }

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages. */
  while (!match_found) {
    CHKMEMQ;
    MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    CHKMEMQ;
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs2;
    } else {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}
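
/*
  Hedged sketch of the receive loop an assembly routine might run to drain
  the stash ('mat' and the insertion step are placeholders for a concrete
  matrix implementation, not part of this file):

    PetscMPIInt n;
    PetscInt    *row,*col,flg,i;
    PetscScalar *val;

    MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
    while (flg) {
      for (i=0; i<n; i++) {
        insert (row[i],col[i]) -> val[i] into the local part of the matrix
      }
      MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
    }
    MatStashScatterEnd_Private(&mat->stash);
*/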