Actual source code: vpscat.c
1: #define PETSCVEC_DLL
3: /*
4: Defines parallel vector scatters.
5: */
 7: #include "private/isimpl.h"
 8: #include "private/vecimpl.h"
 9: #include "../src/vec/vec/impls/dvecimpl.h"
 10: #include "../src/vec/vec/impls/mpi/pvecimpl.h"
14: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
15: {
16: VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
17: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
18: PetscErrorCode ierr;
19: PetscInt i;
20: PetscMPIInt rank;
21: PetscViewerFormat format;
22: PetscTruth iascii;
25: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
26: if (iascii) {
27: MPI_Comm_rank(((PetscObject)ctx)->comm,&rank);
28: PetscViewerGetFormat(viewer,&format);
29: if (format == PETSC_VIEWER_ASCII_INFO) {
30: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
32: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
33: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
34: itmp = to->starts[to->n+1];
35: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
36: itmp = from->starts[from->n+1];
37: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
38: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,((PetscObject)ctx)->comm);
40: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
41: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
42: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
43: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
44: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
45: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
47: } else {
48: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
49: if (to->n) {
50: for (i=0; i<to->n; i++){
51: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
52: }
53: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
54: for (i=0; i<to->starts[to->n]; i++){
55: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,to->indices[i]);
56: }
57: }
59: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
60: if (from->n) {
61: for (i=0; i<from->n; i++){
62: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
63: }
65: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
66: for (i=0; i<from->starts[from->n]; i++){
67: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,from->indices[i]);
68: }
69: }
70: if (to->local.n) {
71: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
72: for (i=0; i<to->local.n; i++){
73: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
74: }
75: }
77: PetscViewerFlush(viewer);
78: }
79: } else {
80: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
81: }
82: return(0);
83: }
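/*
   Editorial note (not part of the original file): a hedged sketch of how the viewer
   above is typically reached from user code.  VecScatterView(), PetscViewerSetFormat()
   and PETSC_VIEWER_ASCII_INFO are standard PETSc calls of this era, but exact
   signatures may differ between PETSc versions.
*/
#if 0   /* illustrative sketch only */
PetscErrorCode ViewMyScatter(VecScatter scat)
{
  PetscErrorCode ierr;
  /* summary statistics (the PETSC_VIEWER_ASCII_INFO branch above) */
  ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = VecScatterView(scat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  return 0;
}
#endif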
85: /* -----------------------------------------------------------------------------------*/
86: /*
 87: The next routine determines which entries of the local part of the scatter are an
 88: exact copy of values into their current location. We check this here and
 89: then know that we need not perform that portion of the scatter when the vector is
 90: scattering to itself with INSERT_VALUES.
 92: This is currently not used but would speed up, for example, DALocalToLocalBegin/End().
94: */
97: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
98: {
99: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
101: PetscInt *nto_slots,*nfrom_slots,j = 0;
102:
104: for (i=0; i<n; i++) {
105: if (to_slots[i] != from_slots[i]) n_nonmatching++;
106: }
108: if (!n_nonmatching) {
109: to->nonmatching_computed = PETSC_TRUE;
110: to->n_nonmatching = from->n_nonmatching = 0;
111: PetscInfo1(scatter,"Reduced %D to 0\n", n);
112: } else if (n_nonmatching == n) {
113: to->nonmatching_computed = PETSC_FALSE;
114: PetscInfo(scatter,"All values non-matching\n");
115: } else {
116: to->nonmatching_computed= PETSC_TRUE;
117: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
118: PetscMalloc(n_nonmatching*sizeof(PetscInt),&nto_slots);
119: PetscMalloc(n_nonmatching*sizeof(PetscInt),&nfrom_slots);
120: to->slots_nonmatching = nto_slots;
121: from->slots_nonmatching = nfrom_slots;
122: for (i=0; i<n; i++) {
123: if (to_slots[i] != from_slots[i]) {
124: nto_slots[j] = to_slots[i];
125: nfrom_slots[j] = from_slots[i];
126: j++;
127: }
128: }
129: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
130: }
131: return(0);
132: }
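/*
   Editorial note (not part of the original file): a sketch of how the slots_nonmatching
   arrays built above could be consumed by a local scatter with INSERT_VALUES.  It
   assumes x and y alias the same vector (scatter to self), so matching slots are no-ops
   and can be skipped.  Hypothetical helper; the actual kernels live elsewhere.
*/
#if 0   /* illustrative sketch only */
static void LocalScatterInsert_Optimized(VecScatter_Seq_General *to,VecScatter_Seq_General *from,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,n = to->n;
  if (to->nonmatching_computed) {           /* only move the entries that actually change */
    n = to->n_nonmatching;
    for (i=0; i<n; i++) y[to->slots_nonmatching[i]] = x[from->slots_nonmatching[i]];
  } else {
    for (i=0; i<n; i++) y[to->vslots[i]] = x[from->vslots[i]];
  }
}
#endif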
134: /* --------------------------------------------------------------------------------------*/
136: /* -------------------------------------------------------------------------------------*/
139: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
140: {
141: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
142: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
143: PetscErrorCode ierr;
144: PetscInt i;
147: if (to->use_readyreceiver) {
148: /*
149: Since we have already posted sends we must cancel them before freeing
150: the requests
151: */
152: for (i=0; i<from->n; i++) {
153: MPI_Cancel(from->requests+i);
154: }
155: for (i=0; i<to->n; i++) {
156: MPI_Cancel(to->rev_requests+i);
157: }
158: }
160: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
161: if (to->use_alltoallw) {
162: PetscFree3(to->wcounts,to->wdispls,to->types);
163: PetscFree3(from->wcounts,from->wdispls,from->types);
164: }
165: #endif
167: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
168: if (to->use_window) {
169: MPI_Win_free(&from->window);
170: MPI_Win_free(&to->window);
171: PetscFree(from->winstarts);
172: PetscFree(to->winstarts);
173: }
174: #endif
176: if (to->use_alltoallv) {
177: PetscFree2(to->counts,to->displs);
178: PetscFree2(from->counts,from->displs);
179: }
181: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
182: /*
 183: IBM's PE version of MPI has a bug where freeing these requests will corrupt later
 184: message passing.
185: */
186: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
187: if (!to->use_alltoallv && !to->use_window) { /* currently the to->requests etc are ALWAYS allocated even if not used */
188: if (to->requests) {
189: for (i=0; i<to->n; i++) {
190: MPI_Request_free(to->requests + i);
191: }
192: }
193: if (to->rev_requests) {
194: for (i=0; i<to->n; i++) {
195: MPI_Request_free(to->rev_requests + i);
196: }
197: }
198: }
199: /*
 200: MPICH could not properly cancel requests, so in ready-receiver mode we
 201: cannot free the requests. It may be fixed now; if not, put the following
 202: code inside an if (!to->use_readyreceiver) { ... } block.
203: */
204: if (!to->use_alltoallv && !to->use_window) { /* currently the from->requests etc are ALWAYS allocated even if not used */
205: if (from->requests) {
206: for (i=0; i<from->n; i++) {
207: MPI_Request_free(from->requests + i);
208: }
209: }
211: if (from->rev_requests) {
212: for (i=0; i<from->n; i++) {
213: MPI_Request_free(from->rev_requests + i);
214: }
215: }
216: }
217: #endif
219: PetscFree(to->local.vslots);
220: PetscFree(from->local.vslots);
221: PetscFree2(to->counts,to->displs);
222: PetscFree2(from->counts,from->displs);
223: PetscFree(to->local.slots_nonmatching);
224: PetscFree(from->local.slots_nonmatching);
225: PetscFree(to->rev_requests);
226: PetscFree(from->rev_requests);
227: PetscFree(to->requests);
228: PetscFree(from->requests);
229: PetscFree4(to->values,to->indices,to->starts,to->procs);
230: PetscFree2(to->sstatus,to->rstatus);
231: PetscFree4(from->values,from->indices,from->starts,from->procs);
232: PetscFree(from);
233: PetscFree(to);
234: PetscHeaderDestroy(ctx);
235: return(0);
236: }
240: /* --------------------------------------------------------------------------------------*/
241: /*
242: Special optimization to see if the local part of the scatter is actually
 243: a copy; if so, the scatter routines call PetscMemcpy() instead.
244:
245: */
248: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
249: {
250: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
251: PetscInt to_start,from_start;
255: to_start = to_slots[0];
256: from_start = from_slots[0];
258: for (i=1; i<n; i++) {
259: to_start += bs;
260: from_start += bs;
261: if (to_slots[i] != to_start) return(0);
262: if (from_slots[i] != from_start) return(0);
263: }
264: to->is_copy = PETSC_TRUE;
265: to->copy_start = to_slots[0];
266: to->copy_length = bs*sizeof(PetscScalar)*n;
267: from->is_copy = PETSC_TRUE;
268: from->copy_start = from_slots[0];
269: from->copy_length = bs*sizeof(PetscScalar)*n;
270: PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
271: return(0);
272: }
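/*
   Editorial note (not part of the original file): the comment above says the scatter
   kernels use PetscMemcpy() when the local part is a plain copy; this is a sketch of
   what that fast path might look like.  Hypothetical helper; copy_start is a starting
   index and copy_length is already in bytes, as set just above.
*/
#if 0   /* illustrative sketch only */
static void LocalScatterCopy(VecScatter_Seq_General *to,VecScatter_Seq_General *from,const PetscScalar *x,PetscScalar *y)
{
  if (to->is_copy) {
    PetscMemcpy(y + to->copy_start,x + from->copy_start,to->copy_length);
  }
}
#endif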
274: /* --------------------------------------------------------------------------------------*/
278: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
279: {
280: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
281: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
282: PetscErrorCode ierr;
283: PetscInt ny,bs = in_from->bs;
286: out->begin = in->begin;
287: out->end = in->end;
288: out->copy = in->copy;
289: out->destroy = in->destroy;
290: out->view = in->view;
292: /* allocate entire send scatter context */
293: PetscNewLog(out,VecScatter_MPI_General,&out_to);
294: PetscNewLog(out,VecScatter_MPI_General,&out_from);
296: ny = in_to->starts[in_to->n];
297: out_to->n = in_to->n;
298: out_to->type = in_to->type;
299: out_to->sendfirst = in_to->sendfirst;
301: PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
302: PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
303: PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
304: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
305: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
306: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
307:
308: out->todata = (void*)out_to;
309: out_to->local.n = in_to->local.n;
310: out_to->local.nonmatching_computed = PETSC_FALSE;
311: out_to->local.n_nonmatching = 0;
312: out_to->local.slots_nonmatching = 0;
313: if (in_to->local.n) {
314: PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
315: PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
316: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
317: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
318: } else {
319: out_to->local.vslots = 0;
320: out_from->local.vslots = 0;
321: }
323: /* allocate entire receive context */
324: out_from->type = in_from->type;
325: ny = in_from->starts[in_from->n];
326: out_from->n = in_from->n;
327: out_from->sendfirst = in_from->sendfirst;
329: PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
330: PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
331: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
332: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
333: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
334: out->fromdata = (void*)out_from;
335: out_from->local.n = in_from->local.n;
336: out_from->local.nonmatching_computed = PETSC_FALSE;
337: out_from->local.n_nonmatching = 0;
338: out_from->local.slots_nonmatching = 0;
340: /*
 341: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
342: */
343: {
344: PetscMPIInt tag;
345: MPI_Comm comm;
346: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
347: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
348: PetscInt i;
349: PetscTruth flg;
350: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
351: MPI_Request *rev_swaits,*rev_rwaits;
352: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
354: PetscMalloc(in_to->n*sizeof(MPI_Request),&out_to->rev_requests);
355: PetscMalloc(in_from->n*sizeof(MPI_Request),&out_from->rev_requests);
357: rev_rwaits = out_to->rev_requests;
358: rev_swaits = out_from->rev_requests;
360: out_from->bs = out_to->bs = bs;
361: tag = ((PetscObject)out)->tag;
362: comm = ((PetscObject)out)->comm;
364: /* Register the receives that you will use later (sends for scatter reverse) */
365: for (i=0; i<out_from->n; i++) {
366: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
367: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
368: }
370: flg = PETSC_FALSE;
371: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_rsend",&flg,PETSC_NULL);
372: if (flg) {
373: out_to->use_readyreceiver = PETSC_TRUE;
374: out_from->use_readyreceiver = PETSC_TRUE;
375: for (i=0; i<out_to->n; i++) {
376: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
377: }
378: if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
379: MPI_Barrier(comm);
380: PetscInfo(in,"Using VecScatter ready receiver mode\n");
381: } else {
382: out_to->use_readyreceiver = PETSC_FALSE;
383: out_from->use_readyreceiver = PETSC_FALSE;
384: flg = PETSC_FALSE;
385: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_ssend",&flg,PETSC_NULL);
386: if (flg) {
387: PetscInfo(in,"Using VecScatter Ssend mode\n");
388: }
389: for (i=0; i<out_to->n; i++) {
390: if (!flg) {
391: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
392: } else {
393: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
394: }
395: }
396: }
397: /* Register receives for scatter reverse */
398: for (i=0; i<out_to->n; i++) {
399: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
400: }
401: }
403: return(0);
404: }
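/*
   Editorial note (not part of the original file): the MPI_Send_init()/MPI_Recv_init()
   calls above only register persistent requests; they are driven later roughly as in
   this generic MPI sketch (not the exact PETSc code):

       MPI_Startall(nrecvs,rwaits);            start the pre-registered receives
       ... pack the send buffer ...
       MPI_Startall(nsends,swaits);            start the pre-registered sends
       MPI_Waitall(nrecvs,rwaits,rstatus);     wait, then unpack the received data
       MPI_Waitall(nsends,swaits,sstatus);     send buffers may now be reused
*/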
408: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
409: {
410: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
411: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
412: PetscErrorCode ierr;
413: PetscInt ny,bs = in_from->bs;
414: PetscMPIInt size;
417: MPI_Comm_size(((PetscObject)in)->comm,&size);
418: out->begin = in->begin;
419: out->end = in->end;
420: out->copy = in->copy;
421: out->destroy = in->destroy;
422: out->view = in->view;
424: /* allocate entire send scatter context */
425: PetscNewLog(out,VecScatter_MPI_General,&out_to);
426: PetscNewLog(out,VecScatter_MPI_General,&out_from);
428: ny = in_to->starts[in_to->n];
429: out_to->n = in_to->n;
430: out_to->type = in_to->type;
431: out_to->sendfirst = in_to->sendfirst;
433: PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
434: PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
435: PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
436: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
437: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
438: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
439:
440: out->todata = (void*)out_to;
441: out_to->local.n = in_to->local.n;
442: out_to->local.nonmatching_computed = PETSC_FALSE;
443: out_to->local.n_nonmatching = 0;
444: out_to->local.slots_nonmatching = 0;
445: if (in_to->local.n) {
446: PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
447: PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
448: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
449: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
450: } else {
451: out_to->local.vslots = 0;
452: out_from->local.vslots = 0;
453: }
455: /* allocate entire receive context */
456: out_from->type = in_from->type;
457: ny = in_from->starts[in_from->n];
458: out_from->n = in_from->n;
459: out_from->sendfirst = in_from->sendfirst;
461: PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
462: PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
463: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
464: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
465: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
466: out->fromdata = (void*)out_from;
467: out_from->local.n = in_from->local.n;
468: out_from->local.nonmatching_computed = PETSC_FALSE;
469: out_from->local.n_nonmatching = 0;
470: out_from->local.slots_nonmatching = 0;
472: out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;
474: PetscMalloc2(size,PetscMPIInt,&out_to->counts,size,PetscMPIInt,&out_to->displs);
475: PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
476: PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));
478: PetscMalloc2(size,PetscMPIInt,&out_from->counts,size,PetscMPIInt,&out_from->displs);
479: PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
480: PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
481: return(0);
482: }
483: /* --------------------------------------------------------------------------------------------------
 484: Packs message data into send buffers and unpacks it from receive buffers.
 486: These routines could be generated automatically.
 488: Fortran kernels etc. could also be used.
489: */
490: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
491: {
492: PetscInt i;
493: for (i=0; i<n; i++) {
494: y[i] = x[indicesx[i]];
495: }
496: }
497: PETSC_STATIC_INLINE void UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
498: {
499: PetscInt i;
500: switch (addv) {
501: case INSERT_VALUES:
502: for (i=0; i<n; i++) {
503: y[indicesy[i]] = x[i];
504: }
505: break;
506: case ADD_VALUES:
507: for (i=0; i<n; i++) {
508: y[indicesy[i]] += x[i];
509: }
510: break;
511: #if !defined(PETSC_USE_COMPLEX)
512: case MAX_VALUES:
513: for (i=0; i<n; i++) {
514: y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
515: }
516: #else
517: case MAX_VALUES:
518: #endif
519: case NOT_SET_VALUES:
520: break;
521: }
522: }
524: PETSC_STATIC_INLINE void Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
525: {
526: PetscInt i;
527: switch (addv) {
528: case INSERT_VALUES:
529: for (i=0; i<n; i++) {
530: y[indicesy[i]] = x[indicesx[i]];
531: }
532: break;
533: case ADD_VALUES:
534: for (i=0; i<n; i++) {
535: y[indicesy[i]] += x[indicesx[i]];
536: }
537: break;
538: #if !defined(PETSC_USE_COMPLEX)
539: case MAX_VALUES:
540: for (i=0; i<n; i++) {
541: y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
542: }
543: #else
544: case MAX_VALUES:
545: #endif
546: case NOT_SET_VALUES:
547: break;
548: }
549: }
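/*
   Editorial note (not part of the original file): a tiny worked example of the
   Pack_1/UnPack_1 kernels defined above, to make the indexing direction concrete.
*/
#if 0   /* illustrative sketch only */
static void PackUnpackExample(void)
{
  const PetscInt    idx[3] = {4,0,2};
  const PetscScalar x[5]   = {10.,11.,12.,13.,14.};
  PetscScalar       buf[3],y[5] = {0.,0.,0.,0.,0.};

  Pack_1(3,idx,x,buf);                 /* buf = {x[4],x[0],x[2]} = {14,10,12}       */
  UnPack_1(3,buf,idx,y,INSERT_VALUES); /* y[4]=14, y[0]=10, y[2]=12, rest untouched */
}
#endif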
551: /* ----------------------------------------------------------------------------------------------- */
552: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
553: {
554: PetscInt i,idx;
556: for (i=0; i<n; i++) {
557: idx = *indicesx++;
558: y[0] = x[idx];
559: y[1] = x[idx+1];
560: y += 2;
561: }
562: }
563: PETSC_STATIC_INLINE void UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
564: {
565: PetscInt i,idy;
567: switch (addv) {
568: case INSERT_VALUES:
569: for (i=0; i<n; i++) {
570: idy = *indicesy++;
571: y[idy] = x[0];
572: y[idy+1] = x[1];
573: x += 2;
574: }
575: break;
576: case ADD_VALUES:
577: for (i=0; i<n; i++) {
578: idy = *indicesy++;
579: y[idy] += x[0];
580: y[idy+1] += x[1];
581: x += 2;
582: }
583: break;
584: #if !defined(PETSC_USE_COMPLEX)
585: case MAX_VALUES:
586: for (i=0; i<n; i++) {
587: idy = *indicesy++;
588: y[idy] = PetscMax(y[idy],x[0]);
589: y[idy+1] = PetscMax(y[idy+1],x[1]);
590: x += 2;
591: }
592: #else
593: case MAX_VALUES:
594: #endif
595: case NOT_SET_VALUES:
596: break;
597: }
598: }
600: PETSC_STATIC_INLINE void Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
601: {
602: PetscInt i,idx,idy;
604: switch (addv) {
605: case INSERT_VALUES:
606: for (i=0; i<n; i++) {
607: idx = *indicesx++;
608: idy = *indicesy++;
609: y[idy] = x[idx];
610: y[idy+1] = x[idx+1];
611: }
612: break;
613: case ADD_VALUES:
614: for (i=0; i<n; i++) {
615: idx = *indicesx++;
616: idy = *indicesy++;
617: y[idy] += x[idx];
618: y[idy+1] += x[idx+1];
619: }
620: break;
621: #if !defined(PETSC_USE_COMPLEX)
622: case MAX_VALUES:
623: for (i=0; i<n; i++) {
624: idx = *indicesx++;
625: idy = *indicesy++;
626: y[idy] = PetscMax(y[idy],x[idx]);
627: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
628: }
629: #else
630: case MAX_VALUES:
631: #endif
632: case NOT_SET_VALUES:
633: break;
634: }
635: }
636: /* ----------------------------------------------------------------------------------------------- */
637: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
638: {
639: PetscInt i,idx;
641: for (i=0; i<n; i++) {
642: idx = *indicesx++;
643: y[0] = x[idx];
644: y[1] = x[idx+1];
645: y[2] = x[idx+2];
646: y += 3;
647: }
648: }
649: PETSC_STATIC_INLINE void UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
650: {
651: PetscInt i,idy;
653: switch (addv) {
654: case INSERT_VALUES:
655: for (i=0; i<n; i++) {
656: idy = *indicesy++;
657: y[idy] = x[0];
658: y[idy+1] = x[1];
659: y[idy+2] = x[2];
660: x += 3;
661: }
662: break;
663: case ADD_VALUES:
664: for (i=0; i<n; i++) {
665: idy = *indicesy++;
666: y[idy] += x[0];
667: y[idy+1] += x[1];
668: y[idy+2] += x[2];
669: x += 3;
670: }
671: break;
672: #if !defined(PETSC_USE_COMPLEX)
673: case MAX_VALUES:
674: for (i=0; i<n; i++) {
675: idy = *indicesy++;
676: y[idy] = PetscMax(y[idy],x[0]);
677: y[idy+1] = PetscMax(y[idy+1],x[1]);
678: y[idy+2] = PetscMax(y[idy+2],x[2]);
679: x += 3;
680: }
681: #else
682: case MAX_VALUES:
683: #endif
684: case NOT_SET_VALUES:
685: break;
686: }
687: }
689: PETSC_STATIC_INLINE void Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
690: {
691: PetscInt i,idx,idy;
693: switch (addv) {
694: case INSERT_VALUES:
695: for (i=0; i<n; i++) {
696: idx = *indicesx++;
697: idy = *indicesy++;
698: y[idy] = x[idx];
699: y[idy+1] = x[idx+1];
700: y[idy+2] = x[idx+2];
701: }
702: break;
703: case ADD_VALUES:
704: for (i=0; i<n; i++) {
705: idx = *indicesx++;
706: idy = *indicesy++;
707: y[idy] += x[idx];
708: y[idy+1] += x[idx+1];
709: y[idy+2] += x[idx+2];
710: }
711: break;
712: #if !defined(PETSC_USE_COMPLEX)
713: case MAX_VALUES:
714: for (i=0; i<n; i++) {
715: idx = *indicesx++;
716: idy = *indicesy++;
717: y[idy] = PetscMax(y[idy],x[idx]);
718: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
719: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
720: }
721: #else
722: case MAX_VALUES:
723: #endif
724: case NOT_SET_VALUES:
725: break;
726: }
727: }
728: /* ----------------------------------------------------------------------------------------------- */
729: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
730: {
731: PetscInt i,idx;
733: for (i=0; i<n; i++) {
734: idx = *indicesx++;
735: y[0] = x[idx];
736: y[1] = x[idx+1];
737: y[2] = x[idx+2];
738: y[3] = x[idx+3];
739: y += 4;
740: }
741: }
742: PETSC_STATIC_INLINE void UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
743: {
744: PetscInt i,idy;
746: switch (addv) {
747: case INSERT_VALUES:
748: for (i=0; i<n; i++) {
749: idy = *indicesy++;
750: y[idy] = x[0];
751: y[idy+1] = x[1];
752: y[idy+2] = x[2];
753: y[idy+3] = x[3];
754: x += 4;
755: }
756: break;
757: case ADD_VALUES:
758: for (i=0; i<n; i++) {
759: idy = *indicesy++;
760: y[idy] += x[0];
761: y[idy+1] += x[1];
762: y[idy+2] += x[2];
763: y[idy+3] += x[3];
764: x += 4;
765: }
766: break;
767: #if !defined(PETSC_USE_COMPLEX)
768: case MAX_VALUES:
769: for (i=0; i<n; i++) {
770: idy = *indicesy++;
771: y[idy] = PetscMax(y[idy],x[0]);
772: y[idy+1] = PetscMax(y[idy+1],x[1]);
773: y[idy+2] = PetscMax(y[idy+2],x[2]);
774: y[idy+3] = PetscMax(y[idy+3],x[3]);
775: x += 4;
776: }
777: #else
778: case MAX_VALUES:
779: #endif
780: case NOT_SET_VALUES:
781: break;
782: }
783: }
785: PETSC_STATIC_INLINE void Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
786: {
787: PetscInt i,idx,idy;
789: switch (addv) {
790: case INSERT_VALUES:
791: for (i=0; i<n; i++) {
792: idx = *indicesx++;
793: idy = *indicesy++;
794: y[idy] = x[idx];
795: y[idy+1] = x[idx+1];
796: y[idy+2] = x[idx+2];
797: y[idy+3] = x[idx+3];
798: }
799: break;
800: case ADD_VALUES:
801: for (i=0; i<n; i++) {
802: idx = *indicesx++;
803: idy = *indicesy++;
804: y[idy] += x[idx];
805: y[idy+1] += x[idx+1];
806: y[idy+2] += x[idx+2];
807: y[idy+3] += x[idx+3];
808: }
809: break;
810: #if !defined(PETSC_USE_COMPLEX)
811: case MAX_VALUES:
812: for (i=0; i<n; i++) {
813: idx = *indicesx++;
814: idy = *indicesy++;
815: y[idy] = PetscMax(y[idy],x[idx]);
816: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
817: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
818: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
819: }
820: #else
821: case MAX_VALUES:
822: #endif
823: case NOT_SET_VALUES:
824: break;
825: }
826: }
827: /* ----------------------------------------------------------------------------------------------- */
828: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
829: {
830: PetscInt i,idx;
832: for (i=0; i<n; i++) {
833: idx = *indicesx++;
834: y[0] = x[idx];
835: y[1] = x[idx+1];
836: y[2] = x[idx+2];
837: y[3] = x[idx+3];
838: y[4] = x[idx+4];
839: y += 5;
840: }
841: }
842: PETSC_STATIC_INLINE void UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
843: {
844: PetscInt i,idy;
846: switch (addv) {
847: case INSERT_VALUES:
848: for (i=0; i<n; i++) {
849: idy = *indicesy++;
850: y[idy] = x[0];
851: y[idy+1] = x[1];
852: y[idy+2] = x[2];
853: y[idy+3] = x[3];
854: y[idy+4] = x[4];
855: x += 5;
856: }
857: break;
858: case ADD_VALUES:
859: for (i=0; i<n; i++) {
860: idy = *indicesy++;
861: y[idy] += x[0];
862: y[idy+1] += x[1];
863: y[idy+2] += x[2];
864: y[idy+3] += x[3];
865: y[idy+4] += x[4];
866: x += 5;
867: }
868: break;
869: #if !defined(PETSC_USE_COMPLEX)
870: case MAX_VALUES:
871: for (i=0; i<n; i++) {
872: idy = *indicesy++;
873: y[idy] = PetscMax(y[idy],x[0]);
874: y[idy+1] = PetscMax(y[idy+1],x[1]);
875: y[idy+2] = PetscMax(y[idy+2],x[2]);
876: y[idy+3] = PetscMax(y[idy+3],x[3]);
877: y[idy+4] = PetscMax(y[idy+4],x[4]);
878: x += 5;
879: }
880: #else
881: case MAX_VALUES:
882: #endif
883: case NOT_SET_VALUES:
884: break;
885: }
886: }
888: PETSC_STATIC_INLINE void Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
889: {
890: PetscInt i,idx,idy;
892: switch (addv) {
893: case INSERT_VALUES:
894: for (i=0; i<n; i++) {
895: idx = *indicesx++;
896: idy = *indicesy++;
897: y[idy] = x[idx];
898: y[idy+1] = x[idx+1];
899: y[idy+2] = x[idx+2];
900: y[idy+3] = x[idx+3];
901: y[idy+4] = x[idx+4];
902: }
903: break;
904: case ADD_VALUES:
905: for (i=0; i<n; i++) {
906: idx = *indicesx++;
907: idy = *indicesy++;
908: y[idy] += x[idx];
909: y[idy+1] += x[idx+1];
910: y[idy+2] += x[idx+2];
911: y[idy+3] += x[idx+3];
912: y[idy+4] += x[idx+4];
913: }
914: break;
915: #if !defined(PETSC_USE_COMPLEX)
916: case MAX_VALUES:
917: for (i=0; i<n; i++) {
918: idx = *indicesx++;
919: idy = *indicesy++;
920: y[idy] = PetscMax(y[idy],x[idx]);
921: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
922: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
923: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
924: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
925: }
926: #else
927: case MAX_VALUES:
928: #endif
929: case NOT_SET_VALUES:
930: break;
931: }
932: }
933: /* ----------------------------------------------------------------------------------------------- */
934: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
935: {
936: PetscInt i,idx;
938: for (i=0; i<n; i++) {
939: idx = *indicesx++;
940: y[0] = x[idx];
941: y[1] = x[idx+1];
942: y[2] = x[idx+2];
943: y[3] = x[idx+3];
944: y[4] = x[idx+4];
945: y[5] = x[idx+5];
946: y += 6;
947: }
948: }
949: PETSC_STATIC_INLINE void UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
950: {
951: PetscInt i,idy;
953: switch (addv) {
954: case INSERT_VALUES:
955: for (i=0; i<n; i++) {
956: idy = *indicesy++;
957: y[idy] = x[0];
958: y[idy+1] = x[1];
959: y[idy+2] = x[2];
960: y[idy+3] = x[3];
961: y[idy+4] = x[4];
962: y[idy+5] = x[5];
963: x += 6;
964: }
965: break;
966: case ADD_VALUES:
967: for (i=0; i<n; i++) {
968: idy = *indicesy++;
969: y[idy] += x[0];
970: y[idy+1] += x[1];
971: y[idy+2] += x[2];
972: y[idy+3] += x[3];
973: y[idy+4] += x[4];
974: y[idy+5] += x[5];
975: x += 6;
976: }
977: break;
978: #if !defined(PETSC_USE_COMPLEX)
979: case MAX_VALUES:
980: for (i=0; i<n; i++) {
981: idy = *indicesy++;
982: y[idy] = PetscMax(y[idy],x[0]);
983: y[idy+1] = PetscMax(y[idy+1],x[1]);
984: y[idy+2] = PetscMax(y[idy+2],x[2]);
985: y[idy+3] = PetscMax(y[idy+3],x[3]);
986: y[idy+4] = PetscMax(y[idy+4],x[4]);
987: y[idy+5] = PetscMax(y[idy+5],x[5]);
988: x += 6;
989: }
990: #else
991: case MAX_VALUES:
992: #endif
993: case NOT_SET_VALUES:
994: break;
995: }
996: }
998: PETSC_STATIC_INLINE void Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
999: {
1000: PetscInt i,idx,idy;
1002: switch (addv) {
1003: case INSERT_VALUES:
1004: for (i=0; i<n; i++) {
1005: idx = *indicesx++;
1006: idy = *indicesy++;
1007: y[idy] = x[idx];
1008: y[idy+1] = x[idx+1];
1009: y[idy+2] = x[idx+2];
1010: y[idy+3] = x[idx+3];
1011: y[idy+4] = x[idx+4];
1012: y[idy+5] = x[idx+5];
1013: }
1014: break;
1015: case ADD_VALUES:
1016: for (i=0; i<n; i++) {
1017: idx = *indicesx++;
1018: idy = *indicesy++;
1019: y[idy] += x[idx];
1020: y[idy+1] += x[idx+1];
1021: y[idy+2] += x[idx+2];
1022: y[idy+3] += x[idx+3];
1023: y[idy+4] += x[idx+4];
1024: y[idy+5] += x[idx+5];
1025: }
1026: break;
1027: #if !defined(PETSC_USE_COMPLEX)
1028: case MAX_VALUES:
1029: for (i=0; i<n; i++) {
1030: idx = *indicesx++;
1031: idy = *indicesy++;
1032: y[idy] = PetscMax(y[idy],x[idx]);
1033: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1034: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1035: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1036: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1037: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1038: }
1039: #else
1040: case MAX_VALUES:
1041: #endif
1042: case NOT_SET_VALUES:
1043: break;
1044: }
1045: }
1046: /* ----------------------------------------------------------------------------------------------- */
1047: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1048: {
1049: PetscInt i,idx;
1051: for (i=0; i<n; i++) {
1052: idx = *indicesx++;
1053: y[0] = x[idx];
1054: y[1] = x[idx+1];
1055: y[2] = x[idx+2];
1056: y[3] = x[idx+3];
1057: y[4] = x[idx+4];
1058: y[5] = x[idx+5];
1059: y[6] = x[idx+6];
1060: y += 7;
1061: }
1062: }
1063: PETSC_STATIC_INLINE void UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1064: {
1065: PetscInt i,idy;
1067: switch (addv) {
1068: case INSERT_VALUES:
1069: for (i=0; i<n; i++) {
1070: idy = *indicesy++;
1071: y[idy] = x[0];
1072: y[idy+1] = x[1];
1073: y[idy+2] = x[2];
1074: y[idy+3] = x[3];
1075: y[idy+4] = x[4];
1076: y[idy+5] = x[5];
1077: y[idy+6] = x[6];
1078: x += 7;
1079: }
1080: break;
1081: case ADD_VALUES:
1082: for (i=0; i<n; i++) {
1083: idy = *indicesy++;
1084: y[idy] += x[0];
1085: y[idy+1] += x[1];
1086: y[idy+2] += x[2];
1087: y[idy+3] += x[3];
1088: y[idy+4] += x[4];
1089: y[idy+5] += x[5];
1090: y[idy+6] += x[6];
1091: x += 7;
1092: }
1093: break;
1094: #if !defined(PETSC_USE_COMPLEX)
1095: case MAX_VALUES:
1096: for (i=0; i<n; i++) {
1097: idy = *indicesy++;
1098: y[idy] = PetscMax(y[idy],x[0]);
1099: y[idy+1] = PetscMax(y[idy+1],x[1]);
1100: y[idy+2] = PetscMax(y[idy+2],x[2]);
1101: y[idy+3] = PetscMax(y[idy+3],x[3]);
1102: y[idy+4] = PetscMax(y[idy+4],x[4]);
1103: y[idy+5] = PetscMax(y[idy+5],x[5]);
1104: y[idy+6] = PetscMax(y[idy+6],x[6]);
1105: x += 7;
1106: }
1107: #else
1108: case MAX_VALUES:
1109: #endif
1110: case NOT_SET_VALUES:
1111: break;
1112: }
1113: }
1115: PETSC_STATIC_INLINE void Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1116: {
1117: PetscInt i,idx,idy;
1119: switch (addv) {
1120: case INSERT_VALUES:
1121: for (i=0; i<n; i++) {
1122: idx = *indicesx++;
1123: idy = *indicesy++;
1124: y[idy] = x[idx];
1125: y[idy+1] = x[idx+1];
1126: y[idy+2] = x[idx+2];
1127: y[idy+3] = x[idx+3];
1128: y[idy+4] = x[idx+4];
1129: y[idy+5] = x[idx+5];
1130: y[idy+6] = x[idx+6];
1131: }
1132: break;
1133: case ADD_VALUES:
1134: for (i=0; i<n; i++) {
1135: idx = *indicesx++;
1136: idy = *indicesy++;
1137: y[idy] += x[idx];
1138: y[idy+1] += x[idx+1];
1139: y[idy+2] += x[idx+2];
1140: y[idy+3] += x[idx+3];
1141: y[idy+4] += x[idx+4];
1142: y[idy+5] += x[idx+5];
1143: y[idy+6] += x[idx+6];
1144: }
1145: break;
1146: #if !defined(PETSC_USE_COMPLEX)
1147: case MAX_VALUES:
1148: for (i=0; i<n; i++) {
1149: idx = *indicesx++;
1150: idy = *indicesy++;
1151: y[idy] = PetscMax(y[idy],x[idx]);
1152: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1153: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1154: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1155: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1156: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1157: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1158: }
1159: #else
1160: case MAX_VALUES:
1161: #endif
1162: case NOT_SET_VALUES:
1163: break;
1164: }
1165: }
1166: /* ----------------------------------------------------------------------------------------------- */
1167: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1168: {
1169: PetscInt i,idx;
1171: for (i=0; i<n; i++) {
1172: idx = *indicesx++;
1173: y[0] = x[idx];
1174: y[1] = x[idx+1];
1175: y[2] = x[idx+2];
1176: y[3] = x[idx+3];
1177: y[4] = x[idx+4];
1178: y[5] = x[idx+5];
1179: y[6] = x[idx+6];
1180: y[7] = x[idx+7];
1181: y += 8;
1182: }
1183: }
1184: PETSC_STATIC_INLINE void UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1185: {
1186: PetscInt i,idy;
1188: switch (addv) {
1189: case INSERT_VALUES:
1190: for (i=0; i<n; i++) {
1191: idy = *indicesy++;
1192: y[idy] = x[0];
1193: y[idy+1] = x[1];
1194: y[idy+2] = x[2];
1195: y[idy+3] = x[3];
1196: y[idy+4] = x[4];
1197: y[idy+5] = x[5];
1198: y[idy+6] = x[6];
1199: y[idy+7] = x[7];
1200: x += 8;
1201: }
1202: break;
1203: case ADD_VALUES:
1204: for (i=0; i<n; i++) {
1205: idy = *indicesy++;
1206: y[idy] += x[0];
1207: y[idy+1] += x[1];
1208: y[idy+2] += x[2];
1209: y[idy+3] += x[3];
1210: y[idy+4] += x[4];
1211: y[idy+5] += x[5];
1212: y[idy+6] += x[6];
1213: y[idy+7] += x[7];
1214: x += 8;
1215: }
1216: break;
1217: #if !defined(PETSC_USE_COMPLEX)
1218: case MAX_VALUES:
1219: for (i=0; i<n; i++) {
1220: idy = *indicesy++;
1221: y[idy] = PetscMax(y[idy],x[0]);
1222: y[idy+1] = PetscMax(y[idy+1],x[1]);
1223: y[idy+2] = PetscMax(y[idy+2],x[2]);
1224: y[idy+3] = PetscMax(y[idy+3],x[3]);
1225: y[idy+4] = PetscMax(y[idy+4],x[4]);
1226: y[idy+5] = PetscMax(y[idy+5],x[5]);
1227: y[idy+6] = PetscMax(y[idy+6],x[6]);
1228: y[idy+7] = PetscMax(y[idy+7],x[7]);
1229: x += 8;
1230: }
1231: #else
1232: case MAX_VALUES:
1233: #endif
1234: case NOT_SET_VALUES:
1235: break;
1236: }
1237: }
1239: PETSC_STATIC_INLINE void Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1240: {
1241: PetscInt i,idx,idy;
1243: switch (addv) {
1244: case INSERT_VALUES:
1245: for (i=0; i<n; i++) {
1246: idx = *indicesx++;
1247: idy = *indicesy++;
1248: y[idy] = x[idx];
1249: y[idy+1] = x[idx+1];
1250: y[idy+2] = x[idx+2];
1251: y[idy+3] = x[idx+3];
1252: y[idy+4] = x[idx+4];
1253: y[idy+5] = x[idx+5];
1254: y[idy+6] = x[idx+6];
1255: y[idy+7] = x[idx+7];
1256: }
1257: break;
1258: case ADD_VALUES:
1259: for (i=0; i<n; i++) {
1260: idx = *indicesx++;
1261: idy = *indicesy++;
1262: y[idy] += x[idx];
1263: y[idy+1] += x[idx+1];
1264: y[idy+2] += x[idx+2];
1265: y[idy+3] += x[idx+3];
1266: y[idy+4] += x[idx+4];
1267: y[idy+5] += x[idx+5];
1268: y[idy+6] += x[idx+6];
1269: y[idy+7] += x[idx+7];
1270: }
1271: break;
1272: #if !defined(PETSC_USE_COMPLEX)
1273: case MAX_VALUES:
1274: for (i=0; i<n; i++) {
1275: idx = *indicesx++;
1276: idy = *indicesy++;
1277: y[idy] = PetscMax(y[idy],x[idx]);
1278: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1279: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1280: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1281: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1282: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1283: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1284: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1285: }
1286: #else
1287: case MAX_VALUES:
1288: #endif
1289: case NOT_SET_VALUES:
1290: break;
1291: }
1292: }
1294: /* ----------------------------------------------------------------------------------------------- */
1295: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1296: {
1297: PetscInt i,idx;
1299: for (i=0; i<n; i++) {
1300: idx = *indicesx++;
1301: y[0] = x[idx];
1302: y[1] = x[idx+1];
1303: y[2] = x[idx+2];
1304: y[3] = x[idx+3];
1305: y[4] = x[idx+4];
1306: y[5] = x[idx+5];
1307: y[6] = x[idx+6];
1308: y[7] = x[idx+7];
1309: y[8] = x[idx+8];
1310: y[9] = x[idx+9];
1311: y[10] = x[idx+10];
1312: y[11] = x[idx+11];
1313: y += 12;
1314: }
1315: }
1316: PETSC_STATIC_INLINE void UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1317: {
1318: PetscInt i,idy;
1320: switch (addv) {
1321: case INSERT_VALUES:
1322: for (i=0; i<n; i++) {
1323: idy = *indicesy++;
1324: y[idy] = x[0];
1325: y[idy+1] = x[1];
1326: y[idy+2] = x[2];
1327: y[idy+3] = x[3];
1328: y[idy+4] = x[4];
1329: y[idy+5] = x[5];
1330: y[idy+6] = x[6];
1331: y[idy+7] = x[7];
1332: y[idy+8] = x[8];
1333: y[idy+9] = x[9];
1334: y[idy+10] = x[10];
1335: y[idy+11] = x[11];
1336: x += 12;
1337: }
1338: break;
1339: case ADD_VALUES:
1340: for (i=0; i<n; i++) {
1341: idy = *indicesy++;
1342: y[idy] += x[0];
1343: y[idy+1] += x[1];
1344: y[idy+2] += x[2];
1345: y[idy+3] += x[3];
1346: y[idy+4] += x[4];
1347: y[idy+5] += x[5];
1348: y[idy+6] += x[6];
1349: y[idy+7] += x[7];
1350: y[idy+8] += x[8];
1351: y[idy+9] += x[9];
1352: y[idy+10] += x[10];
1353: y[idy+11] += x[11];
1354: x += 12;
1355: }
1356: break;
1357: #if !defined(PETSC_USE_COMPLEX)
1358: case MAX_VALUES:
1359: for (i=0; i<n; i++) {
1360: idy = *indicesy++;
1361: y[idy] = PetscMax(y[idy],x[0]);
1362: y[idy+1] = PetscMax(y[idy+1],x[1]);
1363: y[idy+2] = PetscMax(y[idy+2],x[2]);
1364: y[idy+3] = PetscMax(y[idy+3],x[3]);
1365: y[idy+4] = PetscMax(y[idy+4],x[4]);
1366: y[idy+5] = PetscMax(y[idy+5],x[5]);
1367: y[idy+6] = PetscMax(y[idy+6],x[6]);
1368: y[idy+7] = PetscMax(y[idy+7],x[7]);
1369: y[idy+8] = PetscMax(y[idy+8],x[8]);
1370: y[idy+9] = PetscMax(y[idy+9],x[9]);
1371: y[idy+10] = PetscMax(y[idy+10],x[10]);
1372: y[idy+11] = PetscMax(y[idy+11],x[11]);
1373: x += 12;
1374: }
1375: #else
1376: case MAX_VALUES:
1377: #endif
1378: case NOT_SET_VALUES:
1379: break;
1380: }
1381: }
1383: PETSC_STATIC_INLINE void Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1384: {
1385: PetscInt i,idx,idy;
1387: switch (addv) {
1388: case INSERT_VALUES:
1389: for (i=0; i<n; i++) {
1390: idx = *indicesx++;
1391: idy = *indicesy++;
1392: y[idy] = x[idx];
1393: y[idy+1] = x[idx+1];
1394: y[idy+2] = x[idx+2];
1395: y[idy+3] = x[idx+3];
1396: y[idy+4] = x[idx+4];
1397: y[idy+5] = x[idx+5];
1398: y[idy+6] = x[idx+6];
1399: y[idy+7] = x[idx+7];
1400: y[idy+8] = x[idx+8];
1401: y[idy+9] = x[idx+9];
1402: y[idy+10] = x[idx+10];
1403: y[idy+11] = x[idx+11];
1404: }
1405: break;
1406: case ADD_VALUES:
1407: for (i=0; i<n; i++) {
1408: idx = *indicesx++;
1409: idy = *indicesy++;
1410: y[idy] += x[idx];
1411: y[idy+1] += x[idx+1];
1412: y[idy+2] += x[idx+2];
1413: y[idy+3] += x[idx+3];
1414: y[idy+4] += x[idx+4];
1415: y[idy+5] += x[idx+5];
1416: y[idy+6] += x[idx+6];
1417: y[idy+7] += x[idx+7];
1418: y[idy+8] += x[idx+8];
1419: y[idy+9] += x[idx+9];
1420: y[idy+10] += x[idx+10];
1421: y[idy+11] += x[idx+11];
1422: }
1423: break;
1424: #if !defined(PETSC_USE_COMPLEX)
1425: case MAX_VALUES:
1426: for (i=0; i<n; i++) {
1427: idx = *indicesx++;
1428: idy = *indicesy++;
1429: y[idy] = PetscMax(y[idy],x[idx]);
1430: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1431: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1432: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1433: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1434: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1435: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1436: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1437: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1438: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1439: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1440: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1441: }
1442: #else
1443: case MAX_VALUES:
1444: #endif
1445: case NOT_SET_VALUES:
1446: break;
1447: }
1448: }
1450: /* Create the VecScatterBegin/End_P for our chosen block sizes */
1451: #define BS 1
 1452: #include "../src/vec/vec/utils/vpscat.h"
 1453: #define BS 2
 1454: #include "../src/vec/vec/utils/vpscat.h"
 1455: #define BS 3
 1456: #include "../src/vec/vec/utils/vpscat.h"
 1457: #define BS 4
 1458: #include "../src/vec/vec/utils/vpscat.h"
 1459: #define BS 5
 1460: #include "../src/vec/vec/utils/vpscat.h"
 1461: #define BS 6
 1462: #include "../src/vec/vec/utils/vpscat.h"
 1463: #define BS 7
 1464: #include "../src/vec/vec/utils/vpscat.h"
 1465: #define BS 8
 1466: #include "../src/vec/vec/utils/vpscat.h"
 1467: #define BS 12
 1468: #include "../src/vec/vec/utils/vpscat.h"
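/*
   Editorial note (not part of the original file): the repeated "#define BS n" plus
   "#include vpscat.h" lines above are a preprocessor substitute for templates.  A
   generic sketch of the idiom (assumption: vpscat.h works along these lines; this is
   NOT its actual contents):

       #define PASTE_(a,b) a ## b
       #define PASTE(a,b)  PASTE_(a,b)
       PetscErrorCode PASTE(VecScatterBegin_,BS)(VecScatter ctx, ...)
       {
         ... uses Pack_<BS>/UnPack_<BS> and starts the persistent MPI requests ...
       }
       #undef BS

   so each inclusion, with a different BS, generates the kernels for that block size.
*/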
1470: /* ==========================================================================================*/
1472: /* create parallel to sequential scatter context */
1474: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *,VecScatter_MPI_General *,VecScatter);
1478: /*@
1479: VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.
1481: Collective on VecScatter
1483: Input Parameters:
 1484: + VecScatter - obtained with VecScatterCreateEmpty()
 1485: . nsends - number of messages this process will send
 1486: . sendSizes - number of entries in each send message
 1487: . sendProcs - ranks of the processes this process sends to
 1488: . sendIdx - indices where the sent entries are obtained from (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
 1489: . nrecvs - number of receives to expect
 1490: . recvSizes - number of entries in each receive message
 1491: . recvProcs - ranks of the processes that send to this process
 1492: . recvIdx - indices where received entries are to be put (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
 1493: - bs - size of block
 1495: Notes: sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support 0 entries you need
 1496: to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.
 1498: Probably does not handle sends to self properly; those should be removed from the counts used
 1499: in allocating space inside the from struct.
1501: Level: intermediate
1503: @*/
1504: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
1505: {
1506: VecScatter_MPI_General *from, *to;
1507: PetscInt sendSize, recvSize;
1508: PetscInt n, i;
1509: PetscErrorCode ierr;
1511: /* allocate entire send scatter context */
1512: PetscNewLog(ctx,VecScatter_MPI_General,&to);
1513: to->n = nsends;
1514: for(n = 0, sendSize = 0; n < to->n; n++) {sendSize += sendSizes[n];}
1515: PetscMalloc(to->n*sizeof(MPI_Request),&to->requests);
1516: PetscMalloc4(bs*sendSize,PetscScalar,&to->values,sendSize,PetscInt,&to->indices,to->n+1,PetscInt,&to->starts,to->n,PetscMPIInt,&to->procs);
1517: PetscMalloc2(PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,&to->rstatus);
1518: to->starts[0] = 0;
1519: for(n = 0; n < to->n; n++) {
1520: if (sendSizes[n] <=0 ) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
1521: to->starts[n+1] = to->starts[n] + sendSizes[n];
1522: to->procs[n] = sendProcs[n];
1523: for(i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) {
1524: to->indices[i] = sendIdx[i];
1525: }
1526: }
1527: ctx->todata = (void *) to;
1529: /* allocate entire receive scatter context */
1530: PetscNewLog(ctx,VecScatter_MPI_General,&from);
1531: from->n = nrecvs;
1532: for(n = 0, recvSize = 0; n < from->n; n++) {recvSize += recvSizes[n];}
1533: PetscMalloc(from->n*sizeof(MPI_Request),&from->requests);
1534: PetscMalloc4(bs*recvSize,PetscScalar,&from->values,recvSize,PetscInt,&from->indices,from->n+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1535: from->starts[0] = 0;
1536: for(n = 0; n < from->n; n++) {
1537: if (recvSizes[n] <=0 ) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
1538: from->starts[n+1] = from->starts[n] + recvSizes[n];
1539: from->procs[n] = recvProcs[n];
1540: for(i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) {
1541: from->indices[i] = recvIdx[i];
1542: }
1543: }
1544: ctx->fromdata = (void *)from;
1546: /* No local scatter optimization */
1547: from->local.n = 0;
1548: from->local.vslots = 0;
1549: to->local.n = 0;
1550: to->local.vslots = 0;
1551: from->local.nonmatching_computed = PETSC_FALSE;
1552: from->local.n_nonmatching = 0;
1553: from->local.slots_nonmatching = 0;
1554: to->local.nonmatching_computed = PETSC_FALSE;
1555: to->local.n_nonmatching = 0;
1556: to->local.slots_nonmatching = 0;
1558: from->type = VEC_SCATTER_MPI_GENERAL;
1559: to->type = VEC_SCATTER_MPI_GENERAL;
1560: from->bs = bs;
1561: to->bs = bs;
1562: VecScatterCreateCommon_PtoS(from, to, ctx);
1563: return(0);
1564: }
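/*
   Editorial note (not part of the original file): a hedged usage sketch of
   VecScatterCreateLocal() on exactly two MPI processes, each sending its first three
   block entries to the other.  Per the manual page above the context comes from
   VecScatterCreateEmpty(); that call and the exact index choices here are assumptions
   for illustration, not taken from this file.
*/
#if 0   /* illustrative sketch only */
{
  VecScatter     ctx;
  PetscMPIInt    rank;
  PetscInt       sendSizes[1] = {3},sendProcs[1],sendIdx[3] = {0,1,2};
  PetscInt       recvSizes[1] = {3},recvProcs[1],recvIdx[3] = {3,4,5};
  PetscErrorCode ierr;

  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  sendProcs[0] = recvProcs[0] = 1 - rank;   /* exchange with the other process */
  ierr = VecScatterCreateEmpty(PETSC_COMM_WORLD,&ctx);CHKERRQ(ierr);
  ierr = VecScatterCreateLocal(ctx,1,sendSizes,sendProcs,sendIdx,1,recvSizes,recvProcs,recvIdx,1);CHKERRQ(ierr);
}
#endif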
1566: /*
1567: bs indicates how many elements there are in each block. Normally this would be 1.
 1569: Contains a check that PetscMPIInt can handle the sizes needed.
1570: */
1573: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
1574: {
1575: VecScatter_MPI_General *from,*to;
1576: PetscMPIInt size,rank,imdex,tag,n;
1577: PetscInt *source = PETSC_NULL,*owners = PETSC_NULL;
1578: PetscInt *lowner = PETSC_NULL,*start = PETSC_NULL,lengthy,lengthx;
1579: PetscMPIInt *nprocs = PETSC_NULL,nrecvs;
1580: PetscInt i,j,idx,nsends;
1581: PetscInt *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
1582: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
1583: PetscMPIInt *onodes1,*olengths1;
1584: MPI_Comm comm;
1585: MPI_Request *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
1586: MPI_Status recv_status,*send_status;
1587: PetscErrorCode ierr;
1590: PetscObjectGetNewTag((PetscObject)ctx,&tag);
1591: PetscObjectGetComm((PetscObject)xin,&comm);
1592: MPI_Comm_rank(comm,&rank);
1593: MPI_Comm_size(comm,&size);
1594: owners = xin->map->range;
1595: VecGetSize(yin,&lengthy);
1596: VecGetSize(xin,&lengthx);
1598: /* first count number of contributors to each processor */
1599: PetscMalloc2(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner);
1600: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
1601: j = 0;
1602: nsends = 0;
1603: for (i=0; i<nx; i++) {
1604: idx = inidx[i];
1605: if (idx < owners[j]) j = 0;
1606: for (; j<size; j++) {
1607: if (idx < owners[j+1]) {
1608: if (!nprocs[j]++) nsends++;
1609: owner[i] = j;
1610: break;
1611: }
1612: }
1613: }
1614: nprocslocal = nprocs[rank];
1615: nprocs[rank] = 0;
1616: if (nprocslocal) nsends--;
1617: /* inform other processors of number of messages and max length*/
1618: PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
1619: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
1620: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
1621: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
1623: /* post receives: */
1624: PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
1625: count = 0;
1626: for (i=0; i<nrecvs; i++) {
1627: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
1628: count += olengths1[i];
1629: }
1631: /* do sends:
1632: 1) starts[i] gives the starting index in svalues for stuff going to
1633: the ith processor
1634: */
1635: PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);
1636: starts[0] = 0;
1637: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1638: for (i=0; i<nx; i++) {
1639: if (owner[i] != rank) {
1640: svalues[starts[owner[i]]++] = inidx[i];
1641: }
1642: }
1644: starts[0] = 0;
1645: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1646: count = 0;
1647: for (i=0; i<size; i++) {
1648: if (nprocs[i]) {
1649: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
1650: }
1651: }
1653: /* wait on receives */
1654: count = nrecvs;
1655: slen = 0;
1656: while (count) {
1657: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1658: /* unpack receives into our local space */
1659: MPI_Get_count(&recv_status,MPIU_INT,&n);
1660: slen += n;
1661: count--;
1662: }
1664: if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
1665:
1666: /* allocate entire send scatter context */
1667: PetscNewLog(ctx,VecScatter_MPI_General,&to);
1668: to->n = nrecvs;
1669: PetscMalloc(nrecvs*sizeof(MPI_Request),&to->requests);
1670: PetscMalloc4(bs*slen,PetscScalar,&to->values,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,nrecvs,PetscMPIInt,&to->procs);
1671: PetscMalloc2(PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,&to->rstatus);
1672: ctx->todata = (void*)to;
1673: to->starts[0] = 0;
1675: if (nrecvs) {
1677: /* move the data into the send scatter */
1678: base = owners[rank];
1679: rsvalues = rvalues;
1680: for (i=0; i<nrecvs; i++) {
1681: to->starts[i+1] = to->starts[i] + olengths1[i];
1682: to->procs[i] = onodes1[i];
1683: values = rsvalues;
1684: rsvalues += olengths1[i];
1685: for (j=0; j<olengths1[i]; j++) {
1686: to->indices[to->starts[i] + j] = values[j] - base;
1687: }
1688: }
1689: }
1690: PetscFree(olengths1);
1691: PetscFree(onodes1);
1692: PetscFree3(rvalues,source,recv_waits);
1694: /* allocate entire receive scatter context */
1695: PetscNewLog(ctx,VecScatter_MPI_General,&from);
1696: from->n = nsends;
1698: PetscMalloc(nsends*sizeof(MPI_Request),&from->requests);
1699: PetscMalloc4((ny-nprocslocal)*bs,PetscScalar,&from->values,ny-nprocslocal,PetscInt,&from->indices,nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1700: ctx->fromdata = (void*)from;
1702: /* move data into receive scatter */
1703: PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
1704: count = 0; from->starts[0] = start[0] = 0;
1705: for (i=0; i<size; i++) {
1706: if (nprocs[i]) {
1707: lowner[i] = count;
1708: from->procs[count++] = i;
1709: from->starts[count] = start[count] = start[count-1] + nprocs[i];
1710: }
1711: }
1713: for (i=0; i<nx; i++) {
1714: if (owner[i] != rank) {
1715: from->indices[start[lowner[owner[i]]]++] = inidy[i];
1716: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1717: }
1718: }
1719: PetscFree2(lowner,start);
1720: PetscFree2(nprocs,owner);
1721:
1722: /* wait on sends */
1723: if (nsends) {
1724: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1725: MPI_Waitall(nsends,send_waits,send_status);
1726: PetscFree(send_status);
1727: }
1728: PetscFree3(svalues,send_waits,starts);
1730: if (nprocslocal) {
1731: PetscInt nt = from->local.n = to->local.n = nprocslocal;
1732: /* we have a scatter to ourselves */
1733: PetscMalloc(nt*sizeof(PetscInt),&to->local.vslots);
1734: PetscMalloc(nt*sizeof(PetscInt),&from->local.vslots);
1735: nt = 0;
1736: for (i=0; i<nx; i++) {
1737: idx = inidx[i];
1738: if (idx >= owners[rank] && idx < owners[rank+1]) {
1739: to->local.vslots[nt] = idx - owners[rank];
1740: from->local.vslots[nt++] = inidy[i];
1741: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1742: }
1743: }
1744: } else {
1745: from->local.n = 0;
1746: from->local.vslots = 0;
1747: to->local.n = 0;
1748: to->local.vslots = 0;
1749: }
1751: from->local.nonmatching_computed = PETSC_FALSE;
1752: from->local.n_nonmatching = 0;
1753: from->local.slots_nonmatching = 0;
1754: to->local.nonmatching_computed = PETSC_FALSE;
1755: to->local.n_nonmatching = 0;
1756: to->local.slots_nonmatching = 0;
1758: from->type = VEC_SCATTER_MPI_GENERAL;
1759: to->type = VEC_SCATTER_MPI_GENERAL;
1760: from->bs = bs;
1761: to->bs = bs;
1762: VecScatterCreateCommon_PtoS(from,to,ctx);
1763: return(0);
1764: }
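/*
   Editorial note (not part of the original file): a small worked example of the
   counting phase above.  Suppose two processes own rows [0,4) and [4,8)
   (owners = {0,4,8}) and process 0 requests inidx = {1,5,6}.  The loop produces
   owner = {0,1,1} and nprocs = {1,2}; after removing the self contribution,
   nprocslocal = 1 (the local entry 1), nprocs = {0,2}, and nsends = 1: a single
   message of length 2 to process 1.  The receive side is then set up from the
   lengths gathered with PetscGatherNumberOfMessages()/PetscGatherMessageLengths().
*/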
1766: /*
1767: bs indicates how many elements there are in each block. Normally this would be 1.
1768: */
1771: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
1772: {
1773: MPI_Comm comm = ((PetscObject)ctx)->comm;
1774: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
1775: PetscInt bs = to->bs;
1776: PetscMPIInt size;
1777: PetscInt i, n;
1779:
1781: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
1782: ctx->destroy = VecScatterDestroy_PtoP;
1784: ctx->reproduce = PETSC_FALSE;
1785: to->sendfirst = PETSC_FALSE;
1786: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_reproduce",&ctx->reproduce,PETSC_NULL);
1787: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst,PETSC_NULL);
1788: from->sendfirst = to->sendfirst;
1790: MPI_Comm_size(comm,&size);
1791: /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
1792: to->contiq = PETSC_FALSE;
1793: n = from->starts[from->n];
1794: from->contiq = PETSC_TRUE;
1795: for (i=1; i<n; i++) {
1796: if (from->indices[i] != from->indices[i-1] + bs) {
1797: from->contiq = PETSC_FALSE;
1798: break;
1799: }
1800: }
1802: to->use_alltoallv = PETSC_FALSE;
1803: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_alltoall",&to->use_alltoallv,PETSC_NULL);
1804: from->use_alltoallv = to->use_alltoallv;
1805: if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
1806: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1807: if (to->use_alltoallv) {
1808: to->use_alltoallw = PETSC_FALSE;
1809: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_nopack",&to->use_alltoallw,PETSC_NULL);
1810: }
1811: from->use_alltoallw = to->use_alltoallw;
1812: if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
1813: #endif
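   /* With -vecscatter_nopack (use_alltoallw) the entries are not packed into contiguous send
      buffers; instead MPI derived datatypes, built further below, describe the scattered
      entries in place for MPI_Alltoallw() */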
1815: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1816: to->use_window = PETSC_FALSE;
1817: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_window",&to->use_window,PETSC_NULL);
1818: from->use_window = to->use_window;
1819: #endif
1821: if (to->use_alltoallv) {
1823: PetscMalloc2(size,PetscMPIInt,&to->counts,size,PetscMPIInt,&to->displs);
1824: PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
1825: for (i=0; i<to->n; i++) {
1826: to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
1827: }
1828: to->displs[0] = 0;
1829: for (i=1; i<size; i++) {
1830: to->displs[i] = to->displs[i-1] + to->counts[i-1];
1831: }
1833: PetscMalloc2(size,PetscMPIInt,&from->counts,size,PetscMPIInt,&from->displs);
1834: PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
1835: for (i=0; i<from->n; i++) {
1836: from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1837: }
1838: from->displs[0] = 0;
1839: for (i=1; i<size; i++) {
1840: from->displs[i] = from->displs[i-1] + from->counts[i-1];
1841: }
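   /* counts[] and displs[] are indexed by communicator rank (zero for ranks not communicated
      with) and are measured in PetscScalar entries, bs scalars per index */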
1842: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1843: if (to->use_alltoallw) {
1844: PetscMPIInt mpibs = PetscMPIIntCast(bs), mpilen;
1845: ctx->packtogether = PETSC_FALSE;
1846: PetscMalloc3(size,PetscMPIInt,&to->wcounts,size,PetscMPIInt,&to->wdispls,size,MPI_Datatype,&to->types);
1847: PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
1848: PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
1849: for (i=0; i<size; i++) {
1850: to->types[i] = MPIU_SCALAR;
1851: }
1853: for (i=0; i<to->n; i++) {
1854: to->wcounts[to->procs[i]] = 1;
1855: mpilen = PetscMPIIntCast(to->starts[i+1]-to->starts[i]);
1856: MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
1857: MPI_Type_commit(to->types+to->procs[i]);
1858: }
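   /* Each derived type is an indexed block: for destination to->procs[i] it selects bs scalars
      at every index in to->indices[to->starts[i]..to->starts[i+1]), so MPI can read the entries
      directly from the vector array without a packing buffer */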
1859: PetscMalloc3(size,PetscMPIInt,&from->wcounts,size,PetscMPIInt,&from->wdispls,size,MPI_Datatype,&from->types);
1860: PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
1861: PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
1862: for (i=0; i<size; i++) {
1863: from->types[i] = MPIU_SCALAR;
1864: }
1865: if (from->contiq) {
1866: PetscInfo(ctx,"Scattered vector entries are stored contiquously, taking advantage of this with -vecscatter_alltoall\n");
1867: for (i=0; i<from->n; i++) {
1868: from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1869: }
1870: if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
1871: for (i=1; i<from->n; i++) {
1872: from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
1873: }
1874: } else {
1875: for (i=0; i<from->n; i++) {
1876: from->wcounts[from->procs[i]] = 1;
1877: mpilen = PetscMPIIntCast(from->starts[i+1]-from->starts[i]);
1878: MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
1879: MPI_Type_commit(from->types+from->procs[i]);
1880: }
1881: }
1882: } else {
1883: ctx->copy = VecScatterCopy_PtoP_AllToAll;
1884: }
1885: #else
1886: to->use_alltoallw = PETSC_FALSE;
1887: from->use_alltoallw = PETSC_FALSE;
1888: ctx->copy = VecScatterCopy_PtoP_AllToAll;
1889: #endif
1890: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1891: } else if (to->use_window) {
1892: PetscMPIInt temptag,winsize;
1893: MPI_Request *request;
1894: MPI_Status *status;
1895:
1896: PetscObjectGetNewTag((PetscObject)ctx,&temptag);
1897: winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
1898: MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
1899: PetscMalloc(to->n*sizeof(PetscInt),&to->winstarts);
1900: PetscMalloc2(to->n,MPI_Request,&request,to->n,MPI_Status,&status);
1901: for (i=0; i<to->n; i++) {
1902: MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
1903: }
1904: for (i=0; i<from->n; i++) {
1905: MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
1906: }
1907: MPI_Waitall(to->n,request,status);
1908: PetscFree2(request,status);
1910: winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
1911: MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
1912: PetscMalloc(from->n*sizeof(PetscInt),&from->winstarts);
1913: PetscMalloc2(from->n,MPI_Request,&request,from->n,MPI_Status,&status);
1914: for (i=0; i<from->n; i++) {
1915: MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
1916: }
1917: for (i=0; i<to->n; i++) {
1918: MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
1919: }
1920: MPI_Waitall(from->n,request,status);
1921: PetscFree2(request,status);
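   /* to->winstarts[i] now holds the offset inside neighbor to->procs[i]'s receive window at
      which this process's data belongs, and from->winstarts[i] the matching offset inside
      from->procs[i]'s send window, so the one-sided scatter code can address the correct
      portion of each neighbor's window */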
1922: #endif
1923: } else {
1924: PetscTruth use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
1925: PetscInt *sstarts = to->starts, *rstarts = from->starts;
1926: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
1927: MPI_Request *swaits = to->requests,*rwaits = from->requests;
1928: MPI_Request *rev_swaits,*rev_rwaits;
1929: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
1931: /* allocate additional wait variables for the "reverse" scatter */
1932: PetscMalloc(to->n*sizeof(MPI_Request),&rev_rwaits);
1933: PetscMalloc(from->n*sizeof(MPI_Request),&rev_swaits);
1934: to->rev_requests = rev_rwaits;
1935: from->rev_requests = rev_swaits;
1937: /* Register the receives that you will use later (sends for scatter reverse) */
1938: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_rsend",&use_rsend,PETSC_NULL);
1939: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_ssend",&use_ssend,PETSC_NULL);
1940: if (use_rsend) {
1941: PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
1942: to->use_readyreceiver = PETSC_TRUE;
1943: from->use_readyreceiver = PETSC_TRUE;
1944: } else {
1945: to->use_readyreceiver = PETSC_FALSE;
1946: from->use_readyreceiver = PETSC_FALSE;
1947: }
1948: if (use_ssend) {
1949: PetscInfo(ctx,"Using VecScatter Ssend mode\n");
1950: }
1952: for (i=0; i<from->n; i++) {
1953: if (use_rsend) {
1954: MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1955: } else if (use_ssend) {
1956: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1957: } else {
1958: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1959: }
1960: }
1962: for (i=0; i<to->n; i++) {
1963: if (use_rsend) {
1964: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1965: } else if (use_ssend) {
1966: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1967: } else {
1968: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1969: }
1970: }
1971: /* Register receives for scatter and reverse */
1972: for (i=0; i<from->n; i++) {
1973: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
1974: }
1975: for (i=0; i<to->n; i++) {
1976: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
1977: }
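   /* All of the above are persistent requests (MPI_Send_init/MPI_Recv_init); the scatter
      begin/end routines only start them (via MPI_Startall; see the MPI_Startall_irecv calls
      below) rather than rebuilding the requests on every scatter */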
1978: if (use_rsend) {
1979: if (to->n) {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
1980: if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
1981: MPI_Barrier(comm);
1982: }
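   /* MPI ready sends (MPI_Rsend) require the matching receives to already be posted when the
      send starts, so in ready-receiver mode the receives are started once here and the barrier
      guarantees every process has posted them before any sends begin */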
1984: ctx->copy = VecScatterCopy_PtoP_X;
1985: }
1986: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
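   /* Pick begin/end kernels specialized for the block size (presumably with the pack/unpack
      loops unrolled); block sizes without a specialized kernel are rejected below */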
1987: switch (bs) {
1988: case 12:
1989: ctx->begin = VecScatterBegin_12;
1990: ctx->end = VecScatterEnd_12;
1991: break;
1992: case 8:
1993: ctx->begin = VecScatterBegin_8;
1994: ctx->end = VecScatterEnd_8;
1995: break;
1996: case 7:
1997: ctx->begin = VecScatterBegin_7;
1998: ctx->end = VecScatterEnd_7;
1999: break;
2000: case 6:
2001: ctx->begin = VecScatterBegin_6;
2002: ctx->end = VecScatterEnd_6;
2003: break;
2004: case 5:
2005: ctx->begin = VecScatterBegin_5;
2006: ctx->end = VecScatterEnd_5;
2007: break;
2008: case 4:
2009: ctx->begin = VecScatterBegin_4;
2010: ctx->end = VecScatterEnd_4;
2011: break;
2012: case 3:
2013: ctx->begin = VecScatterBegin_3;
2014: ctx->end = VecScatterEnd_3;
2015: break;
2016: case 2:
2017: ctx->begin = VecScatterBegin_2;
2018: ctx->end = VecScatterEnd_2;
2019: break;
2020: case 1:
2021: ctx->begin = VecScatterBegin_1;
2022: ctx->end = VecScatterEnd_1;
2023: break;
2024: default:
2025: SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2026: }
2027: ctx->view = VecScatterView_MPI;
2028: /* Check if the local scatter is actually a copy; important special case */
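   /* e.g. when both vslots lists run through consecutive entries the local part can
      presumably be reduced to a single memory copy instead of an indexed loop */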
2029: if (to->local.n) {
2030: VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2031: }
2032: return(0);
2033: }
2037: /* ------------------------------------------------------------------------------------*/
2038: /*
2039: Scatter from local Seq vectors to a parallel vector.
2040: Reverses the order of the arguments, calls VecScatterCreate_PtoS(), and then
2041: reverses the result.
2042: */
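/*
   Concretely, the todata/fromdata built by the PtoS create are swapped below, along with the
   persistent forward and reverse requests, so the same machinery runs in the opposite direction.
*/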
2045: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2046: {
2047: PetscErrorCode ierr;
2048: MPI_Request *waits;
2049: VecScatter_MPI_General *to,*from;
2052: VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2053: to = (VecScatter_MPI_General*)ctx->fromdata;
2054: from = (VecScatter_MPI_General*)ctx->todata;
2055: ctx->todata = (void*)to;
2056: ctx->fromdata = (void*)from;
2057: /* these two are special; they are ALWAYS stored in the to struct */
2058: to->sstatus = from->sstatus;
2059: to->rstatus = from->rstatus;
2061: from->sstatus = 0;
2062: from->rstatus = 0;
2064: waits = from->rev_requests;
2065: from->rev_requests = from->requests;
2066: from->requests = waits;
2067: waits = to->rev_requests;
2068: to->rev_requests = to->requests;
2069: to->requests = waits;
2070: return(0);
2071: }
2073: /* ---------------------------------------------------------------------------------*/
2076: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,VecScatter ctx)
2077: {
2079: PetscMPIInt size,rank,tag,imdex,n;
2080: PetscInt *owners = xin->map->range;
2081: PetscMPIInt *nprocs = PETSC_NULL;
2082: PetscInt i,j,idx,nsends,*local_inidx = PETSC_NULL,*local_inidy = PETSC_NULL;
2083: PetscInt *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
2084: PetscInt *rvalues = PETSC_NULL,*svalues = PETSC_NULL,base,*values = PETSC_NULL,*rsvalues,recvtotal,lastidx;
2085: PetscMPIInt *onodes1,*olengths1,nrecvs;
2086: MPI_Comm comm;
2087: MPI_Request *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
2088: MPI_Status recv_status,*send_status = PETSC_NULL;
2089: PetscTruth duplicate = PETSC_FALSE;
2090: #if defined(PETSC_USE_DEBUG)
2091: PetscTruth found = PETSC_FALSE;
2092: #endif
2095: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2096: PetscObjectGetComm((PetscObject)xin,&comm);
2097: MPI_Comm_size(comm,&size);
2098: MPI_Comm_rank(comm,&rank);
2099: if (size == 1) {
2100: VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,1,ctx);
2101: return(0);
2102: }
2104: /*
2105: Each processor ships its inidx[j],inidy[j] pairs off to the processor that owns inidx[j];
2106: each processor then calls VecScatterCreate_StoP() on the pairs it has received.
2107: */
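/*
   Each (inidx,inidy) pair is routed to the process that owns inidx (per xin's ownership ranges),
   so after the exchange every process holds only pairs whose source entry is local to it.
*/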
2108: /* first count number of contributors to each processor */
2109: PetscMalloc3(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2110: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2111: lastidx = -1;
2112: j = 0;
2113: for (i=0; i<nx; i++) {
2114: /* if indices are NOT locally sorted, need to start search at the beginning */
2115: if (lastidx > (idx = inidx[i])) j = 0;
2116: lastidx = idx;
2117: for (; j<size; j++) {
2118: if (idx >= owners[j] && idx < owners[j+1]) {
2119: nprocs[j]++;
2120: owner[i] = j;
2121: #if defined(PETSC_USE_DEBUG)
2122: found = PETSC_TRUE;
2123: #endif
2124: break;
2125: }
2126: }
2127: #if defined(PETSC_USE_DEBUG)
2128: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2129: found = PETSC_FALSE;
2130: #endif
2131: }
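   /* In the loop above the cursor j is kept across iterations: for non-decreasing indices the
      ownership search resumes from the previous owner instead of rescanning from rank 0, and
      it is reset only when an index decreases */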
2132: nsends = 0; for (i=0; i<size; i++) { nsends += (nprocs[i] > 0);}
2134: /* inform other processors of number of messages and max length*/
2135: PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
2136: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2137: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2138: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
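   /* olengths1[i] counts (inidx,inidy) pairs arriving from onodes1[i], hence the factor of 2 in
      the receive buffer size and MPI counts below */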
2140: /* post receives: */
2141: PetscMalloc5(2*recvtotal,PetscInt,&rvalues,2*nx,PetscInt,&svalues,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);
2143: count = 0;
2144: for (i=0; i<nrecvs; i++) {
2145: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2146: count += olengths1[i];
2147: }
2148: PetscFree(onodes1);
2150: /* do sends:
2151: 1) starts[i] gives the starting index in svalues for the entries going to
2152: the ith processor
2153: */
2154: starts[0]= 0;
2155: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2156: for (i=0; i<nx; i++) {
2157: svalues[2*starts[owner[i]]] = inidx[i];
2158: svalues[1 + 2*starts[owner[i]]++] = inidy[i];
2159: }
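   /* The pairs are interleaved as (inidx,inidy) in svalues; starts[owner[i]] is bumped as an
      insertion cursor here, which is why starts[] is rebuilt just below before the sends */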
2161: starts[0] = 0;
2162: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2163: count = 0;
2164: for (i=0; i<size; i++) {
2165: if (nprocs[i]) {
2166: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2167: count++;
2168: }
2169: }
2170: PetscFree3(nprocs,owner,starts);
2172: /* wait on receives */
2173: count = nrecvs;
2174: slen = 0;
2175: while (count) {
2176: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2177: /* unpack receives into our local space */
2178: MPI_Get_count(&recv_status,MPIU_INT,&n);
2179: slen += n/2;
2180: count--;
2181: }
2182: if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2183:
2184: PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2185: base = owners[rank];
2186: count = 0;
2187: rsvalues = rvalues;
2188: for (i=0; i<nrecvs; i++) {
2189: values = rsvalues;
2190: rsvalues += 2*olengths1[i];
2191: for (j=0; j<olengths1[i]; j++) {
2192: local_inidx[count] = values[2*j] - base;
2193: local_inidy[count++] = values[2*j+1];
2194: }
2195: }
2196: PetscFree(olengths1);
2198: /* wait on sends */
2199: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2200: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2202: /*
2203: should sort and remove duplicates from local_inidx,local_inidy
2204: */
2206: #if defined(do_it_slow)
2207: /* sort on the from index */
2208: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2209: start = 0;
2210: while (start < slen) {
2211: count = start+1;
2212: last = local_inidx[start];
2213: while (count < slen && last == local_inidx[count]) count++;
2214: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2215: /* sort on to index */
2216: PetscSortInt(count-start,local_inidy+start);
2217: }
2218: /* remove duplicates; not the most efficient way, but probably good enough */
2219: i = start;
2220: while (i < count-1) {
2221: if (local_inidy[i] != local_inidy[i+1]) {
2222: i++;
2223: } else { /* found a duplicate */
2224: duplicate = PETSC_TRUE;
2225: for (j=i; j<slen-1; j++) {
2226: local_inidx[j] = local_inidx[j+1];
2227: local_inidy[j] = local_inidy[j+1];
2228: }
2229: slen--;
2230: count--;
2231: }
2232: }
2233: start = count;
2234: }
2235: #endif
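   /* The sort/duplicate-removal pass above is compiled out (do_it_slow is not defined), so any
      duplicate pairs are currently passed through to VecScatterCreate_StoP() unchanged */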
2236: if (duplicate) {
2237: PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2238: }
2239: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,1,ctx);
2240: PetscFree2(local_inidx,local_inidy);
2241: return(0);
2242: }