Actual source code: pvec2.c
#define PETSCVEC_DLL
/*
   Code for some of the parallel vector primitives.
*/
#include "../src/vec/vec/impls/mpi/pvecimpl.h"
#include "petscblaslapack.h"
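/*
   VecMDot_MPI - Computes the nv dot products z[i] = xin . y[i] for a parallel vector:
   each process forms its local contributions with VecMDot_Seq(), then a single
   MPI_Allreduce() sums the nv partial results over the vector's communicator.
*/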
PetscErrorCode VecMDot_MPI(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMDot_Seq(xin,nv,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,((PetscObject)xin)->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}
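/*
   VecMTDot_MPI - Same reduction pattern as VecMDot_MPI(), but the local kernel
   VecMTDot_Seq() computes the unconjugated (transpose) dot products.
*/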
PetscErrorCode VecMTDot_MPI(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar awork[128],*work = awork;

  if (nv > 128) {
    PetscMalloc(nv*sizeof(PetscScalar),&work);
  }
  VecMTDot_Seq(xin,nv,y,work);
  MPI_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,((PetscObject)xin)->comm);
  if (nv > 128) {
    PetscFree(work);
  }
  return(0);
}
#include "../src/vec/vec/impls/seq/ftn-kernels/fnorm.h"
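/*
   VecNorm_MPI - Computes a vector norm in parallel.  The 2-norm (and Frobenius
   norm) is formed by summing the local squared norms with MPI_Allreduce() and
   taking the square root; the 1-norm sums the local 1-norms; the infinity norm
   takes the maximum of the local maxima; NORM_1_AND_2 combines the 1-norm and
   the squared 2-norm into a single reduction of length two.
*/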
PetscErrorCode VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI     *x = (Vec_MPI*)xin->data;
  PetscReal   sum,work = 0.0;
  PetscScalar *xx = x->array;
  PetscInt    n = xin->map->n;

  if (type == NORM_2 || type == NORM_FROBENIUS) {
#if defined(PETSC_HAVE_SLOW_BLAS_NORM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(xx,&n,&work);
#elif defined(PETSC_USE_UNROLLED_NORM)
    switch (n & 0x3) {
      case 3: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 2: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++;
      case 1: work += PetscRealPart(xx[0]*PetscConj(xx[0])); xx++; n -= 4;
    }
    while (n>0) {
      work += PetscRealPart(xx[0]*PetscConj(xx[0])+xx[1]*PetscConj(xx[1])+
                            xx[2]*PetscConj(xx[2])+xx[3]*PetscConj(xx[3]));
      xx += 4; n -= 4;
    }
#else
    {PetscInt i; for (i=0; i<n; i++) work += PetscRealPart((xx[i])*(PetscConj(xx[i])));}
#endif
#else
    {PetscBLASInt one = 1,bn = PetscBLASIntCast(n);
    work = BLASnrm2_(&bn,xx,&one);
    work *= work;
    }
#endif
    MPI_Allreduce(&work,&sum,1,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);
    *z = sqrt(sum);
    PetscLogFlops(2.0*xin->map->n);
  } else if (type == NORM_1) {
    /* Find the local part */
    VecNorm_Seq(xin,NORM_1,&work);
    /* Find the global sum */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    VecNorm_Seq(xin,NORM_INFINITY,&work);
    /* Find the global max */
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,((PetscObject)xin)->comm);
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    VecNorm_Seq(xin,NORM_1,temp);
    VecNorm_Seq(xin,NORM_2,temp+1);
    temp[1] = temp[1]*temp[1];
    MPI_Allreduce(temp,z,2,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);
    z[1] = sqrt(z[1]);
  }
  return(0);
}
/*
   These two functions are the MPI reduction operations used for max and min with index.
   A call to MPI_Op_create() converts the functions Vec[Max,Min]_Local() into the
   MPI operators Vec[Max,Min]_Local_Op.
*/
MPI_Op VecMax_Local_Op = 0;
MPI_Op VecMin_Local_Op = 0;
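/*
   Illustration (not part of this listing; PETSc performs this registration
   elsewhere, during its own initialization): a user-defined reduction such as
   VecMax_Local() is turned into an MPI_Op with MPI_Op_create(), e.g.

       MPI_Op_create(VecMax_Local,1,&VecMax_Local_Op);
       MPI_Op_create(VecMin_Local,1,&VecMin_Local_Op);

   The second argument flags the operation as commutative; the resulting MPI_Op
   values are then passed to MPI_Allreduce() in VecMax_MPI() and VecMin_MPI() below.
*/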
void MPIAPI VecMax_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] > xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  } else if (xin[0] == xout[0]) {
    xout[1] = PetscMin(xin[1],xout[1]);
  }
  PetscFunctionReturnVoid(); /* cannot return a value */
}
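/*
   VecMin_Local - Companion reduction to VecMax_Local(): each entry pair is
   (value, global index); the smaller value wins, and ties keep the smaller index.
*/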
void MPIAPI VecMin_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
{
  PetscReal *xin = (PetscReal *)in,*xout = (PetscReal*)out;

  if (*datatype != MPIU_REAL) {
    (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
    MPI_Abort(MPI_COMM_WORLD,1);
  }
  if (xin[0] < xout[0]) {
    xout[0] = xin[0];
    xout[1] = xin[1];
  } else if (xin[0] == xout[0]) {
    xout[1] = PetscMin(xin[1],xout[1]);
  }
  PetscFunctionReturnVoid();
}
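/*
   VecMax_MPI - Finds the global maximum entry and, if requested, its global
   index.  The local maximum and the global index of that entry are packed into
   a pair of PetscReal values and reduced with the user-defined operator
   VecMax_Local_Op.
*/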
PetscErrorCode VecMax_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal work;

  /* Find the local max */
  VecMax_Seq(xin,idx,&work);

  /* Find the global max */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MAX,((PetscObject)xin)->comm);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;
    rstart   = xin->map->rstart;
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMax_Local_Op,((PetscObject)xin)->comm);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  return(0);
}
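/*
   VecMin_MPI - Same approach as VecMax_MPI(), using MPI_MIN when no index is
   requested and VecMin_Local_Op when the global index of the minimum is needed.
*/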
PetscErrorCode VecMin_MPI(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscReal work;

  /* Find the local min */
  VecMin_Seq(xin,idx,&work);

  /* Find the global min */
  if (!idx) {
    MPI_Allreduce(&work,z,1,MPIU_REAL,MPI_MIN,((PetscObject)xin)->comm);
  } else {
    PetscReal work2[2],z2[2];
    PetscInt  rstart;

    VecGetOwnershipRange(xin,&rstart,PETSC_NULL);
    work2[0] = work;
    work2[1] = *idx + rstart;
    MPI_Allreduce(work2,z2,2,MPIU_REAL,VecMin_Local_Op,((PetscObject)xin)->comm);
    *z   = z2[0];
    *idx = (PetscInt)z2[1];
  }
  return(0);
}