Actual source code: mpicuda.cu

petsc-3.7.3 2016-07-24
Report Typos and Errors
  2: /*
  3:    This file contains routines for Parallel vector operations.
  4:  */
  5: #define PETSC_SKIP_SPINLOCK

  7: #include <petscconf.h>
  8: #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
  9: #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>

 #undef __FUNCT__
 #define __FUNCT__ "VecDestroy_MPICUDA"
 /*
    VecDestroy_MPICUDA - Destroys a parallel CUDA vector.

    If v->spptr is set, the device array it holds (when non-NULL) is released
    with cudaFree(), its stream is destroyed, and the Vec_CUDA structure itself
    is freed.  The remaining teardown is delegated to VecDestroy_MPI().

    Input Parameter:
 .  v - the vector being destroyed

    Returns 0 on success, otherwise the error code raised by the failing call.
 */
 PetscErrorCode VecDestroy_MPICUDA(Vec v)
 {
   PetscErrorCode ierr;
   cudaError_t    err;

   PetscFunctionBegin;
   if (v->spptr) {
     Vec_CUDA *veccuda = (Vec_CUDA*)v->spptr;

     if (veccuda->GPUarray) {
       err = cudaFree(veccuda->GPUarray);CHKERRCUDA(err);
       veccuda->GPUarray = NULL;
     }
     err  = cudaStreamDestroy(veccuda->stream);CHKERRCUDA(err);
     /* free through v->spptr (not the local alias) so the PetscFree() call acts on the field itself */
     ierr = PetscFree(v->spptr);CHKERRQ(ierr);
   }
   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
   PetscFunctionReturn(0);
 }

 #undef __FUNCT__
 #define __FUNCT__ "VecNorm_MPICUDA"
 /*
    VecNorm_MPICUDA - Computes a norm of a parallel CUDA vector.

    The local contribution is obtained from VecNorm_SeqCUDA() and then combined
    across the vector's communicator with MPIU_Allreduce().

    Input Parameters:
 +  xin  - the vector
 -  type - NORM_1, NORM_2, NORM_FROBENIUS, NORM_INFINITY or NORM_1_AND_2

    Output Parameter:
 .  z - the norm; for NORM_1_AND_2 it must have room for two values
        (z[0] receives the 1-norm, z[1] the 2-norm)

    Returns 0 on success, otherwise the error code raised by the failing call.
 */
 PetscErrorCode VecNorm_MPICUDA(Vec xin,NormType type,PetscReal *z)
 {
   PetscErrorCode ierr;
   PetscReal      sum,work = 0.0;

   PetscFunctionBegin;
   if (type == NORM_2 || type == NORM_FROBENIUS) {
     ierr  = VecNorm_SeqCUDA(xin,NORM_2,&work);CHKERRQ(ierr);
     /* sum the squares of the local 2-norms, then take the root of the total */
     work *= work;
     ierr  = MPIU_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
     *z    = PetscSqrtReal(sum);
   } else if (type == NORM_1) {
     /* Find the local part */
     ierr = VecNorm_SeqCUDA(xin,NORM_1,&work);CHKERRQ(ierr);
     /* Find the global sum */
     ierr = MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
   } else if (type == NORM_INFINITY) {
     /* Find the local max */
     ierr = VecNorm_SeqCUDA(xin,NORM_INFINITY,&work);CHKERRQ(ierr);
     /* Find the global max */
     ierr = MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
   } else if (type == NORM_1_AND_2) {
     PetscReal temp[2];

     ierr    = VecNorm_SeqCUDA(xin,NORM_1,temp);CHKERRQ(ierr);
     ierr    = VecNorm_SeqCUDA(xin,NORM_2,temp+1);CHKERRQ(ierr);
     /* square the local 2-norm so both entries can be reduced with a single sum */
     temp[1] = temp[1]*temp[1];
     ierr    = MPIU_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
     z[1]    = PetscSqrtReal(z[1]);
   }
   PetscFunctionReturn(0);
 }

 #undef __FUNCT__
 #define __FUNCT__ "VecDot_MPICUDA"
 /*
    VecDot_MPICUDA - Dot product of two parallel CUDA vectors.

    Calls VecDot_SeqCUDA() for the local contribution and sums the results over
    the communicator of xin.

    Input Parameters:
 +  xin - first vector
 -  yin - second vector

    Output Parameter:
 .  z - the global result

    Returns 0 on success, otherwise the error code raised by the failing call.
 */
 PetscErrorCode VecDot_MPICUDA(Vec xin,Vec yin,PetscScalar *z)
 {
   PetscErrorCode ierr;
   PetscScalar    sum,work;

   PetscFunctionBegin;
   ierr = VecDot_SeqCUDA(xin,yin,&work);CHKERRQ(ierr);
   ierr = MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
   *z   = sum;
   PetscFunctionReturn(0);
 }

 #undef __FUNCT__
 #define __FUNCT__ "VecTDot_MPICUDA"
 /*
    VecTDot_MPICUDA - Parallel counterpart of VecTDot_SeqCUDA().

    Calls VecTDot_SeqCUDA() for the local contribution and sums the results
    over the communicator of xin.

    Input Parameters:
 +  xin - first vector
 -  yin - second vector

    Output Parameter:
 .  z - the global result

    Returns 0 on success, otherwise the error code raised by the failing call.
 */
 PetscErrorCode VecTDot_MPICUDA(Vec xin,Vec yin,PetscScalar *z)
 {
   PetscErrorCode ierr;
   PetscScalar    sum,work;

   PetscFunctionBegin;
   ierr = VecTDot_SeqCUDA(xin,yin,&work);CHKERRQ(ierr);
   ierr = MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
   *z   = sum;
   PetscFunctionReturn(0);
 }

 #undef __FUNCT__
 #define __FUNCT__ "VecMDot_MPICUDA"
 /*
    VecMDot_MPICUDA - Multiple dot products of one parallel CUDA vector against
    an array of vectors.

    VecMDot_SeqCUDA() fills a work buffer with the nv local results, which are
    then summed over the communicator of xin in a single reduction.  The work
    buffer lives on the stack for nv <= 128 and is heap-allocated otherwise.

    Input Parameters:
 +  xin - the vector
 .  nv  - number of vectors in y
 -  y   - the array of vectors

    Output Parameter:
 .  z - array of nv global results

    Returns 0 on success, otherwise the error code raised by the failing call.
 */
 PetscErrorCode VecMDot_MPICUDA(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
 {
   PetscErrorCode ierr;
   PetscScalar    awork[128],*work = awork;

   PetscFunctionBegin;
   if (nv > 128) {
     /* too many results for the stack buffer; a failed allocation must not go unnoticed */
     ierr = PetscMalloc1(nv,&work);CHKERRQ(ierr);
   }
   ierr = VecMDot_SeqCUDA(xin,nv,y,work);CHKERRQ(ierr);
   ierr = MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRQ(ierr);
   if (nv > 128) {
     ierr = PetscFree(work);CHKERRQ(ierr);
   }
   PetscFunctionReturn(0);
 }

112: /*MC
113:    VECMPICUDA - VECMPICUDA = "mpicuda" - The basic parallel vector, modified to use CUDA

115:    Options Database Keys:
116: . -vec_type mpicuda - sets the vector type to VECMPICUDA during a call to VecSetFromOptions()

118:   Level: beginner

120: .seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI, VecType, VecCreateMPI()
121: M*/


#undef __FUNCT__
#define __FUNCT__ "VecDuplicate_MPICUDA"
/*
   VecDuplicate_MPICUDA - Creates a new parallel CUDA vector with the same
   layout, operations table and ghosting as an existing one.

   The new vector shares the layout of win by reference, copies its ops table,
   and, when win has a local (ghosted) representation, gets its own local
   representation wrapping the new vector's array plus a reference to the same
   localupdate object.  Stash flags, the attached object/function lists and the
   block sizes are copied as well.

   Input Parameter:
.  win - the vector to duplicate

   Output Parameter:
.  v - the newly created vector

   Returns 0 on success, otherwise the error code raised by the failing call.
*/
PetscErrorCode VecDuplicate_MPICUDA(Vec win,Vec *v)
{
  PetscErrorCode ierr;
  Vec_MPI        *vw,*w = (Vec_MPI*)win->data;
  PetscScalar    *array;

  PetscFunctionBegin;
  ierr = VecCreate(PetscObjectComm((PetscObject)win),v);CHKERRQ(ierr);
  ierr = PetscLayoutReference(win->map,&(*v)->map);CHKERRQ(ierr);

  ierr = VecCreate_MPI_Private(*v,PETSC_FALSE,w->nghost,0);CHKERRQ(ierr);
  vw   = (Vec_MPI*)(*v)->data;
  ierr = PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));CHKERRQ(ierr);

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    ierr = VecGetArray(*v,&array);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);CHKERRQ(ierr);
    ierr = PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));CHKERRQ(ierr);
    ierr = VecRestoreArray(*v,&array);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)*v,(PetscObject)vw->localrep);CHKERRQ(ierr);
    /* the update object is shared with win, so take an extra reference on it */
    vw->localupdate = w->localupdate;
    if (vw->localupdate) {
      ierr = PetscObjectReference((PetscObject)vw->localupdate);CHKERRQ(ierr);
    }
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  /* change type_name appropriately */
  ierr = PetscObjectChangeTypeName((PetscObject)(*v),VECMPICUDA);CHKERRQ(ierr);

  ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);CHKERRQ(ierr);
  (*v)->map->bs   = PetscAbs(win->map->bs);
  (*v)->bstash.bs = win->bstash.bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecDotNorm2_MPICUDA"
/*
   VecDotNorm2_MPICUDA - Parallel counterpart of VecDotNorm2_SeqCUDA().

   VecDotNorm2_SeqCUDA() produces two local values, which are summed over the
   communicator of s in one two-element reduction.

   Input Parameters:
+  s - first vector
-  t - second vector

   Output Parameters:
+  dp - global sum of the first value returned by VecDotNorm2_SeqCUDA()
-  nm - global sum of the second value returned by VecDotNorm2_SeqCUDA()

   Returns 0 on success, otherwise the error code raised by the failing call.
*/
PetscErrorCode VecDotNorm2_MPICUDA(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
{
  PetscErrorCode ierr;
  PetscScalar    work[2],sum[2];

  PetscFunctionBegin;
  ierr = VecDotNorm2_SeqCUDA(s,t,work,work+1);CHKERRQ(ierr);
  /* pass the arrays themselves (element pointers), not pointers to the arrays */
  ierr = MPIU_Allreduce(work,sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));CHKERRQ(ierr);
  *dp  = sum[0];
  *nm  = sum[1];
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecCreate_MPICUDA"
/*
   VecCreate_MPICUDA - Type constructor for VECMPICUDA.

   Builds the vector through VecCreate_MPI_Private(), renames its type to
   VECMPICUDA, installs the CUDA implementations in the operations table,
   calls VecCUDAAllocateCheck(), marks the vector's valid data location as
   PETSC_CUDA_GPU and finally sets every entry to zero.

   Input Parameter:
.  vv - the vector being given the VECMPICUDA type

   Returns 0 on success, otherwise the error code raised by the failing call.
*/
PETSC_EXTERN PetscErrorCode VecCreate_MPICUDA(Vec vv)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCreate_MPI_Private(vv,PETSC_FALSE,0,0);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)vv,VECMPICUDA);CHKERRQ(ierr);

  /* reductions need an MPI step, so they get the parallel variants defined in this file */
  vv->ops->dotnorm2               = VecDotNorm2_MPICUDA;
  vv->ops->waxpy                  = VecWAXPY_SeqCUDA;
  vv->ops->duplicate              = VecDuplicate_MPICUDA;
  vv->ops->dot                    = VecDot_MPICUDA;
  vv->ops->mdot                   = VecMDot_MPICUDA;
  vv->ops->tdot                   = VecTDot_MPICUDA;
  vv->ops->norm                   = VecNorm_MPICUDA;
  vv->ops->scale                  = VecScale_SeqCUDA;
  vv->ops->copy                   = VecCopy_SeqCUDA;
  vv->ops->set                    = VecSet_SeqCUDA;
  vv->ops->swap                   = VecSwap_SeqCUDA;
  vv->ops->axpy                   = VecAXPY_SeqCUDA;
  vv->ops->axpby                  = VecAXPBY_SeqCUDA;
  vv->ops->maxpy                  = VecMAXPY_SeqCUDA;
  vv->ops->aypx                   = VecAYPX_SeqCUDA;
  vv->ops->axpbypcz               = VecAXPBYPCZ_SeqCUDA;
  vv->ops->pointwisemult          = VecPointwiseMult_SeqCUDA;
  vv->ops->setrandom              = VecSetRandom_SeqCUDA;
  vv->ops->placearray             = VecPlaceArray_SeqCUDA;
  vv->ops->replacearray           = VecReplaceArray_SeqCUDA;
  vv->ops->resetarray             = VecResetArray_SeqCUDA;
  vv->ops->dot_local              = VecDot_SeqCUDA;
  vv->ops->tdot_local             = VecTDot_SeqCUDA;
  vv->ops->norm_local             = VecNorm_SeqCUDA;
  vv->ops->mdot_local             = VecMDot_SeqCUDA;
  vv->ops->destroy                = VecDestroy_MPICUDA;
  vv->ops->pointwisedivide        = VecPointwiseDivide_SeqCUDA;
  vv->ops->getlocalvector         = VecGetLocalVector_SeqCUDA;
  vv->ops->restorelocalvector     = VecRestoreLocalVector_SeqCUDA;
  vv->ops->getlocalvectorread     = VecGetLocalVector_SeqCUDA;
  vv->ops->restorelocalvectorread = VecRestoreLocalVector_SeqCUDA;

  /* VecCUDAAllocateCheck() reports a PetscErrorCode, so it is checked with CHKERRQ rather than CHKERRCUDA */
  ierr = VecCUDAAllocateCheck(vv);CHKERRQ(ierr);
  vv->valid_GPU_array = PETSC_CUDA_GPU;
  ierr = VecSet(vv,0.0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecCreate_CUDA"
/*
   VecCreate_CUDA - Type constructor that picks the CUDA vector type from the
   size of the vector's communicator: VECSEQCUDA on a single process,
   VECMPICUDA otherwise.

   Input Parameter:
.  v - the vector whose type is being set

   Returns 0 on success, otherwise the error code raised by the failing call.
*/
PETSC_EXTERN PetscErrorCode VecCreate_CUDA(Vec v)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = VecSetType(v,VECSEQCUDA);CHKERRQ(ierr);
  } else {
    ierr = VecSetType(v,VECMPICUDA);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}