Actual source code: gmreig.c

petsc-3.7.5 2017-01-01
Report Typos and Errors
  2: #include <../src/ksp/ksp/impls/gmres/gmresimpl.h>
  3: #include <petscblaslapack.h>

  7: PetscErrorCode KSPComputeExtremeSingularValues_GMRES(KSP ksp,PetscReal *emax,PetscReal *emin)
  8: {
  9: #if defined(PETSC_MISSING_LAPACK_GESVD)
 11:   /*
 12:       The Cray math libraries on T3D/T3E, and early versions of Intel Math Kernel Libraries (MKL)
 13:       for PCs do not seem to have the DGESVD() lapack routines
 14:   */
 15:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates.");
 16: #else
 17:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
 19:   PetscInt       n = gmres->it + 1,i,N = gmres->max_k + 2;
 20:   PetscBLASInt   bn, bN,lwork, idummy,lierr;
 21:   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N,sdummy;
 22:   PetscReal      *realpart = gmres->Dsvd;

 25:   PetscBLASIntCast(n,&bn);
 26:   PetscBLASIntCast(N,&bN);
 27:   PetscBLASIntCast(5*N,&lwork);
 28:   PetscBLASIntCast(N,&idummy);
 29:   if (n <= 0) {
 30:     *emax = *emin = 1.0;
 31:     return(0);
 32:   }
 33:   /* copy R matrix to work space */
 34:   PetscMemcpy(R,gmres->hh_origin,(gmres->max_k+2)*(gmres->max_k+1)*sizeof(PetscScalar));

 36:   /* zero below diagonal garbage */
 37:   for (i=0; i<n; i++) R[i*N+i+1] = 0.0;

 39:   /* compute Singular Values */
 40:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 41: #if !defined(PETSC_USE_COMPLEX)
 42:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
 43: #else
 44:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&bn,&bn,R,&bN,realpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,realpart+N,&lierr));
 45: #endif
 46:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SVD Lapack routine %d",(int)lierr);
 47:   PetscFPTrapPop();

 49:   *emin = realpart[n-1];
 50:   *emax = realpart[0];
 51: #endif
 52:   return(0);
 53: }

 55: /* ------------------------------------------------------------------------ */
 56: /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */
 59: PetscErrorCode KSPComputeEigenvalues_GMRES(KSP ksp,PetscInt nmax,PetscReal *r,PetscReal *c,PetscInt *neig)
 60: {
 61: #if defined(PETSC_HAVE_ESSL)
 62:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
 64:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1;
 65:   PetscInt       i,*perm;
 66:   PetscScalar    *R     = gmres->Rsvd;
 67:   PetscScalar    *cwork = R + N*N,sdummy;
 68:   PetscReal      *work,*realpart = gmres->Dsvd;
 69:   PetscBLASInt   zero = 0,bn,bN,idummy,lwork;

 72:   PetscBLASIntCast(n,&bn);
 73:   PetscBLASIntCast(N,&bN);
 74:   idummy = -1;                  /* unused */
 75:   PetscBLASIntCast(5*N,&lwork);
 76:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
 77:   *neig = n;

 79:   if (!n) return(0);

 81:   /* copy R matrix to work space */
 82:   PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));

 84:   /* compute eigenvalues */

 86:   /* for ESSL version need really cwork of length N (complex), 2N
 87:      (real); already at least 5N of space has been allocated */

 89:   PetscMalloc1(lwork,&work);
 90:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 91:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,R,&bN,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork));
 92:   PetscFPTrapPop();
 93:   PetscFree(work);

 95:   /* For now we stick with the convention of storing the real and imaginary
 96:      components of evalues separately.  But is this what we really want? */
 97:   PetscMalloc1(n,&perm);

 99: #if !defined(PETSC_USE_COMPLEX)
100:   for (i=0; i<n; i++) {
101:     realpart[i] = cwork[2*i];
102:     perm[i]     = i;
103:   }
104:   PetscSortRealWithPermutation(n,realpart,perm);
105:   for (i=0; i<n; i++) {
106:     r[i] = cwork[2*perm[i]];
107:     c[i] = cwork[2*perm[i]+1];
108:   }
109: #else
110:   for (i=0; i<n; i++) {
111:     realpart[i] = PetscRealPart(cwork[i]);
112:     perm[i]     = i;
113:   }
114:   PetscSortRealWithPermutation(n,realpart,perm);
115:   for (i=0; i<n; i++) {
116:     r[i] = PetscRealPart(cwork[perm[i]]);
117:     c[i] = PetscImaginaryPart(cwork[perm[i]]);
118:   }
119: #endif
120:   PetscFree(perm);
121: #elif defined(PETSC_MISSING_LAPACK_GEEV)
123:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
124: #elif !defined(PETSC_USE_COMPLEX)
125:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
127:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
128:   PetscBLASInt   bn, bN, lwork, idummy, lierr;
129:   PetscScalar    *R        = gmres->Rsvd,*work = R + N*N;
130:   PetscScalar    *realpart = gmres->Dsvd,*imagpart = realpart + N,sdummy;

133:   PetscBLASIntCast(n,&bn);
134:   PetscBLASIntCast(N,&bN);
135:   PetscBLASIntCast(5*N,&lwork);
136:   PetscBLASIntCast(N,&idummy);
137:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
138:   *neig = n;

140:   if (!n) return(0);

142:   /* copy R matrix to work space */
143:   PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));

145:   /* compute eigenvalues */
146:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
147:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
148:   if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
149:   PetscFPTrapPop();
150:   PetscMalloc1(n,&perm);
151:   for (i=0; i<n; i++) perm[i] = i;
152:   PetscSortRealWithPermutation(n,realpart,perm);
153:   for (i=0; i<n; i++) {
154:     r[i] = realpart[perm[i]];
155:     c[i] = imagpart[perm[i]];
156:   }
157:   PetscFree(perm);
158: #else
159:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
161:   PetscInt       n  = gmres->it + 1,N = gmres->max_k + 1,i,*perm;
162:   PetscScalar    *R = gmres->Rsvd,*work = R + N*N,*eigs = work + 5*N,sdummy;
163:   PetscBLASInt   bn,bN,lwork,idummy,lierr;

166:   PetscBLASIntCast(n,&bn);
167:   PetscBLASIntCast(N,&bN);
168:   PetscBLASIntCast(5*N,&lwork);
169:   PetscBLASIntCast(N,&idummy);
170:   if (nmax < n) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_SIZ,"Not enough room in work space r and c for eigenvalues");
171:   *neig = n;

173:   if (!n) return(0);

175:   /* copy R matrix to work space */
176:   PetscMemcpy(R,gmres->hes_origin,N*N*sizeof(PetscScalar));

178:   /* compute eigenvalues */
179:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
180:   PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,R,&bN,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,gmres->Dsvd,&lierr));
181:   if (lierr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
182:   PetscFPTrapPop();
183:   PetscMalloc1(n,&perm);
184:   for (i=0; i<n; i++) perm[i] = i;
185:   for (i=0; i<n; i++) r[i] = PetscRealPart(eigs[i]);
186:   PetscSortRealWithPermutation(n,r,perm);
187:   for (i=0; i<n; i++) {
188:     r[i] = PetscRealPart(eigs[perm[i]]);
189:     c[i] = PetscImaginaryPart(eigs[perm[i]]);
190:   }
191:   PetscFree(perm);
192: #endif
193:   return(0);
194: }

196: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_HAVE_ESSL)
199: PetscErrorCode KSPComputeRitz_GMRES(KSP ksp,PetscBool ritz,PetscBool small,PetscInt *nrit,Vec S[],PetscReal *tetar,PetscReal *tetai)
200: {
201:   KSP_GMRES      *gmres = (KSP_GMRES*)ksp->data;
203:   PetscInt       n = gmres->it + 1,N = gmres->max_k + 1,NbrRitz,nb=0;
204:   PetscInt       i,j,*perm;
205:   PetscReal      *H,*Q,*Ht;              /* H Hessenberg Matrix and Q matrix of eigenvectors of H*/
206:   PetscReal      *wr,*wi,*modul;       /* Real and imaginary part and modul of the Ritz values*/
207:   PetscReal      *SR,*work;
208:   PetscBLASInt   bn,bN,lwork,idummy;
209:   PetscScalar    *t,sdummy;

212:   /* n: size of the Hessenberg matrix */
213:   if (gmres->fullcycle) n = N-1;
214:   /* NbrRitz: number of (harmonic) Ritz pairs to extract */
215:   NbrRitz = PetscMin(*nrit,n);

217:   /* Definition of PetscBLASInt for lapack routines*/
218:   PetscBLASIntCast(n,&bn);
219:   PetscBLASIntCast(N,&bN);
220:   PetscBLASIntCast(N,&idummy);
221:   PetscBLASIntCast(5*N,&lwork);
222:   /* Memory allocation */
223:   PetscMalloc1(bN*bN,&H);
224:   PetscMalloc1(bn*bn,&Q);
225:   PetscMalloc1(lwork,&work);
226:   PetscMalloc1(n,&wr);
227:   PetscMalloc1(n,&wi);

229:   /* copy H matrix to work space */
230:   if (gmres->fullcycle) {
231:     PetscMemcpy(H,gmres->hes_ritz,bN*bN*sizeof(PetscReal));
232:   } else {
233:     PetscMemcpy(H,gmres->hes_origin,bN*bN*sizeof(PetscReal));
234:   }

236:   /* Modify H to compute Harmonic Ritz pairs H = H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
237:   if (!ritz) {
238:     /* Transpose the Hessenberg matrix => Ht */
239:     PetscMalloc1(bn*bn,&Ht);
240:     for (i=0; i<bn; i++) {
241:       for (j=0; j<bn; j++) {
242:         Ht[i*bn+j] = H[j*bN+i];
243:       }
244:     }
245:     /* Solve the system H^T*t = h^2_{m+1,m}e_m */
246:     PetscCalloc1(bn,&t);
247:     /* t = h^2_{m+1,m}e_m */
248:     if (gmres->fullcycle) {
249:       t[bn-1] = PetscSqr(gmres->hes_ritz[(bn-1)*bN+bn]);
250:     } else {
251:       t[bn-1] = PetscSqr(gmres->hes_origin[(bn-1)*bN+bn]);
252:     }
253:     /* Call the LAPACK routine dgesv to compute t = H^{-T}*t */
254: #if   defined(PETSC_MISSING_LAPACK_GESV)
255:     SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
256: #else
257:     {
258:       PetscBLASInt info;
259:       PetscBLASInt nrhs = 1;
260:       PetscBLASInt *ipiv;
261:       PetscMalloc1(bn,&ipiv);
262:       PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn,&nrhs,Ht,&bn,ipiv,t,&bn,&info));
263:       if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB,"Error while calling the Lapack routine DGESV");
264:       PetscFree(ipiv);
265:       PetscFree(Ht);
266:     }
267: #endif
268:     /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
269:     for (i=0; i<bn; i++) H[(bn-1)*bn+i] += t[i];
270:     PetscFree(t);
271:   }

273:   /* Compute (harmonic) Ritz pairs */
274: #if defined(PETSC_MISSING_LAPACK_HSEQR)
275:   SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
276: #else
277:   {
278:     PetscBLASInt info;
279:     PetscFPTrapPush(PETSC_FP_TRAP_OFF);
280:     PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","V",&bn,H,&bN,wr,wi,&sdummy,&idummy,Q,&bn,work,&lwork,&info));
281:     if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine");
282:   }
283: #endif
284:   /* sort the (harmonic) Ritz values */
285:   PetscMalloc1(n,&modul);
286:   PetscMalloc1(n,&perm);
287:   for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
288:   for (i=0; i<n; i++) perm[i] = i;
289:   PetscSortRealWithPermutation(n,modul,perm);
290:   /* count the number of extracted Ritz or Harmonic Ritz pairs (with complex conjugates) */
291:   if (small) {
292:     while (nb < NbrRitz) {
293:       if (!wi[perm[nb]]) nb += 1;
294:       else nb += 2;
295:     }
296:     PetscMalloc(nb*n*sizeof(PetscReal),&SR);
297:     for (i=0; i<nb; i++) {
298:       tetar[i] = wr[perm[i]];
299:       tetai[i] = wi[perm[i]];
300:       PetscMemcpy(&SR[i*n],&(Q[perm[i]*bn]),n*sizeof(PetscReal));
301:     }
302:   } else {
303:     while (nb < NbrRitz) {
304:       if (wi[perm[n-nb-1]] == 0) nb += 1;
305:       else nb += 2;
306:     }
307:     PetscMalloc(nb*n*sizeof(PetscReal),&SR);
308:     for (i=0; i<nb; i++) {
309:       tetar[i] = wr[perm[n-nb+i]];
310:       tetai[i] = wi[perm[n-nb+i]];
311:       PetscMemcpy(&SR[i*n], &(Q[perm[n-nb+i]*bn]), n*sizeof(PetscReal));
312:     }
313:   }
314:   PetscFree(modul);
315:   PetscFree(perm);

317:   /* Form the Ritz or Harmonic Ritz vectors S=VV*Sr, 
318:     where the columns of VV correspond to the basis of the Krylov subspace */
319:   if (gmres->fullcycle) {
320:     for (j=0; j<nb; j++) {
321:       VecZeroEntries(S[j]);
322:       VecMAXPY(S[j],n,&SR[j*n],gmres->vecb);
323:     }
324:   } else {
325:     for (j=0; j<nb; j++) {
326:       VecZeroEntries(S[j]);
327:       VecMAXPY(S[j],n,&SR[j*n],&VEC_VV(0));
328:     }
329:   }
330:   *nrit = nb;
331:   PetscFree(H);
332:   PetscFree(Q);
333:   PetscFree(SR);
334:   PetscFree(wr);
335:   PetscFree(wi);
336:   return(0);
337: }
338: #endif