Actual source code: dsrtdf.c

slepc-3.7.3 2016-09-29
Report Typos and Errors
  1: /*
  2:    BDC - Block-divide and conquer (see description in README file).

  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain

  8:    This file is part of SLEPc.

 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.

 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 17:    more details.

 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */

 24: #include <slepc/private/dsimpl.h>
 25: #include <slepcblaslapack.h>

 27: PetscErrorCode BDC_dsrtdf_(PetscBLASInt *k,PetscBLASInt n,PetscBLASInt n1, 
 28:         PetscReal *d,PetscReal *q,PetscBLASInt ldq,PetscBLASInt *indxq, 
 29:         PetscReal *rho,PetscReal *z,PetscReal *dlamda,PetscReal *w, 
 30:         PetscReal *q2,PetscBLASInt *indx,PetscBLASInt *indxc,PetscBLASInt *indxp, 
 31:         PetscBLASInt *coltyp,PetscReal reltol,PetscBLASInt *dz,PetscBLASInt *de, 
 32:         PetscBLASInt *info)
 33: {
 34: /*  -- Routine written in LAPACK Version 3.0 style -- */
 35: /* *************************************************** */
 36: /*     Written by */
 37: /*     Michael Moldaschl and Wilfried Gansterer */
 38: /*     University of Vienna */
 39: /*     last modification: March 16, 2014 */

 41: /*     Small adaptations of original code written by */
 42: /*     Wilfried Gansterer and Bob Ward, */
 43: /*     Department of Computer Science, University of Tennessee */
 44: /*     see http://dx.doi.org/10.1137/S1064827501399432 */
 45: /* *************************************************** */

 47: /*  Purpose */
 48: /*  ======= */

 50: /*  DSRTDF merges the two sets of eigenvalues of a rank-one modified */
 51: /*  diagonal matrix D+ZZ^T together into a single sorted set. Then it tries */
 52: /*  to deflate the size of the problem. */
 53: /*  There are two ways in which deflation can occur:  when two or more */
 54: /*  eigenvalues of D are close together or if there is a tiny entry in the */
 55: /*  Z vector.  For each such occurrence the order of the related secular */
 56: /*  equation problem is reduced by one. */

 58: /*  Arguments */
 59: /*  ========= */

 61: /*  K      (output) INTEGER */
 62: /*         The number of non-deflated eigenvalues, and the order of the */
 63: /*         related secular equation. 0 <= K <=N. */

 65: /*  N      (input) INTEGER */
 66: /*         The dimension of the diagonal matrix.  N >= 0. */

 68: /*  N1     (input) INTEGER */
 69: /*         The location of the last eigenvalue in the leading sub-matrix. */
 70: /*         min(1,N) <= N1 <= max(1,N). */

 72: /*  D      (input/output) DOUBLE PRECISION array, dimension (N) */
 73: /*         On entry, D contains the eigenvalues of the two submatrices to */
 74: /*         be combined. */
 75: /*         On exit, D contains the trailing (N-K) updated eigenvalues */
 76: /*         (those which were deflated) sorted into increasing order. */

 78: /*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N) */
 79: /*         On entry, Q contains the eigenvectors of two submatrices in */
 80: /*         the two square blocks with corners at (1,1), (N1,N1) */
 81: /*         and (N1+1, N1+1), (N,N). */
 82: /*         On exit, Q contains the trailing (N-K) updated eigenvectors */
 83: /*         (those which were deflated) in its last N-K columns. */

 85: /*  LDQ    (input) INTEGER */
 86: /*         The leading dimension of the array Q.  LDQ >= max(1,N). */

 88: /*  INDXQ  (input/output) INTEGER array, dimension (N) */
 89: /*         The permutation which separately sorts the two sub-problems */
 90: /*         in D into ascending order.  Note that elements in the second */
 91: /*         half of this permutation must first have N1 added to their */
 92: /*         values. Destroyed on exit. */

 94: /*  RHO    (input/output) DOUBLE PRECISION */
 95: /*         On entry, the off-diagonal element associated with the rank-1 */
 96: /*         cut which originally split the two submatrices which are now */
 97: /*         being recombined. */
 98: /*         On exit, RHO has been modified to the value required by */
 99: /*         DLAED3M (made positive and multiplied by two, such that */
100: /*         the modification vector has norm one). */

102: /*  Z      (input/output) DOUBLE PRECISION array, dimension (N) */
103: /*         On entry, Z contains the updating vector. */
104: /*         On exit, the contents of Z has been destroyed by the updating */
105: /*         process. */

107: /*  DLAMDA (output) DOUBLE PRECISION array, dimension (N) */
108: /*         A copy of the first K non-deflated eigenvalues which */
109: /*         will be used by DLAED3M to form the secular equation. */

111: /*  W      (output) DOUBLE PRECISION array, dimension (N) */
112: /*         The first K values of the final deflation-altered z-vector */
113: /*         which will be passed to DLAED3M. */

115: /*  Q2     (output) DOUBLE PRECISION array, dimension */
116: /*         ( N*N ) (if everything is deflated) or */
117: /*         ( N1*(COLTYP(1)+COLTYP(2)) + (N-N1)*(COLTYP(2)+COLTYP(3)) ) */
118: /*         (if not everything is deflated) */
119: /*         If everything is deflated, then N*N intermediate workspace */
120: /*         is needed in Q2. */
121: /*         If not everything is deflated, Q2 contains on exit a copy of the */
122: /*         first K non-deflated eigenvectors which will be used by */
123: /*         DLAED3M in a matrix multiply (DGEMM) to accumulate the new */
124: /*         eigenvectors, followed by the N-K deflated eigenvectors. */

126: /*  INDX   (workspace) INTEGER array, dimension (N) */
127: /*         The permutation used to sort the contents of DLAMDA into */
128: /*         ascending order. */

130: /*  INDXC  (output) INTEGER array, dimension (N) */
131: /*         The permutation used to arrange the columns of the deflated */
132: /*         Q matrix into three groups:  columns in the first group contain */
133: /*         non-zero elements only at and above N1 (type 1), columns in the */
134: /*         second group are dense (type 2), and columns in the third group */
135: /*         contain non-zero elements only below N1 (type 3). */

137: /*  INDXP  (workspace) INTEGER array, dimension (N) */
138: /*         The permutation used to place deflated values of D at the end */
139: /*         of the array.  INDXP(1:K) points to the nondeflated D-values */
140: /*         and INDXP(K+1:N) points to the deflated eigenvalues. */

142: /*  COLTYP (workspace/output) INTEGER array, dimension (N) */
143: /*         During execution, a label which will indicate which of the */
144: /*         following types a column in the Q2 matrix is: */
145: /*         1 : non-zero in the upper half only; */
146: /*         2 : dense; */
147: /*         3 : non-zero in the lower half only; */
148: /*         4 : deflated. */
149: /*         On exit, COLTYP(i) is the number of columns of type i, */
150: /*         for i=1 to 4 only. */

152: /*  RELTOL (input) DOUBLE PRECISION */
153: /*         User specified deflation tolerance. If RELTOL.LT.20*EPS, then */
154: /*         the standard value used in the original LAPACK routines is used. */

156: /*  DZ     (output) INTEGER, DZ.GE.0 */
157: /*         counts the deflation due to a small component in the modification */
158: /*         vector. */

160: /*  DE     (output) INTEGER, DE.GE.0 */
161: /*         counts the deflation due to close eigenvalues. */

163: /*  INFO   (output) INTEGER */
164: /*          = 0:  successful exit. */
165: /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

167: /*  Further Details */
168: /*  =============== */

170: /*  Based on code written by */
171: /*  Wilfried Gansterer and Bob Ward, */
172: /*  Department of Computer Science, University of Tennessee */

174: /*  Based on the design of the LAPACK code DLAED2 with modifications */
175: /*  to allow a block divide and conquer scheme */

177: /*  DLAED2 was written by Jeff Rutter, Computer Science Division, University */
178: /*  of California at Berkeley, USA, and modified by Francoise Tisseur, */
179: /*  University of Tennessee. */

181: /*  ===================================================================== */

183: #if defined(SLEPC_MISSING_LAPACK_LAMRG) || defined(SLEPC_MISSING_LAPACK_LACPY)
185:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAMRG/LACPY - Lapack routine is unavailable");
186: #else
187:   PetscReal    c, s, t, eps, tau, tol, dmax, dmone = -1.;
188:   PetscBLASInt i, j, i1, k2, n2, ct, nj, pj=0, js, iq1, iq2;
189:   PetscBLASInt psm[4], imax, jmax, ctot[4], factmp=1, one=1;

192:   *info = 0;

194:   if (n < 0) {
195:     *info = -2;
196:   } else if (n1 < PetscMin(1,n) || n1 > PetscMax(1,n)) {
197:     *info = -3;
198:   } else if (ldq < PetscMax(1,n)) {
199:     *info = -6;
200:   }
201:   if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in DSRTDF",-(*info));

203:   /* Initialize deflation counters */

205:   *dz = 0;
206:   *de = 0;

208: /* **************************************************************************** */

210:   /* Quick return if possible */

212:   if (n == 0) return(0);

214: /* **************************************************************************** */

216:   n2 = n - n1;

218:   /* Modify Z so that RHO is positive. */

220:   if (*rho < 0.) {
221:     PetscStackCallBLAS("BLASscal",BLASscal_(&n2, &dmone, &z[n1], &one));
222:   }

224:   /* Normalize z so that norm2(z) = 1.  Since z is the concatenation of */
225:   /* two normalized vectors, norm2(z) = sqrt(2). (NOTE that this holds also */
226:   /* here in the approximate block-tridiagonal D&C because the two vectors are */
227:   /* singular vectors, but it would not necessarily hold in a D&C for a */
228:   /* general banded matrix !) */

230:   t = 1. / PetscSqrtReal(2.);
231:   PetscStackCallBLAS("BLASscal",BLASscal_(&n, &t, z, &one));

233:   /* NOTE: at this point the value of RHO is modified in order to */
234:   /*       normalize Z:    RHO = ABS( norm2(z)**2 * RHO */
235:   /*       in our case:    norm2(z) = sqrt(2), and therefore: */

237:   *rho = PetscAbs(*rho * 2.);

239:   /* Sort the eigenvalues into increasing order */

241:   for (i = n1; i < n; ++i) indxq[i] += n1;

243:   /* re-integrate the deflated parts from the last pass */

245:   for (i = 0; i < n; ++i) dlamda[i] = d[indxq[i]-1];
246:   PetscStackCallBLAS("LAPACKlamrg",LAPACKlamrg_(&n1, &n2, dlamda, &one, &one, indxc));
247:   for (i = 0; i < n; ++i) indx[i] = indxq[indxc[i]-1];
248:   for (i = 0; i < n; ++i) indxq[i] = 0;

250:   /* Calculate the allowable deflation tolerance */

252:   /* imax = BLASamax_(&n, z, &one); */
253:   imax = 1;
254:   dmax = PetscAbsReal(z[0]);
255:   for (i=1;i<n;i++) {
256:     if (PetscAbsReal(z[i])>dmax) {
257:       imax = i+1;
258:       dmax = PetscAbsReal(z[i]);
259:     }
260:   }
261:   /* jmax = BLASamax_(&n, d, &one); */
262:   jmax = 1;
263:   dmax = PetscAbsReal(d[0]);
264:   for (i=1;i<n;i++) {
265:     if (PetscAbsReal(d[i])>dmax) {
266:       jmax = i+1;
267:       dmax = PetscAbsReal(d[i]);
268:     }
269:   }

271:   eps = LAPACKlamch_("Epsilon");
272:   if (reltol < eps * 20) {
273:     /* use the standard deflation tolerance from the original LAPACK code */
274:     tol = eps * 8. * PetscMax(PetscAbs(d[jmax-1]),PetscAbs(z[imax-1]));
275:   } else {
276:     /* otherwise set TOL to the input parameter RELTOL ! */
277:     tol = reltol * PetscMax(PetscAbs(d[jmax-1]),PetscAbs(z[imax-1]));
278:   }

280:   /* If the rank-1 modifier is small enough, no more needs to be done */
281:   /* except to reorganize Q so that its columns correspond with the */
282:   /* elements in D. */

284:   if (*rho * PetscAbs(z[imax-1]) <= tol) {

286:     /* complete deflation because of small rank-one modifier */
287:     /* NOTE: in this case we need N*N space in the array Q2 ! */

289:     *dz = n;
290:     *k = 0;
291:     iq2 = 1;
292:     for (j = 0; j < n; ++j) {
293:       i = indx[j];
294:       indxq[j] = i;
295:       PetscStackCallBLAS("BLAScopy",BLAScopy_(&n, &q[(i-1)*ldq], &one, &q2[iq2-1], &one));
296:       dlamda[j] = d[i-1];
297:       iq2 += n;
298:     }
299:     PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("A", &n, &n, q2, &n, q, &ldq));
300:     PetscStackCallBLAS("BLAScopy",BLAScopy_(&n, dlamda, &one, d, &one));
301:     return(0);
302:   }

304:   /* If there are multiple eigenvalues then the problem deflates.  Here */
305:   /* the number of equal eigenvalues is found.  As each equal */
306:   /* eigenvalue is found, an elementary reflector is computed to rotate */
307:   /* the corresponding eigensubspace so that the corresponding */
308:   /* components of Z are zero in this new basis. */
309:   
310:   /* initialize the column types */

312:   /* first N1 columns are initially (before deflation) of type 1 */
313:   for (i = 0; i < n1; ++i) coltyp[i] = 1;
314:   /* columns N1+1 to N are initially (before deflation) of type 3 */
315:   for (i = n1; i < n; ++i) coltyp[i] = 3;

317:   *k = 0;
318:   k2 = n + 1;
319:   for (j = 0; j < n; ++j) {
320:     nj = indx[j]-1;
321:     if (*rho * PetscAbs(z[nj]) <= tol) {

323:       /* Deflate due to small z component. */
324:       ++(*dz);
325:       --k2;

327:       /* deflated eigenpair, therefore column type 4 */
328:       coltyp[nj] = 4;
329:       indxp[k2-1] = nj+1;
330:       indxq[k2-1] = nj+1;
331:       if (j+1 == n) goto L100;
332:     } else {

334:       /* not deflated */
335:       pj = nj;
336:       goto L80;
337:     }
338:   }
339:   factmp = 1;
340: L80:
341:   ++j;
342:   nj = indx[j]-1;
343:   if (j+1 > n) goto L100;
344:   if (*rho * PetscAbs(z[nj]) <= tol) {

346:     /* Deflate due to small z component. */
347:     ++(*dz);
348:     --k2;
349:     coltyp[nj] = 4;
350:     indxp[k2-1] = nj+1;
351:     indxq[k2-1] = nj+1;
352:   } else {

354:     /* Check if eigenvalues are close enough to allow deflation. */
355:     s = z[pj];
356:     c = z[nj];

358:     /* Find sqrt(a**2+b**2) without overflow or */
359:     /* destructive underflow. */

361:     tau = LAPACKlapy2_(&c, &s);
362:     t = d[nj] - d[pj];
363:     c /= tau;
364:     s = -s / tau;
365:     if (PetscAbs(t * c * s) <= tol) {

367:       /* Deflate due to close eigenvalues. */
368:       ++(*de);
369:       z[nj] = tau;
370:       z[pj] = 0.;
371:       if (coltyp[nj] != coltyp[pj]) coltyp[nj] = 2;

373:         /* if deflation happens across the two eigenvector blocks */
374:         /* (eigenvalues corresponding to different blocks), then */
375:         /* on column in the eigenvector matrix fills up completely */
376:         /* (changes from type 1 or 3 to type 2) */

378:         /* eigenpair PJ is deflated, therefore the column type changes */
379:         /* to 4 */

381:         coltyp[pj] = 4;
382:         PetscStackCallBLAS("BLASrot",BLASrot_(&n, &q[pj*ldq], &one, &q[nj*ldq], &one, &c, &s));
383:         t = d[pj] * (c * c) + d[nj] * (s * s);
384:         d[nj] = d[pj] * (s * s) + d[nj] * (c * c);
385:         d[pj] = t;
386:         --k2;
387:         i = 1;
388: L90:
389:         if (k2 + i <= n) {
390:           if (d[pj] < d[indxp[k2 + i-1]-1]) {
391:             indxp[k2 + i - 2] = indxp[k2 + i - 1];
392:             indxq[k2 + i - 2] = indxq[k2 + i - 1];
393:             indxp[k2 + i - 1] = pj + 1;
394:             indxq[k2 + i - 2] = pj + 1;
395:             ++i;
396:             goto L90;
397:           } else {
398:             indxp[k2 + i - 2] = pj + 1;
399:             indxq[k2 + i - 2] = pj + 1;
400:           }
401:         } else {
402:           indxp[k2 + i - 2] = pj + 1;
403:           indxq[k2 + i - 2] = pj + 1;
404:         }
405:         pj = nj;
406:         factmp = -1;
407:       } else {

409:       /* do not deflate */
410:       ++(*k);
411:       dlamda[*k-1] = d[pj];
412:       w[*k-1] = z[pj];
413:       indxp[*k-1] = pj + 1;
414:       indxq[*k-1] = pj + 1;
415:       factmp = 1;
416:       pj = nj;
417:     }
418:   }
419:   goto L80;
420: L100:

422:   /* Record the last eigenvalue. */
423:   ++(*k);
424:   dlamda[*k-1] = d[pj];
425:   w[*k-1] = z[pj];
426:   indxp[*k-1] = pj+1;
427:   indxq[*k-1] = (pj+1) * factmp;

429:   /* Count up the total number of the various types of columns, then */
430:   /* form a permutation which positions the four column types into */
431:   /* four uniform groups (although one or more of these groups may be */
432:   /* empty). */

434:   for (j = 0; j < 4; ++j) ctot[j] = 0;
435:   for (j = 0; j < n; ++j) {
436:     ct = coltyp[j];
437:     ++ctot[ct-1];
438:   }

440:   /* PSM(*) = Position in SubMatrix (of types 1 through 4) */
441:   psm[0] = 1;
442:   psm[1] = ctot[0] + 1;
443:   psm[2] = psm[1] + ctot[1];
444:   psm[3] = psm[2] + ctot[2];
445:   *k = n - ctot[3];

447:   /* Fill out the INDXC array so that the permutation which it induces */
448:   /* will place all type-1 columns first, all type-2 columns next, */
449:   /* then all type-3's, and finally all type-4's. */

451:   for (j = 0; j < n; ++j) {
452:     js = indxp[j];
453:     ct = coltyp[js-1];
454:     indx[psm[ct - 1]-1] = js;
455:     indxc[psm[ct - 1]-1] = j+1;
456:     ++psm[ct - 1];
457:   }

459:   /* Sort the eigenvalues and corresponding eigenvectors into DLAMDA */
460:   /* and Q2 respectively.  The eigenvalues/vectors which were not */
461:   /* deflated go into the first K slots of DLAMDA and Q2 respectively, */
462:   /* while those which were deflated go into the last N - K slots. */

464:   i = 0;
465:   iq1 = 0;
466:   iq2 = (ctot[0] + ctot[1]) * n1;
467:   for (j = 0; j < ctot[0]; ++j) {
468:     js = indx[i];
469:     PetscStackCallBLAS("BLAScopy",BLAScopy_(&n1, &q[(js-1)*ldq], &one, &q2[iq1], &one));
470:     z[i] = d[js-1];
471:     ++i;
472:     iq1 += n1;
473:   }

475:   for (j = 0; j < ctot[1]; ++j) {
476:     js = indx[i];
477:     PetscStackCallBLAS("BLAScopy",BLAScopy_(&n1, &q[(js-1)*ldq], &one, &q2[iq1], &one));
478:     PetscStackCallBLAS("BLAScopy",BLAScopy_(&n2, &q[n1+(js-1)*ldq], &one, &q2[iq2], &one));
479:     z[i] = d[js-1];
480:     ++i;
481:     iq1 += n1;
482:     iq2 += n2;
483:   }

485:   for (j = 0; j < ctot[2]; ++j) {
486:     js = indx[i];
487:     PetscStackCallBLAS("BLAScopy",BLAScopy_(&n2, &q[n1+(js-1)*ldq], &one, &q2[iq2], &one));
488:     z[i] = d[js-1];
489:     ++i;
490:     iq2 += n2;
491:   }

493:   iq1 = iq2;
494:   for (j = 0; j < ctot[3]; ++j) {
495:     js = indx[i];
496:     PetscStackCallBLAS("BLAScopy",BLAScopy_(&n, &q[(js-1)*ldq], &one, &q2[iq2], &one));
497:     iq2 += n;
498:     z[i] = d[js-1];
499:     ++i;
500:   }

502:   /* The deflated eigenvalues and their corresponding vectors go back */
503:   /* into the last N - K slots of D and Q respectively. */

505:   PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("A", &n, &ctot[3], &q2[iq1], &n, &q[*k*ldq], &ldq));
506:   i1 = n - *k;
507:   PetscStackCallBLAS("BLAScopy",BLAScopy_(&i1, &z[*k], &one, &d[*k], &one));

509:   /* Copy CTOT into COLTYP for referencing in DLAED3M. */

511:   for (j = 0; j < 4; ++j) coltyp[j] = ctot[j];
512:   return(0);
513: #endif
514: }