1: /*
2: BDC - Block-divide and conquer (see description in README file).
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <slepc/private/dsimpl.h>
25: #include <slepcblaslapack.h>
27: PetscErrorCode BDC_dmerg2_(const char *jobz,PetscBLASInt rkct,PetscBLASInt n, 28: PetscReal *ev,PetscReal *q,PetscBLASInt ldq,PetscBLASInt *indxq, 29: PetscReal *rho,PetscReal *u,PetscBLASInt sbrkp1,PetscReal *v, 30: PetscBLASInt sbrk,PetscBLASInt cutpnt,PetscReal *work,PetscBLASInt lwork, 31: PetscBLASInt *iwork,PetscReal tol,PetscBLASInt *info,PetscBLASInt jobz_len) 32: {
33: /* -- Routine written in LAPACK Version 3.0 style -- */
34: /* *************************************************** */
35: /* Written by */
36: /* Michael Moldaschl and Wilfried Gansterer */
37: /* University of Vienna */
38: /* last modification: March 16, 2014 */
40: /* Small adaptations of original code written by */
41: /* Wilfried Gansterer and Bob Ward, */
42: /* Department of Computer Science, University of Tennessee */
43: /* see http://dx.doi.org/10.1137/S1064827501399432 */
44: /* *************************************************** */
46: /* Purpose */
47: /* ======= */
49: /* DMERG2 computes the updated eigensystem of a diagonal matrix after */
50: /* modification by a rank-one symmetric matrix. The diagonal matrix */
51: /* consists of two diagonal submatrices, and the vectors defining the */
52: /* rank-1 matrix similarly have two underlying subvectors each. */
53: /* The dimension of the first subproblem is CUTPNT, the dimension of */
54: /* the second subproblem is N-CUTPNT. */
56: /* T = Q(in) ( EV(in) + RHO * Z*Z' ) Q'(in) = Q(out) * EV(out) * Q'(out) */
58: /* where Z = Q'[V U']', where V is a row vector and U is a column */
59: /* vector with dimensions corresponding to the two underlying */
60: /* subproblems. */
62: /* The eigenvectors of the original matrix are stored in Q, and the */
63: /* eigenvalues in EV. The algorithm consists of three stages: */
65: /* The first stage consists of deflating the size of the problem */
66: /* when there are multiple eigenvalues or if there is a zero in */
67: /* the Z vector. For each such occurrence the dimension of the */
68: /* secular equation problem is reduced by one. This stage is */
69: /* performed by the routine DSRTDF. */
71: /* The second stage consists of calculating the updated */
72: /* eigenvalues. This is done by finding the roots of the secular */
73: /* equation via the routine DLAED4 (as called by DLAED3M). */
74: /* This routine also calculates the eigenvectors of the current */
75: /* problem. */
77: /* If( JOBZ.EQ.'D' ) then the final stage consists */
78: /* of computing the updated eigenvectors directly using the updated */
79: /* eigenvalues. The eigenvectors for the current problem are multiplied */
80: /* with the eigenvectors from the overall problem. */
82: /* Arguments */
83: /* ========= */
85: /* JOBZ (input) CHARACTER*1 */
86: /* = 'N': Compute eigenvalues only (not implemented); */
87: /* = 'D': Compute eigenvalues and eigenvectors. */
88: /* Eigenvectors are accumulated in the divide-and-conquer */
89: /* process. */
91: /* RKCT (input) INTEGER */
92: /* The number of the rank modification which is accounted for */
93: /* (RKCT >= 1). Required parameter, because the update operation of the */
94: /* modification vector can be performed much more efficiently */
95: /* if RKCT.EQ.1. In that case, the eigenvector matrix is still */
96: /* block-diagonal. For RKCT.GE.2 the eigenvector matrix for the update */
97: /* operation has filled up and is a full matrix. */
99: /* N (input) INTEGER */
100: /* The dimension of the symmetric block tridiagonal matrix. */
101: /* N >= 0. */
103: /* EV (input/output) DOUBLE PRECISION array, dimension (N) */
104: /* On entry, the eigenvalues (=diagonal values) of the */
105: /* rank-1-perturbed matrix. */
106: /* On exit, the eigenvalues of the repaired matrix. */
108: /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */
109: /* On entry, the eigenvectors of the rank-1-perturbed matrix. */
110: /* On exit, the eigenvectors of the repaired tridiagonal matrix. */
112: /* LDQ (input) INTEGER */
113: /* The leading dimension of the array Q. LDQ >= max(1,N). */
115: /* INDXQ (input/output) INTEGER array, dimension (N) */
116: /* On entry, the permutation which separately sorts the two */
117: /* subproblems in EV into ascending order. */
118: /* On exit, the permutation which will reintegrate the */
119: /* subproblems back into sorted order, */
120: /* i.e. EV( INDXQ( I = 1, N ) ) will be in ascending order. */
122: /* RHO (input/output) DOUBLE PRECISION */
123: /* The scalar in the rank-1 perturbation. Modified (multiplied */
124: /* by 2) in DSRTDF. */
126: /* U (input) DOUBLE PRECISION array; dimension (SBRKP1), where SBRKP1 */
127: /* is the size of the first (original) block after CUTPNT. */
128: /* The column vector of the rank-1 subdiagonal connecting the */
129: /* two diagonal subproblems. */
130: /* Theoretically, zero entries might have to be appended after U */
131: /* in order to make it have dimension (N-CUTPNT). However, this */
132: /* is not required because it can be accounted for in the */
133: /* matrix-vector product using the argument SBRKP1. */
135: /* SBRKP1 (input) INTEGER */
136: /* Dimension of the relevant (non-zero) part of the vector U. */
137: /* Equal to the size of the first original block after the */
138: /* breakpoint. */
140: /* V (input) DOUBLE PRECISION array; dimension (SBRK), where SBRK */
141: /* is the size of the last (original) block before CUTPNT. */
142: /* The row vector of the rank-1 subdiagonal connecting the two */
143: /* diagonal subproblems. */
144: /* Theoretically, zero entries might have to be inserted in front */
145: /* of V in order to make it have dimension (CUTPNT). However, this */
146: /* is not required because it can be accounted for in the */
147: /* matrix-vector product using the argument SBRK. */
149: /* SBRK (input) INTEGER */
150: /* Dimension of the relevant (non-zero) part of the vector V. */
151: /* Equal to the size of the last original block before the */
152: /* breakpoint. */
154: /* CUTPNT (input) INTEGER */
155: /* The location of the last eigenvalue of the leading diagonal */
156: /* block. min(1,N) <= CUTPNT <= max(1,N). */
158: /* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) */
160: /* LWORK (input) INTEGER */
161: /* The dimension of the array WORK. */
162: /* In order to guarantee correct results in all cases, */
163: /* LWORK must be at least ( 2*N**2 + 3*N ). In many cases, */
164: /* less workspace is required. The absolute minimum required is */
165: /* ( N**2 + 3*N ). */
166: /* If the workspace provided is not sufficient, the routine will */
167: /* return a corresponding error code and report how much workspace */
168: /* was missing (see INFO). */
169: /* NOTE: This parameter is needed for determining whether enough */
170: /* workspace is provided, and, if not, for computing how */
171: /* much workspace is needed. */
173: /* IWORK (workspace) INTEGER array, dimension ( 4*N ) */
175: /* TOL (input) DOUBLE PRECISION */
176: /* User specified deflation tolerance for the routine DSRTDF. */
178: /* INFO (output) INTEGER */
179: /* = 0: successful exit. */
180: /* < -200: not enough workspace */
181: /* ABS(INFO + 200) numbers have to be stored in addition */
182: /* to the workspace provided, otherwise some eigenvectors */
183: /* will be incorrect. */
184: /* < 0, > -99: if INFO.EQ.-i, the i-th argument had an */
185: /* illegal value. */
186: /* > 0: if INFO.EQ.1, an eigenvalue did not converge */
187: /* if INFO.EQ.2, the deflation counters DZ and DE do not sum */
188: /* up to the total number N-K of components */
189: /* deflated */
191: /* Further Details */
192: /* =============== */
194: /* Based on code written by */
195: /* Wilfried Gansterer and Bob Ward, */
196: /* Department of Computer Science, University of Tennessee */
198: /* Based on the design of the LAPACK code Dlaed1.f written by Jeff */
199: /* Rutter, Computer Science Division, University of California at */
200: /* Berkeley, and modified by Francoise Tisseur, University of Tennessee. */
202: /* ===================================================================== */
204: #if defined(SLEPC_MISSING_LAPACK_LAMRG)
206: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAMRG - Lapack routine is unavailable");
207: #else
208: PetscBLASInt i, k, n1, n2, de, is, dz, iw, iz, iq2, nmc, cpp1;
209: PetscBLASInt indx, indxc, indxp, lwmin, idlmda;
210: PetscBLASInt spneed, coltyp, tmpcut, i__1, i__2, one=1, mone=-1;
211: char defl[1];
212: PetscReal done = 1.0, dzero = 0.0;
216: *info = 0;
217: lwmin = n*n + n * 3;
219: if (n < 0) {
220: *info = -3;
221: } else if (ldq < PetscMax(1,n)) {
222: *info = -6;
223: } else if (cutpnt < PetscMin(1,n) || cutpnt > PetscMax(1,n)) {
224: *info = -13;
225: } else if (lwork < lwmin) {
226: *info = -15;
227: }
228: if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in DMERG2",-(*info));
230: /* **************************************************************************** */
232: /* Quick return if possible */
234: if (n == 0) return(0);
236: /* **************************************************************************** */
238: /* The following values are integer pointers which indicate */
239: /* the portion of the workspace used by a particular array in DSRTDF */
240: /* and DLAED3M. */
242: iz = 0;
243: idlmda = iz + n;
244: iw = idlmda + n;
245: iq2 = iw + n;
246: is = iq2 + n * n;
248: /* After the pointer IS the matrix S is stored and read in WORK */
249: /* in the routine DLAED3M. */
251: indx = 0;
252: indxc = indx + n;
253: coltyp = indxc + n;
254: indxp = coltyp + n;
256: /* If eigenvectors are to be accumulated in the divide-and-conquer */
257: /* process ( JOBZ.EQ.'D' ) form the z-vector which consists of */
258: /* Q_1^T * V and Q_2^T * U. */
260: cpp1 = cutpnt + 1;
261: if (rkct == 1) {
263: /* for the first rank modification the eigenvector matrix has */
264: /* special block-diagonal structure and therefore Q_1^T * V and */
265: /* Q_2^T * U can be formed separately. */
267: PetscStackCallBLAS("BLASgemv",BLASgemv_("T", &sbrk, &cutpnt, &done,
268: &q[cutpnt - sbrk], &ldq, v, &one, &dzero, &work[iz], &one));
269: nmc = n - cutpnt;
270: PetscStackCallBLAS("BLASgemv",BLASgemv_("T", &sbrkp1, &nmc, &done,
271: &q[cpp1-1 + (cpp1-1)*ldq], &ldq, u,
272: &one, &dzero, &work[iz + cutpnt], &one));
274: } else {
276: /* for the higher rank modifications, the vectors V and U */
277: /* have to be multiplied with the full eigenvector matrix */
279: PetscStackCallBLAS("BLASgemv",BLASgemv_("T", &sbrk, &n, &done,
280: &q[cutpnt - sbrk], &ldq, v, &one, &dzero, &work[iz], &one));
281: PetscStackCallBLAS("BLASgemv",BLASgemv_("T", &sbrkp1, &n, &done, &q[cpp1-1],
282: &ldq, u, &one, &done, &work[iz], &one));
284: }
286: /* **************************************************************************** */
288: /* Deflate eigenvalues. */
290: if (rkct == 1) {
292: /* for the first rank modification we need the actual cutpoint */
294: n1 = cutpnt;
295: tmpcut = cutpnt;
296: } else {
298: /* for the later rank modifications there is no actual cutpoint any more */
300: n1 = n;
302: /* The original value of CUTPNT has to be preserved for the next time */
303: /* this subroutine is called (therefore, CUTPNT is an INPUT parameter */
304: /* and not to be changed). Thus, assign N to TMPCUT and use the local */
305: /* variable TMPCUT from now on for the cut point. */
307: tmpcut = n;
308: }
310: /* initialize the flag DEFL (indicates whether deflation occurred - */
311: /* this information is needed later in DLAED3M) */
313: *(unsigned char *)defl = '0';
315: /* call DSRTDF for deflation */
317: BDC_dsrtdf_(&k, n, n1, ev, q, ldq, indxq, rho, &work[iz],
318: &work[idlmda], &work[iw], &work[iq2], &iwork[indx],
319: &iwork[indxc], &iwork[indxp], &iwork[coltyp], tol, &dz, &de, info);
320: 321: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dmerg2: error in dsrtdf, info = %d",*info);
323: if (k < n) {
325: /* ....some deflation occurred in dsrtdf, set the flag DEFL */
326: /* (needed in DLAED3M.f, since Givens rotations need to be */
327: /* applied to the eigenvector matrix only if some deflation */
328: /* happened) */
330: *(unsigned char *)defl = '1';
331: }
333: /* **************************************************************************** */
335: /* Solve the Secular Equation. */
337: if (k != 0 || k == 0) {
339: /* ....not everything was deflated. */
340: 341: /* ....check whether enough workspace is available: */
342: 343: /* Note that the following (upper) bound SPNEED for the workspace */
344: /* requirements should also hold in the extreme case TMPCUT=N, */
345: /* which happens for every rank modification after the first one. */
347: i__1 = (iwork[coltyp] + iwork[coltyp+1]) * k;
348: i__2 = (iwork[coltyp+1] + iwork[coltyp + 2]) * k;
349: spneed = is + PetscMax(i__1,i__2) - 1;
351: if (spneed > lwork) SETERRQ1(PETSC_COMM_SELF,1,"dmerg2: Workspace needed exceeds the workspace provided by %d numbers",spneed-lwork);
353: /* calling DLAED3M for solving the secular equation. */
355: BDC_dlaed3m_(jobz, defl, k, n, tmpcut, ev, q, ldq,
356: *rho, &work[idlmda], &work[iq2], &iwork[indxc], &iwork[coltyp],
357: &work[iw], &work[is], info, 1, 1);
358: if (*info) SETERRQ1(PETSC_COMM_SELF,1,"dmerg2: error in dlaed3m, info = %d",*info);
360: /* Prepare the INDXQ sorting permutation. */
362: n1 = k;
363: n2 = n - k;
364: PetscStackCallBLAS("LAPACKlamrg",LAPACKlamrg_(&n1, &n2, ev, &one, &mone, indxq));
365: if (k == 0) {
366: for (i = 0; i < n; ++i) indxq[i] = i+1;
367: }
369: } else {
371: /* ....everything was deflated (K.EQ.0) */
373: for (i = 0; i < n; ++i) indxq[i] = i+1;
374: }
375: return(0);
376: #endif
377: }