Actual source code: fnutil.c

slepc-3.7.3 2016-09-29
Report Typos and Errors
  1: /*
  2:    Utility subroutines common to several impls

  4:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  5:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  6:    Copyright (c) 2002-2016, Universitat Politecnica de Valencia, Spain

  8:    This file is part of SLEPc.

 10:    SLEPc is free software: you can redistribute it and/or modify it under  the
 11:    terms of version 3 of the GNU Lesser General Public License as published by
 12:    the Free Software Foundation.

 14:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 15:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 16:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 17:    more details.

 19:    You  should have received a copy of the GNU Lesser General  Public  License
 20:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 21:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 22: */

 24: #include <slepc/private/fnimpl.h>      /*I "slepcfn.h" I*/
 25: #include <slepcblaslapack.h>

 29: /*
 30:    Compute the square root of an upper quasi-triangular matrix T,
 31:    using Higham's algorithm (LAA 88, 1987). T is overwritten with sqrtm(T).
 32:  */
 33: PetscErrorCode SlepcMatDenseSqrt(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld)
 34: {
 35: #if defined(SLEPC_MISSING_LAPACK_TRSYL)
 37:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSYL - Lapack routine unavailable");
 38: #else
 39:   PetscScalar  one=1.0,mone=-1.0;
 40:   PetscReal    done=1.0;
 41:   PetscBLASInt i,j,si,sj,r,ione=1,info;
 42: #if !defined(PETSC_USE_COMPLEX)
 43:   PetscReal    alpha,theta,mu,mu2;
 44: #endif

 47:   for (j=0;j<n;j++) {
 48: #if defined(PETSC_USE_COMPLEX)
 49:     sj = 1;
 50:     T[j+j*ld] = PetscSqrtScalar(T[j+j*ld]);
 51: #else
 52:     sj = (j==n-1 || T[j+1+j*ld] == 0.0)? 1: 2;
 53:     if (sj==1) {
 54:       if (T[j+j*ld]<0.0) SETERRQ(PETSC_COMM_SELF,1,"Matrix has a real negative eigenvalue, no real primary square root exists");
 55:       T[j+j*ld] = PetscSqrtReal(T[j+j*ld]);
 56:     } else {
 57:       /* square root of 2x2 block */
 58:       theta = (T[j+j*ld]+T[j+1+(j+1)*ld])/2.0;
 59:       mu    = (T[j+j*ld]-T[j+1+(j+1)*ld])/2.0;
 60:       mu2   = -mu*mu-T[j+1+j*ld]*T[j+(j+1)*ld];
 61:       mu    = PetscSqrtReal(mu2);
 62:       if (theta>0.0) alpha = PetscSqrtReal((theta+PetscSqrtReal(theta*theta+mu2))/2.0);
 63:       else alpha = mu/PetscSqrtReal(2.0*(-theta+PetscSqrtReal(theta*theta+mu2)));
 64:       T[j+j*ld]       /= 2.0*alpha;
 65:       T[j+1+(j+1)*ld] /= 2.0*alpha;
 66:       T[j+(j+1)*ld]   /= 2.0*alpha;
 67:       T[j+1+j*ld]     /= 2.0*alpha;
 68:       T[j+j*ld]       += alpha-theta/(2.0*alpha);
 69:       T[j+1+(j+1)*ld] += alpha-theta/(2.0*alpha);
 70:     }
 71: #endif
 72:     for (i=j-1;i>=0;i--) {
 73: #if defined(PETSC_USE_COMPLEX)
 74:       si = 1;
 75: #else
 76:       si = (i==0 || T[i+(i-1)*ld] == 0.0)? 1: 2;
 77:       if (si==2) i--;
 78: #endif
 79:       /* solve Sylvester equation of order si x sj */
 80:       r = j-i-si;
 81:       if (r) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&si,&sj,&r,&mone,T+i+(i+si)*ld,&ld,T+i+si+j*ld,&ld,&one,T+i+j*ld,&ld));
 82:       PetscStackCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","N",&ione,&si,&sj,T+i+i*ld,&ld,T+j+j*ld,&ld,T+i+j*ld,&ld,&done,&info));
 83:       if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTRSYL %d",info);
 84:     }
 85:     if (sj==2) j++;
 86:   }
 87:   return(0);
 88: #endif
 89: }

 91: #define BLOCKSIZE 64

 95: /*
 96:    Simplified Schur-Parlett algorithm on an upper quasi-triangular matrix T,
 97:    particularized for the square root function. T is overwritten with sqrtm(T).
 98:    If firstonly then only the first column of T will contain relevant values.
 99:  */
100: PetscErrorCode SlepcSchurParlettSqrt(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
101: {
102: #if defined(SLEPC_MISSING_LAPACK_GEES) || defined(SLEPC_MISSING_LAPACK_TRSYL)
104:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEES/TRSYL - Lapack routines are unavailable");
105: #else
107:   PetscBLASInt   i,j,k,r,ione=1,sdim,lwork,*s,*p,info,bs=BLOCKSIZE;
108:   PetscScalar    *wr,*W,*Q,*work,one=1.0,zero=0.0,mone=-1.0;
109:   PetscInt       m,nblk;
110:   PetscReal      done=1.0;
111: #if defined(PETSC_USE_COMPLEX)
112:   PetscReal      *rwork;
113: #else
114:   PetscReal      *wi;
115: #endif

118:   m     = n;
119:   nblk  = (m+bs-1)/bs;
120:   lwork = 5*n;
121:   k     = firstonly? 1: n;

123:   /* compute Schur decomposition A*Q = Q*T */
124: #if !defined(PETSC_USE_COMPLEX)
125:   PetscMalloc7(m,&wr,m,&wi,m*k,&W,m*m,&Q,lwork,&work,nblk,&s,nblk,&p);
126:   PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
127:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEES %d",info);
128: #else
129:   PetscMalloc7(m,&wr,m,&rwork,m*k,&W,m*m,&Q,lwork,&work,nblk,&s,nblk,&p);
130:   PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
131:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEES %d",info);
132: #endif

134:   /* determine block sizes and positions, to avoid cutting 2x2 blocks */
135:   j = 0;
136:   p[j] = 0;
137:   do {
138:     s[j] = PetscMin(bs,n-p[j]);
139: #if !defined(PETSC_USE_COMPLEX)
140:     if (p[j]+s[j]!=n && T[p[j]+s[j]+(p[j]+s[j]-1)*ld]!=0.0) s[j]++;
141: #endif
142:     if (p[j]+s[j]==n) break;
143:     j++;
144:     p[j] = p[j-1]+s[j-1];
145:   } while (1);
146:   nblk = j+1;

148:   for (j=0;j<nblk;j++) {
149:     /* evaluate f(T_jj) */
150:     SlepcMatDenseSqrt(s[j],T+p[j]+p[j]*ld,ld);
151:     for (i=j-1;i>=0;i--) {
152:       /* solve Sylvester equation for block (i,j) */
153:       r = p[j]-p[i]-s[i];
154:       if (r) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",s+i,s+j,&r,&mone,T+p[i]+(p[i]+s[i])*ld,&ld,T+p[i]+s[i]+p[j]*ld,&ld,&one,T+p[i]+p[j]*ld,&ld));
155:       PetscStackCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","N",&ione,s+i,s+j,T+p[i]+p[i]*ld,&ld,T+p[j]+p[j]*ld,&ld,T+p[i]+p[j]*ld,&ld,&done,&info));
156:       if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTRSYL %d",info);
157:     }
158:   }

160:   /* backtransform B = Q*T*Q' */
161:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,T,&ld,Q,&ld,&zero,W,&ld));
162:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));

164: #if !defined(PETSC_USE_COMPLEX)
165:   PetscFree7(wr,wi,W,Q,work,s,p);
166: #else
167:   PetscFree7(wr,rwork,W,Q,work,s,p);
168: #endif
169:   return(0);
170: #endif
171: }