LAPACK  3.5.0
LAPACK: Linear Algebra PACKage
sgesvd.f File Reference

Go to the source code of this file.

Functions/Subroutines

subroutine sgesvd (JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO)
  SGESVD computes the singular value decomposition (SVD) for GE matrices More...
 

Function/Subroutine Documentation

subroutine sgesvd ( character  JOBU,
character  JOBVT,
integer  M,
integer  N,
real, dimension( lda, * )  A,
integer  LDA,
real, dimension( * )  S,
real, dimension( ldu, * )  U,
integer  LDU,
real, dimension( ldvt, * )  VT,
integer  LDVT,
real, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

SGESVD computes the singular value decomposition (SVD) for GE matrices

Download SGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SGESVD computes the singular value decomposition (SVD) of a real
 M-by-N matrix A, optionally computing the left and/or right singular
 vectors. The SVD is written

      A = U * SIGMA * transpose(V)

 where SIGMA is an M-by-N matrix which is zero except for its
 min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
 V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
 are the singular values of A; they are real and non-negative, and
 are returned in descending order.  The first min(m,n) columns of
 U and V are the left and right singular vectors of A.

 Note that the routine returns V**T, not V.
Parameters
[in]JOBU
          JOBU is CHARACTER*1
          Specifies options for computing all or part of the matrix U:
          = 'A':  all M columns of U are returned in array U:
          = 'S':  the first min(m,n) columns of U (the left singular
                  vectors) are returned in the array U;
          = 'O':  the first min(m,n) columns of U (the left singular
                  vectors) are overwritten on the array A;
          = 'N':  no columns of U (no left singular vectors) are
                  computed.
[in]JOBVT
          JOBVT is CHARACTER*1
          Specifies options for computing all or part of the matrix
          V**T:
          = 'A':  all N rows of V**T are returned in the array VT;
          = 'S':  the first min(m,n) rows of V**T (the right singular
                  vectors) are returned in the array VT;
          = 'O':  the first min(m,n) rows of V**T (the right singular
                  vectors) are overwritten on the array A;
          = 'N':  no rows of V**T (no right singular vectors) are
                  computed.

          JOBVT and JOBU cannot both be 'O'.
[in]M
          M is INTEGER
          The number of rows of the input matrix A.  M >= 0.
[in]N
          N is INTEGER
          The number of columns of the input matrix A.  N >= 0.
[in,out]A
          A is REAL array, dimension (LDA,N)
          On entry, the M-by-N matrix A.
          On exit,
          if JOBU = 'O',  A is overwritten with the first min(m,n)
                          columns of U (the left singular vectors,
                          stored columnwise);
          if JOBVT = 'O', A is overwritten with the first min(m,n)
                          rows of V**T (the right singular vectors,
                          stored rowwise);
          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
                          are destroyed.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,M).
[out]S
          S is REAL array, dimension (min(M,N))
          The singular values of A, sorted so that S(i) >= S(i+1).
[out]U
          U is REAL array, dimension (LDU,UCOL)
          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
          if JOBU = 'S', U contains the first min(m,n) columns of U
          (the left singular vectors, stored columnwise);
          if JOBU = 'N' or 'O', U is not referenced.
[in]LDU
          LDU is INTEGER
          The leading dimension of the array U.  LDU >= 1; if
          JOBU = 'S' or 'A', LDU >= M.
[out]VT
          VT is REAL array, dimension (LDVT,N)
          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
          V**T;
          if JOBVT = 'S', VT contains the first min(m,n) rows of
          V**T (the right singular vectors, stored rowwise);
          if JOBVT = 'N' or 'O', VT is not referenced.
[in]LDVT
          LDVT is INTEGER
          The leading dimension of the array VT.  LDVT >= 1; if
          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[out]WORK
          WORK is REAL array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
          superdiagonal elements of an upper bidiagonal matrix B
          whose diagonal is in S (not necessarily sorted). B
          satisfies A = U * B * VT, so it has the same singular values
          as A, and singular vectors related by U and VT.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK.
          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
             - PATH 1  (M much larger than N, JOBU='N') 
             - PATH 1t (N much larger than M, JOBVT='N')
          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
          For good performance, LWORK should generally be larger.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit.
          < 0:  if INFO = -i, the i-th argument had an illegal value.
          > 0:  if SBDSQR did not converge, INFO specifies how many
                superdiagonals of an intermediate bidiagonal form B
                did not converge to zero. See the description of WORK
                above for details.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Date
April 2012

Definition at line 213 of file sgesvd.f.

213 *
214 * -- LAPACK driver routine (version 3.4.1) --
215 * -- LAPACK is a software package provided by Univ. of Tennessee, --
216 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
217 * April 2012
218 *
219 * .. Scalar Arguments ..
220  CHARACTER jobu, jobvt
221  INTEGER info, lda, ldu, ldvt, lwork, m, n
222 * ..
223 * .. Array Arguments ..
224  REAL a( lda, * ), s( * ), u( ldu, * ),
225  $ vt( ldvt, * ), work( * )
226 * ..
227 *
228 * =====================================================================
229 *
230 * .. Parameters ..
231  REAL zero, one
232  parameter( zero = 0.0e0, one = 1.0e0 )
233 * ..
234 * .. Local Scalars ..
235  LOGICAL lquery, wntua, wntuas, wntun, wntuo, wntus,
236  $ wntva, wntvas, wntvn, wntvo, wntvs
237  INTEGER bdspac, blk, chunk, i, ie, ierr, ir, iscl,
238  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
239  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
240  $ nrvt, wrkbl
241  INTEGER lwork_sgeqrf, lwork_sorgqr_n, lwork_sorgqr_m,
242  $ lwork_sgebrd, lwork_sorgbr_p, lwork_sorgbr_q,
243  $ lwork_sgelqf, lwork_sorglq_n, lwork_sorglq_m
244  REAL anrm, bignum, eps, smlnum
245 * ..
246 * .. Local Arrays ..
247  REAL dum( 1 )
248 * ..
249 * .. External Subroutines ..
250  EXTERNAL sbdsqr, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
252  $ xerbla
253 * ..
254 * .. External Functions ..
255  LOGICAL lsame
256  INTEGER ilaenv
257  REAL slamch, slange
258  EXTERNAL lsame, ilaenv, slamch, slange
259 * ..
260 * .. Intrinsic Functions ..
261  INTRINSIC max, min, sqrt
262 * ..
263 * .. Executable Statements ..
264 *
265 * Test the input arguments
266 *
267  info = 0
268  minmn = min( m, n )
269  wntua = lsame( jobu, 'A' )
270  wntus = lsame( jobu, 'S' )
271  wntuas = wntua .OR. wntus
272  wntuo = lsame( jobu, 'O' )
273  wntun = lsame( jobu, 'N' )
274  wntva = lsame( jobvt, 'A' )
275  wntvs = lsame( jobvt, 'S' )
276  wntvas = wntva .OR. wntvs
277  wntvo = lsame( jobvt, 'O' )
278  wntvn = lsame( jobvt, 'N' )
279  lquery = ( lwork.EQ.-1 )
280 *
281  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
282  info = -1
283  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
284  $ ( wntvo .AND. wntuo ) ) THEN
285  info = -2
286  ELSE IF( m.LT.0 ) THEN
287  info = -3
288  ELSE IF( n.LT.0 ) THEN
289  info = -4
290  ELSE IF( lda.LT.max( 1, m ) ) THEN
291  info = -6
292  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
293  info = -9
294  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
295  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
296  info = -11
297  END IF
298 *
299 * Compute workspace
300 * (Note: Comments in the code beginning "Workspace:" describe the
301 * minimal amount of workspace needed at that point in the code,
302 * as well as the preferred amount for good performance.
303 * NB refers to the optimal block size for the immediately
304 * following subroutine, as returned by ILAENV.)
305 *
306  IF( info.EQ.0 ) THEN
307  minwrk = 1
308  maxwrk = 1
309  IF( m.GE.n .AND. minmn.GT.0 ) THEN
310 *
311 * Compute space needed for SBDSQR
312 *
313  mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
314  bdspac = 5*n
315 * Compute space needed for SGEQRF
316  CALL sgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
317  lwork_sgeqrf=dum(1)
318 * Compute space needed for SORGQR
319  CALL sorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
320  lwork_sorgqr_n=dum(1)
321  CALL sorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
322  lwork_sorgqr_m=dum(1)
323 * Compute space needed for SGEBRD
324  CALL sgebrd( n, n, a, lda, s, dum(1), dum(1),
325  $ dum(1), dum(1), -1, ierr )
326  lwork_sgebrd=dum(1)
327 * Compute space needed for SORGBR P
328  CALL sorgbr( 'P', n, n, n, a, lda, dum(1),
329  $ dum(1), -1, ierr )
330  lwork_sorgbr_p=dum(1)
331 * Compute space needed for SORGBR Q
332  CALL sorgbr( 'Q', n, n, n, a, lda, dum(1),
333  $ dum(1), -1, ierr )
334  lwork_sorgbr_q=dum(1)
335 *
336  IF( m.GE.mnthr ) THEN
337  IF( wntun ) THEN
338 *
339 * Path 1 (M much larger than N, JOBU='N')
340 *
341  maxwrk = n + lwork_sgeqrf
342  maxwrk = max( maxwrk, 3*n+lwork_sgebrd )
343  IF( wntvo .OR. wntvas )
344  $ maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
345  maxwrk = max( maxwrk, bdspac )
346  minwrk = max( 4*n, bdspac )
347  ELSE IF( wntuo .AND. wntvn ) THEN
348 *
349 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
350 *
351  wrkbl = n + lwork_sgeqrf
352  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
353  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
354  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
355  wrkbl = max( wrkbl, bdspac )
356  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
357  minwrk = max( 3*n+m, bdspac )
358  ELSE IF( wntuo .AND. wntvas ) THEN
359 *
360 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
361 * 'A')
362 *
363  wrkbl = n + lwork_sgeqrf
364  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
365  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
366  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
367  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
368  wrkbl = max( wrkbl, bdspac )
369  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
370  minwrk = max( 3*n+m, bdspac )
371  ELSE IF( wntus .AND. wntvn ) THEN
372 *
373 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
374 *
375  wrkbl = n + lwork_sgeqrf
376  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
377  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
378  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
379  wrkbl = max( wrkbl, bdspac )
380  maxwrk = n*n + wrkbl
381  minwrk = max( 3*n+m, bdspac )
382  ELSE IF( wntus .AND. wntvo ) THEN
383 *
384 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385 *
386  wrkbl = n + lwork_sgeqrf
387  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
388  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
389  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
390  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
391  wrkbl = max( wrkbl, bdspac )
392  maxwrk = 2*n*n + wrkbl
393  minwrk = max( 3*n+m, bdspac )
394  ELSE IF( wntus .AND. wntvas ) THEN
395 *
396 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397 * 'A')
398 *
399  wrkbl = n + lwork_sgeqrf
400  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
401  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
402  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
403  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
404  wrkbl = max( wrkbl, bdspac )
405  maxwrk = n*n + wrkbl
406  minwrk = max( 3*n+m, bdspac )
407  ELSE IF( wntua .AND. wntvn ) THEN
408 *
409 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
410 *
411  wrkbl = n + lwork_sgeqrf
412  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
413  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
414  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
415  wrkbl = max( wrkbl, bdspac )
416  maxwrk = n*n + wrkbl
417  minwrk = max( 3*n+m, bdspac )
418  ELSE IF( wntua .AND. wntvo ) THEN
419 *
420 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421 *
422  wrkbl = n + lwork_sgeqrf
423  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
424  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
425  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
426  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
427  wrkbl = max( wrkbl, bdspac )
428  maxwrk = 2*n*n + wrkbl
429  minwrk = max( 3*n+m, bdspac )
430  ELSE IF( wntua .AND. wntvas ) THEN
431 *
432 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433 * 'A')
434 *
435  wrkbl = n + lwork_sgeqrf
436  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
437  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
438  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
439  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
440  wrkbl = max( wrkbl, bdspac )
441  maxwrk = n*n + wrkbl
442  minwrk = max( 3*n+m, bdspac )
443  END IF
444  ELSE
445 *
446 * Path 10 (M at least N, but not much larger)
447 *
448  CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
449  $ dum(1), dum(1), -1, ierr )
450  lwork_sgebrd=dum(1)
451  maxwrk = 3*n + lwork_sgebrd
452  IF( wntus .OR. wntuo ) THEN
453  CALL sorgbr( 'Q', m, n, n, a, lda, dum(1),
454  $ dum(1), -1, ierr )
455  lwork_sorgbr_q=dum(1)
456  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
457  END IF
458  IF( wntua ) THEN
459  CALL sorgbr( 'Q', m, m, n, a, lda, dum(1),
460  $ dum(1), -1, ierr )
461  lwork_sorgbr_q=dum(1)
462  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
463  END IF
464  IF( .NOT.wntvn ) THEN
465  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
466  END IF
467  maxwrk = max( maxwrk, bdspac )
468  minwrk = max( 3*n+m, bdspac )
469  END IF
470  ELSE IF( minmn.GT.0 ) THEN
471 *
472 * Compute space needed for SBDSQR
473 *
474  mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
475  bdspac = 5*m
476 * Compute space needed for SGELQF
477  CALL sgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
478  lwork_sgelqf=dum(1)
479 * Compute space needed for SORGLQ
480  CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
481  lwork_sorglq_n=dum(1)
482  CALL sorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
483  lwork_sorglq_m=dum(1)
484 * Compute space needed for SGEBRD
485  CALL sgebrd( m, m, a, lda, s, dum(1), dum(1),
486  $ dum(1), dum(1), -1, ierr )
487  lwork_sgebrd=dum(1)
488 * Compute space needed for SORGBR P
489  CALL sorgbr( 'P', m, m, m, a, n, dum(1),
490  $ dum(1), -1, ierr )
491  lwork_sorgbr_p=dum(1)
492 * Compute space needed for SORGBR Q
493  CALL sorgbr( 'Q', m, m, m, a, n, dum(1),
494  $ dum(1), -1, ierr )
495  lwork_sorgbr_q=dum(1)
496  IF( n.GE.mnthr ) THEN
497  IF( wntvn ) THEN
498 *
499 * Path 1t(N much larger than M, JOBVT='N')
500 *
501  maxwrk = m + lwork_sgelqf
502  maxwrk = max( maxwrk, 3*m+lwork_sgebrd )
503  IF( wntuo .OR. wntuas )
504  $ maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
505  maxwrk = max( maxwrk, bdspac )
506  minwrk = max( 4*m, bdspac )
507  ELSE IF( wntvo .AND. wntun ) THEN
508 *
509 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
510 *
511  wrkbl = m + lwork_sgelqf
512  wrkbl = max( wrkbl, m+lwork_sorglq_m )
513  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
514  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
515  wrkbl = max( wrkbl, bdspac )
516  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
517  minwrk = max( 3*m+n, bdspac )
518  ELSE IF( wntvo .AND. wntuas ) THEN
519 *
520 * Path 3t(N much larger than M, JOBU='S' or 'A',
521 * JOBVT='O')
522 *
523  wrkbl = m + lwork_sgelqf
524  wrkbl = max( wrkbl, m+lwork_sorglq_m )
525  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
526  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
527  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
528  wrkbl = max( wrkbl, bdspac )
529  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
530  minwrk = max( 3*m+n, bdspac )
531  ELSE IF( wntvs .AND. wntun ) THEN
532 *
533 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
534 *
535  wrkbl = m + lwork_sgelqf
536  wrkbl = max( wrkbl, m+lwork_sorglq_m )
537  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
538  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
539  wrkbl = max( wrkbl, bdspac )
540  maxwrk = m*m + wrkbl
541  minwrk = max( 3*m+n, bdspac )
542  ELSE IF( wntvs .AND. wntuo ) THEN
543 *
544 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
545 *
546  wrkbl = m + lwork_sgelqf
547  wrkbl = max( wrkbl, m+lwork_sorglq_m )
548  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
549  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
550  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
551  wrkbl = max( wrkbl, bdspac )
552  maxwrk = 2*m*m + wrkbl
553  minwrk = max( 3*m+n, bdspac )
554  maxwrk = max( maxwrk, minwrk )
555  ELSE IF( wntvs .AND. wntuas ) THEN
556 *
557 * Path 6t(N much larger than M, JOBU='S' or 'A',
558 * JOBVT='S')
559 *
560  wrkbl = m + lwork_sgelqf
561  wrkbl = max( wrkbl, m+lwork_sorglq_m )
562  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
563  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
564  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
565  wrkbl = max( wrkbl, bdspac )
566  maxwrk = m*m + wrkbl
567  minwrk = max( 3*m+n, bdspac )
568  ELSE IF( wntva .AND. wntun ) THEN
569 *
570 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
571 *
572  wrkbl = m + lwork_sgelqf
573  wrkbl = max( wrkbl, m+lwork_sorglq_n )
574  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
575  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
576  wrkbl = max( wrkbl, bdspac )
577  maxwrk = m*m + wrkbl
578  minwrk = max( 3*m+n, bdspac )
579  ELSE IF( wntva .AND. wntuo ) THEN
580 *
581 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
582 *
583  wrkbl = m + lwork_sgelqf
584  wrkbl = max( wrkbl, m+lwork_sorglq_n )
585  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
586  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
587  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
588  wrkbl = max( wrkbl, bdspac )
589  maxwrk = 2*m*m + wrkbl
590  minwrk = max( 3*m+n, bdspac )
591  ELSE IF( wntva .AND. wntuas ) THEN
592 *
593 * Path 9t(N much larger than M, JOBU='S' or 'A',
594 * JOBVT='A')
595 *
596  wrkbl = m + lwork_sgelqf
597  wrkbl = max( wrkbl, m+lwork_sorglq_n )
598  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
599  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
600  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
601  wrkbl = max( wrkbl, bdspac )
602  maxwrk = m*m + wrkbl
603  minwrk = max( 3*m+n, bdspac )
604  END IF
605  ELSE
606 *
607 * Path 10t(N greater than M, but not much larger)
608 *
609  CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
610  $ dum(1), dum(1), -1, ierr )
611  lwork_sgebrd=dum(1)
612  maxwrk = 3*m + lwork_sgebrd
613  IF( wntvs .OR. wntvo ) THEN
614 * Compute space needed for SORGBR P
615  CALL sorgbr( 'P', m, n, m, a, n, dum(1),
616  $ dum(1), -1, ierr )
617  lwork_sorgbr_p=dum(1)
618  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
619  END IF
620  IF( wntva ) THEN
621  CALL sorgbr( 'P', n, n, m, a, n, dum(1),
622  $ dum(1), -1, ierr )
623  lwork_sorgbr_p=dum(1)
624  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
625  END IF
626  IF( .NOT.wntun ) THEN
627  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
628  END IF
629  maxwrk = max( maxwrk, bdspac )
630  minwrk = max( 3*m+n, bdspac )
631  END IF
632  END IF
633  maxwrk = max( maxwrk, minwrk )
634  work( 1 ) = maxwrk
635 *
636  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
637  info = -13
638  END IF
639  END IF
640 *
641  IF( info.NE.0 ) THEN
642  CALL xerbla( 'SGESVD', -info )
643  RETURN
644  ELSE IF( lquery ) THEN
645  RETURN
646  END IF
647 *
648 * Quick return if possible
649 *
650  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
651  RETURN
652  END IF
653 *
654 * Get machine constants
655 *
656  eps = slamch( 'P' )
657  smlnum = sqrt( slamch( 'S' ) ) / eps
658  bignum = one / smlnum
659 *
660 * Scale A if max element outside range [SMLNUM,BIGNUM]
661 *
662  anrm = slange( 'M', m, n, a, lda, dum )
663  iscl = 0
664  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
665  iscl = 1
666  CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
667  ELSE IF( anrm.GT.bignum ) THEN
668  iscl = 1
669  CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
670  END IF
671 *
672  IF( m.GE.n ) THEN
673 *
674 * A has at least as many rows as columns. If A has sufficiently
675 * more rows than columns, first reduce using the QR
676 * decomposition (if sufficient workspace available)
677 *
678  IF( m.GE.mnthr ) THEN
679 *
680  IF( wntun ) THEN
681 *
682 * Path 1 (M much larger than N, JOBU='N')
683 * No left singular vectors to be computed
684 *
685  itau = 1
686  iwork = itau + n
687 *
688 * Compute A=Q*R
689 * (Workspace: need 2*N, prefer N+N*NB)
690 *
691  CALL sgeqrf( m, n, a, lda, work( itau ), work( iwork ),
692  $ lwork-iwork+1, ierr )
693 *
694 * Zero out below R
695 *
696  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
697  ie = 1
698  itauq = ie + n
699  itaup = itauq + n
700  iwork = itaup + n
701 *
702 * Bidiagonalize R in A
703 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
704 *
705  CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
706  $ work( itaup ), work( iwork ), lwork-iwork+1,
707  $ ierr )
708  ncvt = 0
709  IF( wntvo .OR. wntvas ) THEN
710 *
711 * If right singular vectors desired, generate P'.
712 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
713 *
714  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
715  $ work( iwork ), lwork-iwork+1, ierr )
716  ncvt = n
717  END IF
718  iwork = ie + n
719 *
720 * Perform bidiagonal QR iteration, computing right
721 * singular vectors of A in A if desired
722 * (Workspace: need BDSPAC)
723 *
724  CALL sbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
725  $ dum, 1, dum, 1, work( iwork ), info )
726 *
727 * If right singular vectors desired in VT, copy them there
728 *
729  IF( wntvas )
730  $ CALL slacpy( 'F', n, n, a, lda, vt, ldvt )
731 *
732  ELSE IF( wntuo .AND. wntvn ) THEN
733 *
734 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
735 * N left singular vectors to be overwritten on A and
736 * no right singular vectors to be computed
737 *
738  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
739 *
740 * Sufficient workspace for a fast algorithm
741 *
742  ir = 1
743  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
744 *
745 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
746 *
747  ldwrku = lda
748  ldwrkr = lda
749  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
750 *
751 * WORK(IU) is LDA by N, WORK(IR) is N by N
752 *
753  ldwrku = lda
754  ldwrkr = n
755  ELSE
756 *
757 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
758 *
759  ldwrku = ( lwork-n*n-n ) / n
760  ldwrkr = n
761  END IF
762  itau = ir + ldwrkr*n
763  iwork = itau + n
764 *
765 * Compute A=Q*R
766 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
767 *
768  CALL sgeqrf( m, n, a, lda, work( itau ),
769  $ work( iwork ), lwork-iwork+1, ierr )
770 *
771 * Copy R to WORK(IR) and zero out below it
772 *
773  CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
774  CALL slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
775  $ ldwrkr )
776 *
777 * Generate Q in A
778 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
779 *
780  CALL sorgqr( m, n, n, a, lda, work( itau ),
781  $ work( iwork ), lwork-iwork+1, ierr )
782  ie = itau
783  itauq = ie + n
784  itaup = itauq + n
785  iwork = itaup + n
786 *
787 * Bidiagonalize R in WORK(IR)
788 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
789 *
790  CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
791  $ work( itauq ), work( itaup ),
792  $ work( iwork ), lwork-iwork+1, ierr )
793 *
794 * Generate left vectors bidiagonalizing R
795 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
796 *
797  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
798  $ work( itauq ), work( iwork ),
799  $ lwork-iwork+1, ierr )
800  iwork = ie + n
801 *
802 * Perform bidiagonal QR iteration, computing left
803 * singular vectors of R in WORK(IR)
804 * (Workspace: need N*N+BDSPAC)
805 *
806  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
807  $ work( ir ), ldwrkr, dum, 1,
808  $ work( iwork ), info )
809  iu = ie + n
810 *
811 * Multiply Q in A by left singular vectors of R in
812 * WORK(IR), storing result in WORK(IU) and copying to A
813 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
814 *
815  DO 10 i = 1, m, ldwrku
816  chunk = min( m-i+1, ldwrku )
817  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
818  $ lda, work( ir ), ldwrkr, zero,
819  $ work( iu ), ldwrku )
820  CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
821  $ a( i, 1 ), lda )
822  10 CONTINUE
823 *
824  ELSE
825 *
826 * Insufficient workspace for a fast algorithm
827 *
828  ie = 1
829  itauq = ie + n
830  itaup = itauq + n
831  iwork = itaup + n
832 *
833 * Bidiagonalize A
834 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
835 *
836  CALL sgebrd( m, n, a, lda, s, work( ie ),
837  $ work( itauq ), work( itaup ),
838  $ work( iwork ), lwork-iwork+1, ierr )
839 *
840 * Generate left vectors bidiagonalizing A
841 * (Workspace: need 4*N, prefer 3*N+N*NB)
842 *
843  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
844  $ work( iwork ), lwork-iwork+1, ierr )
845  iwork = ie + n
846 *
847 * Perform bidiagonal QR iteration, computing left
848 * singular vectors of A in A
849 * (Workspace: need BDSPAC)
850 *
851  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
852  $ a, lda, dum, 1, work( iwork ), info )
853 *
854  END IF
855 *
856  ELSE IF( wntuo .AND. wntvas ) THEN
857 *
858 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859 * N left singular vectors to be overwritten on A and
860 * N right singular vectors to be computed in VT
861 *
862  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
863 *
864 * Sufficient workspace for a fast algorithm
865 *
866  ir = 1
867  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
868 *
869 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
870 *
871  ldwrku = lda
872  ldwrkr = lda
873  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
874 *
875 * WORK(IU) is LDA by N and WORK(IR) is N by N
876 *
877  ldwrku = lda
878  ldwrkr = n
879  ELSE
880 *
881 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882 *
883  ldwrku = ( lwork-n*n-n ) / n
884  ldwrkr = n
885  END IF
886  itau = ir + ldwrkr*n
887  iwork = itau + n
888 *
889 * Compute A=Q*R
890 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
891 *
892  CALL sgeqrf( m, n, a, lda, work( itau ),
893  $ work( iwork ), lwork-iwork+1, ierr )
894 *
895 * Copy R to VT, zeroing out below it
896 *
897  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
898  IF( n.GT.1 )
899  $ CALL slaset( 'L', n-1, n-1, zero, zero,
900  $ vt( 2, 1 ), ldvt )
901 *
902 * Generate Q in A
903 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
904 *
905  CALL sorgqr( m, n, n, a, lda, work( itau ),
906  $ work( iwork ), lwork-iwork+1, ierr )
907  ie = itau
908  itauq = ie + n
909  itaup = itauq + n
910  iwork = itaup + n
911 *
912 * Bidiagonalize R in VT, copying result to WORK(IR)
913 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
914 *
915  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
916  $ work( itauq ), work( itaup ),
917  $ work( iwork ), lwork-iwork+1, ierr )
918  CALL slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
919 *
920 * Generate left vectors bidiagonalizing R in WORK(IR)
921 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
922 *
923  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
924  $ work( itauq ), work( iwork ),
925  $ lwork-iwork+1, ierr )
926 *
927 * Generate right vectors bidiagonalizing R in VT
928 * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
929 *
930  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
931  $ work( iwork ), lwork-iwork+1, ierr )
932  iwork = ie + n
933 *
934 * Perform bidiagonal QR iteration, computing left
935 * singular vectors of R in WORK(IR) and computing right
936 * singular vectors of R in VT
937 * (Workspace: need N*N+BDSPAC)
938 *
939  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
940  $ work( ir ), ldwrkr, dum, 1,
941  $ work( iwork ), info )
942  iu = ie + n
943 *
944 * Multiply Q in A by left singular vectors of R in
945 * WORK(IR), storing result in WORK(IU) and copying to A
946 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
947 *
948  DO 20 i = 1, m, ldwrku
949  chunk = min( m-i+1, ldwrku )
950  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
951  $ lda, work( ir ), ldwrkr, zero,
952  $ work( iu ), ldwrku )
953  CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
954  $ a( i, 1 ), lda )
955  20 CONTINUE
956 *
957  ELSE
958 *
959 * Insufficient workspace for a fast algorithm
960 *
961  itau = 1
962  iwork = itau + n
963 *
964 * Compute A=Q*R
965 * (Workspace: need 2*N, prefer N+N*NB)
966 *
967  CALL sgeqrf( m, n, a, lda, work( itau ),
968  $ work( iwork ), lwork-iwork+1, ierr )
969 *
970 * Copy R to VT, zeroing out below it
971 *
972  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
973  IF( n.GT.1 )
974  $ CALL slaset( 'L', n-1, n-1, zero, zero,
975  $ vt( 2, 1 ), ldvt )
976 *
977 * Generate Q in A
978 * (Workspace: need 2*N, prefer N+N*NB)
979 *
980  CALL sorgqr( m, n, n, a, lda, work( itau ),
981  $ work( iwork ), lwork-iwork+1, ierr )
982  ie = itau
983  itauq = ie + n
984  itaup = itauq + n
985  iwork = itaup + n
986 *
987 * Bidiagonalize R in VT
988 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
989 *
990  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
991  $ work( itauq ), work( itaup ),
992  $ work( iwork ), lwork-iwork+1, ierr )
993 *
994 * Multiply Q in A by left vectors bidiagonalizing R
995 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
996 *
997  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
998  $ work( itauq ), a, lda, work( iwork ),
999  $ lwork-iwork+1, ierr )
1000 *
1001 * Generate right vectors bidiagonalizing R in VT
1002 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1003 *
1004  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1005  $ work( iwork ), lwork-iwork+1, ierr )
1006  iwork = ie + n
1007 *
1008 * Perform bidiagonal QR iteration, computing left
1009 * singular vectors of A in A and computing right
1010 * singular vectors of A in VT
1011 * (Workspace: need BDSPAC)
1012 *
1013  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1014  $ a, lda, dum, 1, work( iwork ), info )
1015 *
1016  END IF
1017 *
1018  ELSE IF( wntus ) THEN
1019 *
1020  IF( wntvn ) THEN
1021 *
1022 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1023 * N left singular vectors to be computed in U and
1024 * no right singular vectors to be computed
1025 *
1026  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1027 *
1028 * Sufficient workspace for a fast algorithm
1029 *
1030  ir = 1
1031  IF( lwork.GE.wrkbl+lda*n ) THEN
1032 *
1033 * WORK(IR) is LDA by N
1034 *
1035  ldwrkr = lda
1036  ELSE
1037 *
1038 * WORK(IR) is N by N
1039 *
1040  ldwrkr = n
1041  END IF
1042  itau = ir + ldwrkr*n
1043  iwork = itau + n
1044 *
1045 * Compute A=Q*R
1046 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1047 *
1048  CALL sgeqrf( m, n, a, lda, work( itau ),
1049  $ work( iwork ), lwork-iwork+1, ierr )
1050 *
1051 * Copy R to WORK(IR), zeroing out below it
1052 *
1053  CALL slacpy( 'U', n, n, a, lda, work( ir ),
1054  $ ldwrkr )
1055  CALL slaset( 'L', n-1, n-1, zero, zero,
1056  $ work( ir+1 ), ldwrkr )
1057 *
1058 * Generate Q in A
1059 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1060 *
1061  CALL sorgqr( m, n, n, a, lda, work( itau ),
1062  $ work( iwork ), lwork-iwork+1, ierr )
1063  ie = itau
1064  itauq = ie + n
1065  itaup = itauq + n
1066  iwork = itaup + n
1067 *
1068 * Bidiagonalize R in WORK(IR)
1069 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1070 *
1071  CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1072  $ work( ie ), work( itauq ),
1073  $ work( itaup ), work( iwork ),
1074  $ lwork-iwork+1, ierr )
1075 *
1076 * Generate left vectors bidiagonalizing R in WORK(IR)
1077 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1078 *
1079  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1080  $ work( itauq ), work( iwork ),
1081  $ lwork-iwork+1, ierr )
1082  iwork = ie + n
1083 *
1084 * Perform bidiagonal QR iteration, computing left
1085 * singular vectors of R in WORK(IR)
1086 * (Workspace: need N*N+BDSPAC)
1087 *
1088  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1089  $ 1, work( ir ), ldwrkr, dum, 1,
1090  $ work( iwork ), info )
1091 *
1092 * Multiply Q in A by left singular vectors of R in
1093 * WORK(IR), storing result in U
1094 * (Workspace: need N*N)
1095 *
1096  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1097  $ work( ir ), ldwrkr, zero, u, ldu )
1098 *
1099  ELSE
1100 *
1101 * Insufficient workspace for a fast algorithm
1102 *
1103  itau = 1
1104  iwork = itau + n
1105 *
1106 * Compute A=Q*R, copying result to U
1107 * (Workspace: need 2*N, prefer N+N*NB)
1108 *
1109  CALL sgeqrf( m, n, a, lda, work( itau ),
1110  $ work( iwork ), lwork-iwork+1, ierr )
1111  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1112 *
1113 * Generate Q in U
1114 * (Workspace: need 2*N, prefer N+N*NB)
1115 *
1116  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1117  $ work( iwork ), lwork-iwork+1, ierr )
1118  ie = itau
1119  itauq = ie + n
1120  itaup = itauq + n
1121  iwork = itaup + n
1122 *
1123 * Zero out below R in A
1124 *
1125  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1126  $ lda )
1127 *
1128 * Bidiagonalize R in A
1129 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1130 *
1131  CALL sgebrd( n, n, a, lda, s, work( ie ),
1132  $ work( itauq ), work( itaup ),
1133  $ work( iwork ), lwork-iwork+1, ierr )
1134 *
1135 * Multiply Q in U by left vectors bidiagonalizing R
1136 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1137 *
1138  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1139  $ work( itauq ), u, ldu, work( iwork ),
1140  $ lwork-iwork+1, ierr )
1141  iwork = ie + n
1142 *
1143 * Perform bidiagonal QR iteration, computing left
1144 * singular vectors of A in U
1145 * (Workspace: need BDSPAC)
1146 *
1147  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1148  $ 1, u, ldu, dum, 1, work( iwork ),
1149  $ info )
1150 *
1151  END IF
1152 *
1153  ELSE IF( wntvo ) THEN
1154 *
1155 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1156 * N left singular vectors to be computed in U and
1157 * N right singular vectors to be overwritten on A
1158 *
1159  IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1160 *
1161 * Sufficient workspace for a fast algorithm
1162 *
1163  iu = 1
1164  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1165 *
1166 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1167 *
1168  ldwrku = lda
1169  ir = iu + ldwrku*n
1170  ldwrkr = lda
1171  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1172 *
1173 * WORK(IU) is LDA by N and WORK(IR) is N by N
1174 *
1175  ldwrku = lda
1176  ir = iu + ldwrku*n
1177  ldwrkr = n
1178  ELSE
1179 *
1180 * WORK(IU) is N by N and WORK(IR) is N by N
1181 *
1182  ldwrku = n
1183  ir = iu + ldwrku*n
1184  ldwrkr = n
1185  END IF
1186  itau = ir + ldwrkr*n
1187  iwork = itau + n
1188 *
1189 * Compute A=Q*R
1190 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1191 *
1192  CALL sgeqrf( m, n, a, lda, work( itau ),
1193  $ work( iwork ), lwork-iwork+1, ierr )
1194 *
1195 * Copy R to WORK(IU), zeroing out below it
1196 *
1197  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1198  $ ldwrku )
1199  CALL slaset( 'L', n-1, n-1, zero, zero,
1200  $ work( iu+1 ), ldwrku )
1201 *
1202 * Generate Q in A
1203 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1204 *
1205  CALL sorgqr( m, n, n, a, lda, work( itau ),
1206  $ work( iwork ), lwork-iwork+1, ierr )
1207  ie = itau
1208  itauq = ie + n
1209  itaup = itauq + n
1210  iwork = itaup + n
1211 *
1212 * Bidiagonalize R in WORK(IU), copying result to
1213 * WORK(IR)
1214 * (Workspace: need 2*N*N+4*N,
1215 * prefer 2*N*N+3*N+2*N*NB)
1216 *
1217  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1218  $ work( ie ), work( itauq ),
1219  $ work( itaup ), work( iwork ),
1220  $ lwork-iwork+1, ierr )
1221  CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1222  $ work( ir ), ldwrkr )
1223 *
1224 * Generate left bidiagonalizing vectors in WORK(IU)
1225 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1226 *
1227  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1228  $ work( itauq ), work( iwork ),
1229  $ lwork-iwork+1, ierr )
1230 *
1231 * Generate right bidiagonalizing vectors in WORK(IR)
1232 * (Workspace: need 2*N*N+4*N-1,
1233 * prefer 2*N*N+3*N+(N-1)*NB)
1234 *
1235  CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1236  $ work( itaup ), work( iwork ),
1237  $ lwork-iwork+1, ierr )
1238  iwork = ie + n
1239 *
1240 * Perform bidiagonal QR iteration, computing left
1241 * singular vectors of R in WORK(IU) and computing
1242 * right singular vectors of R in WORK(IR)
1243 * (Workspace: need 2*N*N+BDSPAC)
1244 *
1245  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1246  $ work( ir ), ldwrkr, work( iu ),
1247  $ ldwrku, dum, 1, work( iwork ), info )
1248 *
1249 * Multiply Q in A by left singular vectors of R in
1250 * WORK(IU), storing result in U
1251 * (Workspace: need N*N)
1252 *
1253  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1254  $ work( iu ), ldwrku, zero, u, ldu )
1255 *
1256 * Copy right singular vectors of R to A
1257 * (Workspace: need N*N)
1258 *
1259  CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1260  $ lda )
1261 *
1262  ELSE
1263 *
1264 * Insufficient workspace for a fast algorithm
1265 *
1266  itau = 1
1267  iwork = itau + n
1268 *
1269 * Compute A=Q*R, copying result to U
1270 * (Workspace: need 2*N, prefer N+N*NB)
1271 *
1272  CALL sgeqrf( m, n, a, lda, work( itau ),
1273  $ work( iwork ), lwork-iwork+1, ierr )
1274  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1275 *
1276 * Generate Q in U
1277 * (Workspace: need 2*N, prefer N+N*NB)
1278 *
1279  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1280  $ work( iwork ), lwork-iwork+1, ierr )
1281  ie = itau
1282  itauq = ie + n
1283  itaup = itauq + n
1284  iwork = itaup + n
1285 *
1286 * Zero out below R in A
1287 *
1288  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1289  $ lda )
1290 *
1291 * Bidiagonalize R in A
1292 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1293 *
1294  CALL sgebrd( n, n, a, lda, s, work( ie ),
1295  $ work( itauq ), work( itaup ),
1296  $ work( iwork ), lwork-iwork+1, ierr )
1297 *
1298 * Multiply Q in U by left vectors bidiagonalizing R
1299 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1300 *
1301  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1302  $ work( itauq ), u, ldu, work( iwork ),
1303  $ lwork-iwork+1, ierr )
1304 *
1305 * Generate right vectors bidiagonalizing R in A
1306 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1307 *
1308  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1309  $ work( iwork ), lwork-iwork+1, ierr )
1310  iwork = ie + n
1311 *
1312 * Perform bidiagonal QR iteration, computing left
1313 * singular vectors of A in U and computing right
1314 * singular vectors of A in A
1315 * (Workspace: need BDSPAC)
1316 *
1317  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1318  $ lda, u, ldu, dum, 1, work( iwork ),
1319  $ info )
1320 *
1321  END IF
1322 *
1323  ELSE IF( wntvas ) THEN
1324 *
1325 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1326 * or 'A')
1327 * N left singular vectors to be computed in U and
1328 * N right singular vectors to be computed in VT
1329 *
1330  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1331 *
1332 * Sufficient workspace for a fast algorithm
1333 *
1334  iu = 1
1335  IF( lwork.GE.wrkbl+lda*n ) THEN
1336 *
1337 * WORK(IU) is LDA by N
1338 *
1339  ldwrku = lda
1340  ELSE
1341 *
1342 * WORK(IU) is N by N
1343 *
1344  ldwrku = n
1345  END IF
1346  itau = iu + ldwrku*n
1347  iwork = itau + n
1348 *
1349 * Compute A=Q*R
1350 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1351 *
1352  CALL sgeqrf( m, n, a, lda, work( itau ),
1353  $ work( iwork ), lwork-iwork+1, ierr )
1354 *
1355 * Copy R to WORK(IU), zeroing out below it
1356 *
1357  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1358  $ ldwrku )
1359  CALL slaset( 'L', n-1, n-1, zero, zero,
1360  $ work( iu+1 ), ldwrku )
1361 *
1362 * Generate Q in A
1363 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1364 *
1365  CALL sorgqr( m, n, n, a, lda, work( itau ),
1366  $ work( iwork ), lwork-iwork+1, ierr )
1367  ie = itau
1368  itauq = ie + n
1369  itaup = itauq + n
1370  iwork = itaup + n
1371 *
1372 * Bidiagonalize R in WORK(IU), copying result to VT
1373 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1374 *
1375  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1376  $ work( ie ), work( itauq ),
1377  $ work( itaup ), work( iwork ),
1378  $ lwork-iwork+1, ierr )
1379  CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1380  $ ldvt )
1381 *
1382 * Generate left bidiagonalizing vectors in WORK(IU)
1383 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1384 *
1385  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1386  $ work( itauq ), work( iwork ),
1387  $ lwork-iwork+1, ierr )
1388 *
1389 * Generate right bidiagonalizing vectors in VT
1390 * (Workspace: need N*N+4*N-1,
1391 * prefer N*N+3*N+(N-1)*NB)
1392 *
1393  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1394  $ work( iwork ), lwork-iwork+1, ierr )
1395  iwork = ie + n
1396 *
1397 * Perform bidiagonal QR iteration, computing left
1398 * singular vectors of R in WORK(IU) and computing
1399 * right singular vectors of R in VT
1400 * (Workspace: need N*N+BDSPAC)
1401 *
1402  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1403  $ ldvt, work( iu ), ldwrku, dum, 1,
1404  $ work( iwork ), info )
1405 *
1406 * Multiply Q in A by left singular vectors of R in
1407 * WORK(IU), storing result in U
1408 * (Workspace: need N*N)
1409 *
1410  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1411  $ work( iu ), ldwrku, zero, u, ldu )
1412 *
1413  ELSE
1414 *
1415 * Insufficient workspace for a fast algorithm
1416 *
1417  itau = 1
1418  iwork = itau + n
1419 *
1420 * Compute A=Q*R, copying result to U
1421 * (Workspace: need 2*N, prefer N+N*NB)
1422 *
1423  CALL sgeqrf( m, n, a, lda, work( itau ),
1424  $ work( iwork ), lwork-iwork+1, ierr )
1425  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1426 *
1427 * Generate Q in U
1428 * (Workspace: need 2*N, prefer N+N*NB)
1429 *
1430  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1431  $ work( iwork ), lwork-iwork+1, ierr )
1432 *
1433 * Copy R to VT, zeroing out below it
1434 *
1435  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1436  IF( n.GT.1 )
1437  $ CALL slaset( 'L', n-1, n-1, zero, zero,
1438  $ vt( 2, 1 ), ldvt )
1439  ie = itau
1440  itauq = ie + n
1441  itaup = itauq + n
1442  iwork = itaup + n
1443 *
1444 * Bidiagonalize R in VT
1445 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1446 *
1447  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1448  $ work( itauq ), work( itaup ),
1449  $ work( iwork ), lwork-iwork+1, ierr )
1450 *
1451 * Multiply Q in U by left bidiagonalizing vectors
1452 * in VT
1453 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1454 *
1455  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1456  $ work( itauq ), u, ldu, work( iwork ),
1457  $ lwork-iwork+1, ierr )
1458 *
1459 * Generate right bidiagonalizing vectors in VT
1460 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1461 *
1462  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1463  $ work( iwork ), lwork-iwork+1, ierr )
1464  iwork = ie + n
1465 *
1466 * Perform bidiagonal QR iteration, computing left
1467 * singular vectors of A in U and computing right
1468 * singular vectors of A in VT
1469 * (Workspace: need BDSPAC)
1470 *
1471  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1472  $ ldvt, u, ldu, dum, 1, work( iwork ),
1473  $ info )
1474 *
1475  END IF
1476 *
1477  END IF
1478 *
1479  ELSE IF( wntua ) THEN
1480 *
1481  IF( wntvn ) THEN
1482 *
1483 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1484 * M left singular vectors to be computed in U and
1485 * no right singular vectors to be computed
1486 *
1487  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1488 *
1489 * Sufficient workspace for a fast algorithm
1490 *
1491  ir = 1
1492  IF( lwork.GE.wrkbl+lda*n ) THEN
1493 *
1494 * WORK(IR) is LDA by N
1495 *
1496  ldwrkr = lda
1497  ELSE
1498 *
1499 * WORK(IR) is N by N
1500 *
1501  ldwrkr = n
1502  END IF
1503  itau = ir + ldwrkr*n
1504  iwork = itau + n
1505 *
1506 * Compute A=Q*R, copying result to U
1507 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1508 *
1509  CALL sgeqrf( m, n, a, lda, work( itau ),
1510  $ work( iwork ), lwork-iwork+1, ierr )
1511  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1512 *
1513 * Copy R to WORK(IR), zeroing out below it
1514 *
1515  CALL slacpy( 'U', n, n, a, lda, work( ir ),
1516  $ ldwrkr )
1517  CALL slaset( 'L', n-1, n-1, zero, zero,
1518  $ work( ir+1 ), ldwrkr )
1519 *
1520 * Generate Q in U
1521 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1522 *
1523  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1524  $ work( iwork ), lwork-iwork+1, ierr )
1525  ie = itau
1526  itauq = ie + n
1527  itaup = itauq + n
1528  iwork = itaup + n
1529 *
1530 * Bidiagonalize R in WORK(IR)
1531 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1532 *
1533  CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1534  $ work( ie ), work( itauq ),
1535  $ work( itaup ), work( iwork ),
1536  $ lwork-iwork+1, ierr )
1537 *
1538 * Generate left bidiagonalizing vectors in WORK(IR)
1539 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1540 *
1541  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1542  $ work( itauq ), work( iwork ),
1543  $ lwork-iwork+1, ierr )
1544  iwork = ie + n
1545 *
1546 * Perform bidiagonal QR iteration, computing left
1547 * singular vectors of R in WORK(IR)
1548 * (Workspace: need N*N+BDSPAC)
1549 *
1550  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1551  $ 1, work( ir ), ldwrkr, dum, 1,
1552  $ work( iwork ), info )
1553 *
1554 * Multiply Q in U by left singular vectors of R in
1555 * WORK(IR), storing result in A
1556 * (Workspace: need N*N)
1557 *
1558  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1559  $ work( ir ), ldwrkr, zero, a, lda )
1560 *
1561 * Copy left singular vectors of A from A to U
1562 *
1563  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1564 *
1565  ELSE
1566 *
1567 * Insufficient workspace for a fast algorithm
1568 *
1569  itau = 1
1570  iwork = itau + n
1571 *
1572 * Compute A=Q*R, copying result to U
1573 * (Workspace: need 2*N, prefer N+N*NB)
1574 *
1575  CALL sgeqrf( m, n, a, lda, work( itau ),
1576  $ work( iwork ), lwork-iwork+1, ierr )
1577  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1578 *
1579 * Generate Q in U
1580 * (Workspace: need N+M, prefer N+M*NB)
1581 *
1582  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1583  $ work( iwork ), lwork-iwork+1, ierr )
1584  ie = itau
1585  itauq = ie + n
1586  itaup = itauq + n
1587  iwork = itaup + n
1588 *
1589 * Zero out below R in A
1590 *
1591  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1592  $ lda )
1593 *
1594 * Bidiagonalize R in A
1595 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1596 *
1597  CALL sgebrd( n, n, a, lda, s, work( ie ),
1598  $ work( itauq ), work( itaup ),
1599  $ work( iwork ), lwork-iwork+1, ierr )
1600 *
1601 * Multiply Q in U by left bidiagonalizing vectors
1602 * in A
1603 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1604 *
1605  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1606  $ work( itauq ), u, ldu, work( iwork ),
1607  $ lwork-iwork+1, ierr )
1608  iwork = ie + n
1609 *
1610 * Perform bidiagonal QR iteration, computing left
1611 * singular vectors of A in U
1612 * (Workspace: need BDSPAC)
1613 *
1614  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1615  $ 1, u, ldu, dum, 1, work( iwork ),
1616  $ info )
1617 *
1618  END IF
1619 *
1620  ELSE IF( wntvo ) THEN
1621 *
1622 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1623 * M left singular vectors to be computed in U and
1624 * N right singular vectors to be overwritten on A
1625 *
1626  IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1627 *
1628 * Sufficient workspace for a fast algorithm
1629 *
1630  iu = 1
1631  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1632 *
1633 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1634 *
1635  ldwrku = lda
1636  ir = iu + ldwrku*n
1637  ldwrkr = lda
1638  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1639 *
1640 * WORK(IU) is LDA by N and WORK(IR) is N by N
1641 *
1642  ldwrku = lda
1643  ir = iu + ldwrku*n
1644  ldwrkr = n
1645  ELSE
1646 *
1647 * WORK(IU) is N by N and WORK(IR) is N by N
1648 *
1649  ldwrku = n
1650  ir = iu + ldwrku*n
1651  ldwrkr = n
1652  END IF
1653  itau = ir + ldwrkr*n
1654  iwork = itau + n
1655 *
1656 * Compute A=Q*R, copying result to U
1657 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1658 *
1659  CALL sgeqrf( m, n, a, lda, work( itau ),
1660  $ work( iwork ), lwork-iwork+1, ierr )
1661  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1662 *
1663 * Generate Q in U
1664 * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1665 *
1666  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1667  $ work( iwork ), lwork-iwork+1, ierr )
1668 *
1669 * Copy R to WORK(IU), zeroing out below it
1670 *
1671  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1672  $ ldwrku )
1673  CALL slaset( 'L', n-1, n-1, zero, zero,
1674  $ work( iu+1 ), ldwrku )
1675  ie = itau
1676  itauq = ie + n
1677  itaup = itauq + n
1678  iwork = itaup + n
1679 *
1680 * Bidiagonalize R in WORK(IU), copying result to
1681 * WORK(IR)
1682 * (Workspace: need 2*N*N+4*N,
1683 * prefer 2*N*N+3*N+2*N*NB)
1684 *
1685  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1686  $ work( ie ), work( itauq ),
1687  $ work( itaup ), work( iwork ),
1688  $ lwork-iwork+1, ierr )
1689  CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1690  $ work( ir ), ldwrkr )
1691 *
1692 * Generate left bidiagonalizing vectors in WORK(IU)
1693 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1694 *
1695  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1696  $ work( itauq ), work( iwork ),
1697  $ lwork-iwork+1, ierr )
1698 *
1699 * Generate right bidiagonalizing vectors in WORK(IR)
1700 * (Workspace: need 2*N*N+4*N-1,
1701 * prefer 2*N*N+3*N+(N-1)*NB)
1702 *
1703  CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1704  $ work( itaup ), work( iwork ),
1705  $ lwork-iwork+1, ierr )
1706  iwork = ie + n
1707 *
1708 * Perform bidiagonal QR iteration, computing left
1709 * singular vectors of R in WORK(IU) and computing
1710 * right singular vectors of R in WORK(IR)
1711 * (Workspace: need 2*N*N+BDSPAC)
1712 *
1713  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1714  $ work( ir ), ldwrkr, work( iu ),
1715  $ ldwrku, dum, 1, work( iwork ), info )
1716 *
1717 * Multiply Q in U by left singular vectors of R in
1718 * WORK(IU), storing result in A
1719 * (Workspace: need N*N)
1720 *
1721  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1722  $ work( iu ), ldwrku, zero, a, lda )
1723 *
1724 * Copy left singular vectors of A from A to U
1725 *
1726  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1727 *
1728 * Copy right singular vectors of R from WORK(IR) to A
1729 *
1730  CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1731  $ lda )
1732 *
1733  ELSE
1734 *
1735 * Insufficient workspace for a fast algorithm
1736 *
1737  itau = 1
1738  iwork = itau + n
1739 *
1740 * Compute A=Q*R, copying result to U
1741 * (Workspace: need 2*N, prefer N+N*NB)
1742 *
1743  CALL sgeqrf( m, n, a, lda, work( itau ),
1744  $ work( iwork ), lwork-iwork+1, ierr )
1745  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1746 *
1747 * Generate Q in U
1748 * (Workspace: need N+M, prefer N+M*NB)
1749 *
1750  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1751  $ work( iwork ), lwork-iwork+1, ierr )
1752  ie = itau
1753  itauq = ie + n
1754  itaup = itauq + n
1755  iwork = itaup + n
1756 *
1757 * Zero out below R in A
1758 *
1759  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1760  $ lda )
1761 *
1762 * Bidiagonalize R in A
1763 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1764 *
1765  CALL sgebrd( n, n, a, lda, s, work( ie ),
1766  $ work( itauq ), work( itaup ),
1767  $ work( iwork ), lwork-iwork+1, ierr )
1768 *
1769 * Multiply Q in U by left bidiagonalizing vectors
1770 * in A
1771 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1772 *
1773  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1774  $ work( itauq ), u, ldu, work( iwork ),
1775  $ lwork-iwork+1, ierr )
1776 *
1777 * Generate right bidiagonalizing vectors in A
1778 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1779 *
1780  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1781  $ work( iwork ), lwork-iwork+1, ierr )
1782  iwork = ie + n
1783 *
1784 * Perform bidiagonal QR iteration, computing left
1785 * singular vectors of A in U and computing right
1786 * singular vectors of A in A
1787 * (Workspace: need BDSPAC)
1788 *
1789  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1790  $ lda, u, ldu, dum, 1, work( iwork ),
1791  $ info )
1792 *
1793  END IF
1794 *
1795  ELSE IF( wntvas ) THEN
1796 *
1797 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1798 * or 'A')
1799 * M left singular vectors to be computed in U and
1800 * N right singular vectors to be computed in VT
1801 *
1802  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1803 *
1804 * Sufficient workspace for a fast algorithm
1805 *
1806  iu = 1
1807  IF( lwork.GE.wrkbl+lda*n ) THEN
1808 *
1809 * WORK(IU) is LDA by N
1810 *
1811  ldwrku = lda
1812  ELSE
1813 *
1814 * WORK(IU) is N by N
1815 *
1816  ldwrku = n
1817  END IF
1818  itau = iu + ldwrku*n
1819  iwork = itau + n
1820 *
1821 * Compute A=Q*R, copying result to U
1822 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1823 *
1824  CALL sgeqrf( m, n, a, lda, work( itau ),
1825  $ work( iwork ), lwork-iwork+1, ierr )
1826  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1827 *
1828 * Generate Q in U
1829 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1830 *
1831  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1832  $ work( iwork ), lwork-iwork+1, ierr )
1833 *
1834 * Copy R to WORK(IU), zeroing out below it
1835 *
1836  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1837  $ ldwrku )
1838  CALL slaset( 'L', n-1, n-1, zero, zero,
1839  $ work( iu+1 ), ldwrku )
1840  ie = itau
1841  itauq = ie + n
1842  itaup = itauq + n
1843  iwork = itaup + n
1844 *
1845 * Bidiagonalize R in WORK(IU), copying result to VT
1846 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1847 *
1848  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1849  $ work( ie ), work( itauq ),
1850  $ work( itaup ), work( iwork ),
1851  $ lwork-iwork+1, ierr )
1852  CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1853  $ ldvt )
1854 *
1855 * Generate left bidiagonalizing vectors in WORK(IU)
1856 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1857 *
1858  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1859  $ work( itauq ), work( iwork ),
1860  $ lwork-iwork+1, ierr )
1861 *
1862 * Generate right bidiagonalizing vectors in VT
1863 * (Workspace: need N*N+4*N-1,
1864 * prefer N*N+3*N+(N-1)*NB)
1865 *
1866  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1867  $ work( iwork ), lwork-iwork+1, ierr )
1868  iwork = ie + n
1869 *
1870 * Perform bidiagonal QR iteration, computing left
1871 * singular vectors of R in WORK(IU) and computing
1872 * right singular vectors of R in VT
1873 * (Workspace: need N*N+BDSPAC)
1874 *
1875  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1876  $ ldvt, work( iu ), ldwrku, dum, 1,
1877  $ work( iwork ), info )
1878 *
1879 * Multiply Q in U by left singular vectors of R in
1880 * WORK(IU), storing result in A
1881 * (Workspace: need N*N)
1882 *
1883  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1884  $ work( iu ), ldwrku, zero, a, lda )
1885 *
1886 * Copy left singular vectors of A from A to U
1887 *
1888  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1889 *
1890  ELSE
1891 *
1892 * Insufficient workspace for a fast algorithm
1893 *
1894  itau = 1
1895  iwork = itau + n
1896 *
1897 * Compute A=Q*R, copying result to U
1898 * (Workspace: need 2*N, prefer N+N*NB)
1899 *
1900  CALL sgeqrf( m, n, a, lda, work( itau ),
1901  $ work( iwork ), lwork-iwork+1, ierr )
1902  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1903 *
1904 * Generate Q in U
1905 * (Workspace: need N+M, prefer N+M*NB)
1906 *
1907  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1908  $ work( iwork ), lwork-iwork+1, ierr )
1909 *
1910 * Copy R from A to VT, zeroing out below it
1911 *
1912  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1913  IF( n.GT.1 )
1914  $ CALL slaset( 'L', n-1, n-1, zero, zero,
1915  $ vt( 2, 1 ), ldvt )
1916  ie = itau
1917  itauq = ie + n
1918  itaup = itauq + n
1919  iwork = itaup + n
1920 *
1921 * Bidiagonalize R in VT
1922 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1923 *
1924  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1925  $ work( itauq ), work( itaup ),
1926  $ work( iwork ), lwork-iwork+1, ierr )
1927 *
1928 * Multiply Q in U by left bidiagonalizing vectors
1929 * in VT
1930 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1931 *
1932  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1933  $ work( itauq ), u, ldu, work( iwork ),
1934  $ lwork-iwork+1, ierr )
1935 *
1936 * Generate right bidiagonalizing vectors in VT
1937 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1938 *
1939  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1940  $ work( iwork ), lwork-iwork+1, ierr )
1941  iwork = ie + n
1942 *
1943 * Perform bidiagonal QR iteration, computing left
1944 * singular vectors of A in U and computing right
1945 * singular vectors of A in VT
1946 * (Workspace: need BDSPAC)
1947 *
1948  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1949  $ ldvt, u, ldu, dum, 1, work( iwork ),
1950  $ info )
1951 *
1952  END IF
1953 *
1954  END IF
1955 *
1956  END IF
1957 *
1958  ELSE
1959 *
1960 * M .LT. MNTHR
1961 *
1962 * Path 10 (M at least N, but not much larger)
1963 * Reduce to bidiagonal form without QR decomposition
1964 *
1965  ie = 1
1966  itauq = ie + n
1967  itaup = itauq + n
1968  iwork = itaup + n
1969 *
1970 * Bidiagonalize A
1971 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1972 *
1973  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1974  $ work( itaup ), work( iwork ), lwork-iwork+1,
1975  $ ierr )
1976  IF( wntuas ) THEN
1977 *
1978 * If left singular vectors desired in U, copy result to U
1979 * and generate left bidiagonalizing vectors in U
1980 * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1981 *
1982  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1983  IF( wntus )
1984  $ ncu = n
1985  IF( wntua )
1986  $ ncu = m
1987  CALL sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1988  $ work( iwork ), lwork-iwork+1, ierr )
1989  END IF
1990  IF( wntvas ) THEN
1991 *
1992 * If right singular vectors desired in VT, copy result to
1993 * VT and generate right bidiagonalizing vectors in VT
1994 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1995 *
1996  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1997  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1998  $ work( iwork ), lwork-iwork+1, ierr )
1999  END IF
2000  IF( wntuo ) THEN
2001 *
2002 * If left singular vectors desired in A, generate left
2003 * bidiagonalizing vectors in A
2004 * (Workspace: need 4*N, prefer 3*N+N*NB)
2005 *
2006  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2007  $ work( iwork ), lwork-iwork+1, ierr )
2008  END IF
2009  IF( wntvo ) THEN
2010 *
2011 * If right singular vectors desired in A, generate right
2012 * bidiagonalizing vectors in A
2013 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2014 *
2015  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
2016  $ work( iwork ), lwork-iwork+1, ierr )
2017  END IF
2018  iwork = ie + n
2019  IF( wntuas .OR. wntuo )
2020  $ nru = m
2021  IF( wntun )
2022  $ nru = 0
2023  IF( wntvas .OR. wntvo )
2024  $ ncvt = n
2025  IF( wntvn )
2026  $ ncvt = 0
2027  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2028 *
2029 * Perform bidiagonal QR iteration, if desired, computing
2030 * left singular vectors in U and computing right singular
2031 * vectors in VT
2032 * (Workspace: need BDSPAC)
2033 *
2034  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2035  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2036  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2037 *
2038 * Perform bidiagonal QR iteration, if desired, computing
2039 * left singular vectors in U and computing right singular
2040 * vectors in A
2041 * (Workspace: need BDSPAC)
2042 *
2043  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2044  $ u, ldu, dum, 1, work( iwork ), info )
2045  ELSE
2046 *
2047 * Perform bidiagonal QR iteration, if desired, computing
2048 * left singular vectors in A and computing right singular
2049 * vectors in VT
2050 * (Workspace: need BDSPAC)
2051 *
2052  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2053  $ ldvt, a, lda, dum, 1, work( iwork ), info )
2054  END IF
2055 *
2056  END IF
2057 *
2058  ELSE
2059 *
2060 * A has more columns than rows. If A has sufficiently more
2061 * columns than rows, first reduce using the LQ decomposition (if
2062 * sufficient workspace available)
2063 *
2064  IF( n.GE.mnthr ) THEN
2065 *
2066  IF( wntvn ) THEN
2067 *
2068 * Path 1t(N much larger than M, JOBVT='N')
2069 * No right singular vectors to be computed
2070 *
2071  itau = 1
2072  iwork = itau + m
2073 *
2074 * Compute A=L*Q
2075 * (Workspace: need 2*M, prefer M+M*NB)
2076 *
2077  CALL sgelqf( m, n, a, lda, work( itau ), work( iwork ),
2078  $ lwork-iwork+1, ierr )
2079 *
2080 * Zero out above L
2081 *
2082  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2083  ie = 1
2084  itauq = ie + m
2085  itaup = itauq + m
2086  iwork = itaup + m
2087 *
2088 * Bidiagonalize L in A
2089 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2090 *
2091  CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2092  $ work( itaup ), work( iwork ), lwork-iwork+1,
2093  $ ierr )
2094  IF( wntuo .OR. wntuas ) THEN
2095 *
2096 * If left singular vectors desired, generate Q
2097 * (Workspace: need 4*M, prefer 3*M+M*NB)
2098 *
2099  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2100  $ work( iwork ), lwork-iwork+1, ierr )
2101  END IF
2102  iwork = ie + m
2103  nru = 0
2104  IF( wntuo .OR. wntuas )
2105  $ nru = m
2106 *
2107 * Perform bidiagonal QR iteration, computing left singular
2108 * vectors of A in A if desired
2109 * (Workspace: need BDSPAC)
2110 *
2111  CALL sbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2112  $ lda, dum, 1, work( iwork ), info )
2113 *
2114 * If left singular vectors desired in U, copy them there
2115 *
2116  IF( wntuas )
2117  $ CALL slacpy( 'F', m, m, a, lda, u, ldu )
2118 *
2119  ELSE IF( wntvo .AND. wntun ) THEN
2120 *
2121 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2122 * M right singular vectors to be overwritten on A and
2123 * no left singular vectors to be computed
2124 *
2125  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2126 *
2127 * Sufficient workspace for a fast algorithm
2128 *
2129  ir = 1
2130  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2131 *
2132 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2133 *
2134  ldwrku = lda
2135  chunk = n
2136  ldwrkr = lda
2137  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2138 *
2139 * WORK(IU) is LDA by N and WORK(IR) is M by M
2140 *
2141  ldwrku = lda
2142  chunk = n
2143  ldwrkr = m
2144  ELSE
2145 *
2146 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2147 *
2148  ldwrku = m
2149  chunk = ( lwork-m*m-m ) / m
2150  ldwrkr = m
2151  END IF
2152  itau = ir + ldwrkr*m
2153  iwork = itau + m
2154 *
2155 * Compute A=L*Q
2156 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2157 *
2158  CALL sgelqf( m, n, a, lda, work( itau ),
2159  $ work( iwork ), lwork-iwork+1, ierr )
2160 *
2161 * Copy L to WORK(IR) and zero out above it
2162 *
2163  CALL slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2164  CALL slaset( 'U', m-1, m-1, zero, zero,
2165  $ work( ir+ldwrkr ), ldwrkr )
2166 *
2167 * Generate Q in A
2168 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2169 *
2170  CALL sorglq( m, n, m, a, lda, work( itau ),
2171  $ work( iwork ), lwork-iwork+1, ierr )
2172  ie = itau
2173  itauq = ie + m
2174  itaup = itauq + m
2175  iwork = itaup + m
2176 *
2177 * Bidiagonalize L in WORK(IR)
2178 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2179 *
2180  CALL sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2181  $ work( itauq ), work( itaup ),
2182  $ work( iwork ), lwork-iwork+1, ierr )
2183 *
2184 * Generate right vectors bidiagonalizing L
2185 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2186 *
2187  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2188  $ work( itaup ), work( iwork ),
2189  $ lwork-iwork+1, ierr )
2190  iwork = ie + m
2191 *
2192 * Perform bidiagonal QR iteration, computing right
2193 * singular vectors of L in WORK(IR)
2194 * (Workspace: need M*M+BDSPAC)
2195 *
2196  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2197  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2198  $ work( iwork ), info )
2199  iu = ie + m
2200 *
2201 * Multiply right singular vectors of L in WORK(IR) by Q
2202 * in A, storing result in WORK(IU) and copying to A
2203 * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2204 *
2205  DO 30 i = 1, n, chunk
2206  blk = min( n-i+1, chunk )
2207  CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2208  $ ldwrkr, a( 1, i ), lda, zero,
2209  $ work( iu ), ldwrku )
2210  CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2211  $ a( 1, i ), lda )
2212  30 CONTINUE
2213 *
2214  ELSE
2215 *
2216 * Insufficient workspace for a fast algorithm
2217 *
2218  ie = 1
2219  itauq = ie + m
2220  itaup = itauq + m
2221  iwork = itaup + m
2222 *
2223 * Bidiagonalize A
2224 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2225 *
2226  CALL sgebrd( m, n, a, lda, s, work( ie ),
2227  $ work( itauq ), work( itaup ),
2228  $ work( iwork ), lwork-iwork+1, ierr )
2229 *
2230 * Generate right vectors bidiagonalizing A
2231 * (Workspace: need 4*M, prefer 3*M+M*NB)
2232 *
2233  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
2234  $ work( iwork ), lwork-iwork+1, ierr )
2235  iwork = ie + m
2236 *
2237 * Perform bidiagonal QR iteration, computing right
2238 * singular vectors of A in A
2239 * (Workspace: need BDSPAC)
2240 *
2241  CALL sbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2242  $ dum, 1, dum, 1, work( iwork ), info )
2243 *
2244  END IF
2245 *
2246  ELSE IF( wntvo .AND. wntuas ) THEN
2247 *
2248 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2249 * M right singular vectors to be overwritten on A and
2250 * M left singular vectors to be computed in U
2251 *
2252  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2253 *
2254 * Sufficient workspace for a fast algorithm
2255 *
2256  ir = 1
2257  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2258 *
2259 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2260 *
2261  ldwrku = lda
2262  chunk = n
2263  ldwrkr = lda
2264  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2265 *
2266 * WORK(IU) is LDA by N and WORK(IR) is M by M
2267 *
2268  ldwrku = lda
2269  chunk = n
2270  ldwrkr = m
2271  ELSE
2272 *
2273 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2274 *
2275  ldwrku = m
2276  chunk = ( lwork-m*m-m ) / m
2277  ldwrkr = m
2278  END IF
2279  itau = ir + ldwrkr*m
2280  iwork = itau + m
2281 *
2282 * Compute A=L*Q
2283 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2284 *
2285  CALL sgelqf( m, n, a, lda, work( itau ),
2286  $ work( iwork ), lwork-iwork+1, ierr )
2287 *
2288 * Copy L to U, zeroing about above it
2289 *
2290  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2291  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2292  $ ldu )
2293 *
2294 * Generate Q in A
2295 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2296 *
2297  CALL sorglq( m, n, m, a, lda, work( itau ),
2298  $ work( iwork ), lwork-iwork+1, ierr )
2299  ie = itau
2300  itauq = ie + m
2301  itaup = itauq + m
2302  iwork = itaup + m
2303 *
2304 * Bidiagonalize L in U, copying result to WORK(IR)
2305 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2306 *
2307  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2308  $ work( itauq ), work( itaup ),
2309  $ work( iwork ), lwork-iwork+1, ierr )
2310  CALL slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2311 *
2312 * Generate right vectors bidiagonalizing L in WORK(IR)
2313 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2314 *
2315  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2316  $ work( itaup ), work( iwork ),
2317  $ lwork-iwork+1, ierr )
2318 *
2319 * Generate left vectors bidiagonalizing L in U
2320 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2321 *
2322  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2323  $ work( iwork ), lwork-iwork+1, ierr )
2324  iwork = ie + m
2325 *
2326 * Perform bidiagonal QR iteration, computing left
2327 * singular vectors of L in U, and computing right
2328 * singular vectors of L in WORK(IR)
2329 * (Workspace: need M*M+BDSPAC)
2330 *
2331  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2332  $ work( ir ), ldwrkr, u, ldu, dum, 1,
2333  $ work( iwork ), info )
2334  iu = ie + m
2335 *
2336 * Multiply right singular vectors of L in WORK(IR) by Q
2337 * in A, storing result in WORK(IU) and copying to A
2338 * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2339 *
2340  DO 40 i = 1, n, chunk
2341  blk = min( n-i+1, chunk )
2342  CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2343  $ ldwrkr, a( 1, i ), lda, zero,
2344  $ work( iu ), ldwrku )
2345  CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2346  $ a( 1, i ), lda )
2347  40 CONTINUE
2348 *
2349  ELSE
2350 *
2351 * Insufficient workspace for a fast algorithm
2352 *
2353  itau = 1
2354  iwork = itau + m
2355 *
2356 * Compute A=L*Q
2357 * (Workspace: need 2*M, prefer M+M*NB)
2358 *
2359  CALL sgelqf( m, n, a, lda, work( itau ),
2360  $ work( iwork ), lwork-iwork+1, ierr )
2361 *
2362 * Copy L to U, zeroing out above it
2363 *
2364  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2365  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2366  $ ldu )
2367 *
2368 * Generate Q in A
2369 * (Workspace: need 2*M, prefer M+M*NB)
2370 *
2371  CALL sorglq( m, n, m, a, lda, work( itau ),
2372  $ work( iwork ), lwork-iwork+1, ierr )
2373  ie = itau
2374  itauq = ie + m
2375  itaup = itauq + m
2376  iwork = itaup + m
2377 *
2378 * Bidiagonalize L in U
2379 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2380 *
2381  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2382  $ work( itauq ), work( itaup ),
2383  $ work( iwork ), lwork-iwork+1, ierr )
2384 *
2385 * Multiply right vectors bidiagonalizing L by Q in A
2386 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2387 *
2388  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2389  $ work( itaup ), a, lda, work( iwork ),
2390  $ lwork-iwork+1, ierr )
2391 *
2392 * Generate left vectors bidiagonalizing L in U
2393 * (Workspace: need 4*M, prefer 3*M+M*NB)
2394 *
2395  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2396  $ work( iwork ), lwork-iwork+1, ierr )
2397  iwork = ie + m
2398 *
2399 * Perform bidiagonal QR iteration, computing left
2400 * singular vectors of A in U and computing right
2401 * singular vectors of A in A
2402 * (Workspace: need BDSPAC)
2403 *
2404  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2405  $ u, ldu, dum, 1, work( iwork ), info )
2406 *
2407  END IF
2408 *
2409  ELSE IF( wntvs ) THEN
2410 *
2411  IF( wntun ) THEN
2412 *
2413 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2414 * M right singular vectors to be computed in VT and
2415 * no left singular vectors to be computed
2416 *
2417  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2418 *
2419 * Sufficient workspace for a fast algorithm
2420 *
2421  ir = 1
2422  IF( lwork.GE.wrkbl+lda*m ) THEN
2423 *
2424 * WORK(IR) is LDA by M
2425 *
2426  ldwrkr = lda
2427  ELSE
2428 *
2429 * WORK(IR) is M by M
2430 *
2431  ldwrkr = m
2432  END IF
2433  itau = ir + ldwrkr*m
2434  iwork = itau + m
2435 *
2436 * Compute A=L*Q
2437 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2438 *
2439  CALL sgelqf( m, n, a, lda, work( itau ),
2440  $ work( iwork ), lwork-iwork+1, ierr )
2441 *
2442 * Copy L to WORK(IR), zeroing out above it
2443 *
2444  CALL slacpy( 'L', m, m, a, lda, work( ir ),
2445  $ ldwrkr )
2446  CALL slaset( 'U', m-1, m-1, zero, zero,
2447  $ work( ir+ldwrkr ), ldwrkr )
2448 *
2449 * Generate Q in A
2450 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2451 *
2452  CALL sorglq( m, n, m, a, lda, work( itau ),
2453  $ work( iwork ), lwork-iwork+1, ierr )
2454  ie = itau
2455  itauq = ie + m
2456  itaup = itauq + m
2457  iwork = itaup + m
2458 *
2459 * Bidiagonalize L in WORK(IR)
2460 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2461 *
2462  CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2463  $ work( ie ), work( itauq ),
2464  $ work( itaup ), work( iwork ),
2465  $ lwork-iwork+1, ierr )
2466 *
2467 * Generate right vectors bidiagonalizing L in
2468 * WORK(IR)
2469 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2470 *
2471  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2472  $ work( itaup ), work( iwork ),
2473  $ lwork-iwork+1, ierr )
2474  iwork = ie + m
2475 *
2476 * Perform bidiagonal QR iteration, computing right
2477 * singular vectors of L in WORK(IR)
2478 * (Workspace: need M*M+BDSPAC)
2479 *
2480  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2481  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2482  $ work( iwork ), info )
2483 *
2484 * Multiply right singular vectors of L in WORK(IR) by
2485 * Q in A, storing result in VT
2486 * (Workspace: need M*M)
2487 *
2488  CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2489  $ ldwrkr, a, lda, zero, vt, ldvt )
2490 *
2491  ELSE
2492 *
2493 * Insufficient workspace for a fast algorithm
2494 *
2495  itau = 1
2496  iwork = itau + m
2497 *
2498 * Compute A=L*Q
2499 * (Workspace: need 2*M, prefer M+M*NB)
2500 *
2501  CALL sgelqf( m, n, a, lda, work( itau ),
2502  $ work( iwork ), lwork-iwork+1, ierr )
2503 *
2504 * Copy result to VT
2505 *
2506  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2507 *
2508 * Generate Q in VT
2509 * (Workspace: need 2*M, prefer M+M*NB)
2510 *
2511  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2512  $ work( iwork ), lwork-iwork+1, ierr )
2513  ie = itau
2514  itauq = ie + m
2515  itaup = itauq + m
2516  iwork = itaup + m
2517 *
2518 * Zero out above L in A
2519 *
2520  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2521  $ lda )
2522 *
2523 * Bidiagonalize L in A
2524 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2525 *
2526  CALL sgebrd( m, m, a, lda, s, work( ie ),
2527  $ work( itauq ), work( itaup ),
2528  $ work( iwork ), lwork-iwork+1, ierr )
2529 *
2530 * Multiply right vectors bidiagonalizing L by Q in VT
2531 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2532 *
2533  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2534  $ work( itaup ), vt, ldvt,
2535  $ work( iwork ), lwork-iwork+1, ierr )
2536  iwork = ie + m
2537 *
2538 * Perform bidiagonal QR iteration, computing right
2539 * singular vectors of A in VT
2540 * (Workspace: need BDSPAC)
2541 *
2542  CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2543  $ ldvt, dum, 1, dum, 1, work( iwork ),
2544  $ info )
2545 *
2546  END IF
2547 *
2548  ELSE IF( wntuo ) THEN
2549 *
2550 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2551 * M right singular vectors to be computed in VT and
2552 * M left singular vectors to be overwritten on A
2553 *
2554  IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2555 *
2556 * Sufficient workspace for a fast algorithm
2557 *
2558  iu = 1
2559  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2560 *
2561 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2562 *
2563  ldwrku = lda
2564  ir = iu + ldwrku*m
2565  ldwrkr = lda
2566  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2567 *
2568 * WORK(IU) is LDA by M and WORK(IR) is M by M
2569 *
2570  ldwrku = lda
2571  ir = iu + ldwrku*m
2572  ldwrkr = m
2573  ELSE
2574 *
2575 * WORK(IU) is M by M and WORK(IR) is M by M
2576 *
2577  ldwrku = m
2578  ir = iu + ldwrku*m
2579  ldwrkr = m
2580  END IF
2581  itau = ir + ldwrkr*m
2582  iwork = itau + m
2583 *
2584 * Compute A=L*Q
2585 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2586 *
2587  CALL sgelqf( m, n, a, lda, work( itau ),
2588  $ work( iwork ), lwork-iwork+1, ierr )
2589 *
2590 * Copy L to WORK(IU), zeroing out below it
2591 *
2592  CALL slacpy( 'L', m, m, a, lda, work( iu ),
2593  $ ldwrku )
2594  CALL slaset( 'U', m-1, m-1, zero, zero,
2595  $ work( iu+ldwrku ), ldwrku )
2596 *
2597 * Generate Q in A
2598 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2599 *
2600  CALL sorglq( m, n, m, a, lda, work( itau ),
2601  $ work( iwork ), lwork-iwork+1, ierr )
2602  ie = itau
2603  itauq = ie + m
2604  itaup = itauq + m
2605  iwork = itaup + m
2606 *
2607 * Bidiagonalize L in WORK(IU), copying result to
2608 * WORK(IR)
2609 * (Workspace: need 2*M*M+4*M,
2610 * prefer 2*M*M+3*M+2*M*NB)
2611 *
2612  CALL sgebrd( m, m, work( iu ), ldwrku, s,
2613  $ work( ie ), work( itauq ),
2614  $ work( itaup ), work( iwork ),
2615  $ lwork-iwork+1, ierr )
2616  CALL slacpy( 'L', m, m, work( iu ), ldwrku,
2617  $ work( ir ), ldwrkr )
2618 *
2619 * Generate right bidiagonalizing vectors in WORK(IU)
2620 * (Workspace: need 2*M*M+4*M-1,
2621 * prefer 2*M*M+3*M+(M-1)*NB)
2622 *
2623  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2624  $ work( itaup ), work( iwork ),
2625  $ lwork-iwork+1, ierr )
2626 *
2627 * Generate left bidiagonalizing vectors in WORK(IR)
2628 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2629 *
2630  CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2631  $ work( itauq ), work( iwork ),
2632  $ lwork-iwork+1, ierr )
2633  iwork = ie + m
2634 *
2635 * Perform bidiagonal QR iteration, computing left
2636 * singular vectors of L in WORK(IR) and computing
2637 * right singular vectors of L in WORK(IU)
2638 * (Workspace: need 2*M*M+BDSPAC)
2639 *
2640  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2641  $ work( iu ), ldwrku, work( ir ),
2642  $ ldwrkr, dum, 1, work( iwork ), info )
2643 *
2644 * Multiply right singular vectors of L in WORK(IU) by
2645 * Q in A, storing result in VT
2646 * (Workspace: need M*M)
2647 *
2648  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2649  $ ldwrku, a, lda, zero, vt, ldvt )
2650 *
2651 * Copy left singular vectors of L to A
2652 * (Workspace: need M*M)
2653 *
2654  CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
2655  $ lda )
2656 *
2657  ELSE
2658 *
2659 * Insufficient workspace for a fast algorithm
2660 *
2661  itau = 1
2662  iwork = itau + m
2663 *
2664 * Compute A=L*Q, copying result to VT
2665 * (Workspace: need 2*M, prefer M+M*NB)
2666 *
2667  CALL sgelqf( m, n, a, lda, work( itau ),
2668  $ work( iwork ), lwork-iwork+1, ierr )
2669  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2670 *
2671 * Generate Q in VT
2672 * (Workspace: need 2*M, prefer M+M*NB)
2673 *
2674  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2675  $ work( iwork ), lwork-iwork+1, ierr )
2676  ie = itau
2677  itauq = ie + m
2678  itaup = itauq + m
2679  iwork = itaup + m
2680 *
2681 * Zero out above L in A
2682 *
2683  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2684  $ lda )
2685 *
2686 * Bidiagonalize L in A
2687 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2688 *
2689  CALL sgebrd( m, m, a, lda, s, work( ie ),
2690  $ work( itauq ), work( itaup ),
2691  $ work( iwork ), lwork-iwork+1, ierr )
2692 *
2693 * Multiply right vectors bidiagonalizing L by Q in VT
2694 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2695 *
2696  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2697  $ work( itaup ), vt, ldvt,
2698  $ work( iwork ), lwork-iwork+1, ierr )
2699 *
2700 * Generate left bidiagonalizing vectors of L in A
2701 * (Workspace: need 4*M, prefer 3*M+M*NB)
2702 *
2703  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2704  $ work( iwork ), lwork-iwork+1, ierr )
2705  iwork = ie + m
2706 *
2707 * Perform bidiagonal QR iteration, compute left
2708 * singular vectors of A in A and compute right
2709 * singular vectors of A in VT
2710 * (Workspace: need BDSPAC)
2711 *
2712  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2713  $ ldvt, a, lda, dum, 1, work( iwork ),
2714  $ info )
2715 *
2716  END IF
2717 *
2718  ELSE IF( wntuas ) THEN
2719 *
2720 * Path 6t(N much larger than M, JOBU='S' or 'A',
2721 * JOBVT='S')
2722 * M right singular vectors to be computed in VT and
2723 * M left singular vectors to be computed in U
2724 *
2725  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2726 *
2727 * Sufficient workspace for a fast algorithm
2728 *
2729  iu = 1
2730  IF( lwork.GE.wrkbl+lda*m ) THEN
2731 *
2732 * WORK(IU) is LDA by N
2733 *
2734  ldwrku = lda
2735  ELSE
2736 *
2737 * WORK(IU) is LDA by M
2738 *
2739  ldwrku = m
2740  END IF
2741  itau = iu + ldwrku*m
2742  iwork = itau + m
2743 *
2744 * Compute A=L*Q
2745 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2746 *
2747  CALL sgelqf( m, n, a, lda, work( itau ),
2748  $ work( iwork ), lwork-iwork+1, ierr )
2749 *
2750 * Copy L to WORK(IU), zeroing out above it
2751 *
2752  CALL slacpy( 'L', m, m, a, lda, work( iu ),
2753  $ ldwrku )
2754  CALL slaset( 'U', m-1, m-1, zero, zero,
2755  $ work( iu+ldwrku ), ldwrku )
2756 *
2757 * Generate Q in A
2758 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2759 *
2760  CALL sorglq( m, n, m, a, lda, work( itau ),
2761  $ work( iwork ), lwork-iwork+1, ierr )
2762  ie = itau
2763  itauq = ie + m
2764  itaup = itauq + m
2765  iwork = itaup + m
2766 *
2767 * Bidiagonalize L in WORK(IU), copying result to U
2768 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2769 *
2770  CALL sgebrd( m, m, work( iu ), ldwrku, s,
2771  $ work( ie ), work( itauq ),
2772  $ work( itaup ), work( iwork ),
2773  $ lwork-iwork+1, ierr )
2774  CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
2775  $ ldu )
2776 *
2777 * Generate right bidiagonalizing vectors in WORK(IU)
2778 * (Workspace: need M*M+4*M-1,
2779 * prefer M*M+3*M+(M-1)*NB)
2780 *
2781  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2782  $ work( itaup ), work( iwork ),
2783  $ lwork-iwork+1, ierr )
2784 *
2785 * Generate left bidiagonalizing vectors in U
2786 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2787 *
2788  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2789  $ work( iwork ), lwork-iwork+1, ierr )
2790  iwork = ie + m
2791 *
2792 * Perform bidiagonal QR iteration, computing left
2793 * singular vectors of L in U and computing right
2794 * singular vectors of L in WORK(IU)
2795 * (Workspace: need M*M+BDSPAC)
2796 *
2797  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2798  $ work( iu ), ldwrku, u, ldu, dum, 1,
2799  $ work( iwork ), info )
2800 *
2801 * Multiply right singular vectors of L in WORK(IU) by
2802 * Q in A, storing result in VT
2803 * (Workspace: need M*M)
2804 *
2805  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2806  $ ldwrku, a, lda, zero, vt, ldvt )
2807 *
2808  ELSE
2809 *
2810 * Insufficient workspace for a fast algorithm
2811 *
2812  itau = 1
2813  iwork = itau + m
2814 *
2815 * Compute A=L*Q, copying result to VT
2816 * (Workspace: need 2*M, prefer M+M*NB)
2817 *
2818  CALL sgelqf( m, n, a, lda, work( itau ),
2819  $ work( iwork ), lwork-iwork+1, ierr )
2820  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2821 *
2822 * Generate Q in VT
2823 * (Workspace: need 2*M, prefer M+M*NB)
2824 *
2825  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2826  $ work( iwork ), lwork-iwork+1, ierr )
2827 *
2828 * Copy L to U, zeroing out above it
2829 *
2830  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2831  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2832  $ ldu )
2833  ie = itau
2834  itauq = ie + m
2835  itaup = itauq + m
2836  iwork = itaup + m
2837 *
2838 * Bidiagonalize L in U
2839 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2840 *
2841  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2842  $ work( itauq ), work( itaup ),
2843  $ work( iwork ), lwork-iwork+1, ierr )
2844 *
2845 * Multiply right bidiagonalizing vectors in U by Q
2846 * in VT
2847 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2848 *
2849  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2850  $ work( itaup ), vt, ldvt,
2851  $ work( iwork ), lwork-iwork+1, ierr )
2852 *
2853 * Generate left bidiagonalizing vectors in U
2854 * (Workspace: need 4*M, prefer 3*M+M*NB)
2855 *
2856  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2857  $ work( iwork ), lwork-iwork+1, ierr )
2858  iwork = ie + m
2859 *
2860 * Perform bidiagonal QR iteration, computing left
2861 * singular vectors of A in U and computing right
2862 * singular vectors of A in VT
2863 * (Workspace: need BDSPAC)
2864 *
2865  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2866  $ ldvt, u, ldu, dum, 1, work( iwork ),
2867  $ info )
2868 *
2869  END IF
2870 *
2871  END IF
2872 *
2873  ELSE IF( wntva ) THEN
2874 *
2875  IF( wntun ) THEN
2876 *
2877 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2878 * N right singular vectors to be computed in VT and
2879 * no left singular vectors to be computed
2880 *
2881  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
2882 *
2883 * Sufficient workspace for a fast algorithm
2884 *
2885  ir = 1
2886  IF( lwork.GE.wrkbl+lda*m ) THEN
2887 *
2888 * WORK(IR) is LDA by M
2889 *
2890  ldwrkr = lda
2891  ELSE
2892 *
2893 * WORK(IR) is M by M
2894 *
2895  ldwrkr = m
2896  END IF
2897  itau = ir + ldwrkr*m
2898  iwork = itau + m
2899 *
2900 * Compute A=L*Q, copying result to VT
2901 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2902 *
2903  CALL sgelqf( m, n, a, lda, work( itau ),
2904  $ work( iwork ), lwork-iwork+1, ierr )
2905  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2906 *
2907 * Copy L to WORK(IR), zeroing out above it
2908 *
2909  CALL slacpy( 'L', m, m, a, lda, work( ir ),
2910  $ ldwrkr )
2911  CALL slaset( 'U', m-1, m-1, zero, zero,
2912  $ work( ir+ldwrkr ), ldwrkr )
2913 *
2914 * Generate Q in VT
2915 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2916 *
2917  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2918  $ work( iwork ), lwork-iwork+1, ierr )
2919  ie = itau
2920  itauq = ie + m
2921  itaup = itauq + m
2922  iwork = itaup + m
2923 *
2924 * Bidiagonalize L in WORK(IR)
2925 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2926 *
2927  CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2928  $ work( ie ), work( itauq ),
2929  $ work( itaup ), work( iwork ),
2930  $ lwork-iwork+1, ierr )
2931 *
2932 * Generate right bidiagonalizing vectors in WORK(IR)
2933 * (Workspace: need M*M+4*M-1,
2934 * prefer M*M+3*M+(M-1)*NB)
2935 *
2936  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2937  $ work( itaup ), work( iwork ),
2938  $ lwork-iwork+1, ierr )
2939  iwork = ie + m
2940 *
2941 * Perform bidiagonal QR iteration, computing right
2942 * singular vectors of L in WORK(IR)
2943 * (Workspace: need M*M+BDSPAC)
2944 *
2945  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2946  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2947  $ work( iwork ), info )
2948 *
2949 * Multiply right singular vectors of L in WORK(IR) by
2950 * Q in VT, storing result in A
2951 * (Workspace: need M*M)
2952 *
2953  CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2954  $ ldwrkr, vt, ldvt, zero, a, lda )
2955 *
2956 * Copy right singular vectors of A from A to VT
2957 *
2958  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
2959 *
2960  ELSE
2961 *
2962 * Insufficient workspace for a fast algorithm
2963 *
2964  itau = 1
2965  iwork = itau + m
2966 *
2967 * Compute A=L*Q, copying result to VT
2968 * (Workspace: need 2*M, prefer M+M*NB)
2969 *
2970  CALL sgelqf( m, n, a, lda, work( itau ),
2971  $ work( iwork ), lwork-iwork+1, ierr )
2972  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2973 *
2974 * Generate Q in VT
2975 * (Workspace: need M+N, prefer M+N*NB)
2976 *
2977  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2978  $ work( iwork ), lwork-iwork+1, ierr )
2979  ie = itau
2980  itauq = ie + m
2981  itaup = itauq + m
2982  iwork = itaup + m
2983 *
2984 * Zero out above L in A
2985 *
2986  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2987  $ lda )
2988 *
2989 * Bidiagonalize L in A
2990 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2991 *
2992  CALL sgebrd( m, m, a, lda, s, work( ie ),
2993  $ work( itauq ), work( itaup ),
2994  $ work( iwork ), lwork-iwork+1, ierr )
2995 *
2996 * Multiply right bidiagonalizing vectors in A by Q
2997 * in VT
2998 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2999 *
3000  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3001  $ work( itaup ), vt, ldvt,
3002  $ work( iwork ), lwork-iwork+1, ierr )
3003  iwork = ie + m
3004 *
3005 * Perform bidiagonal QR iteration, computing right
3006 * singular vectors of A in VT
3007 * (Workspace: need BDSPAC)
3008 *
3009  CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3010  $ ldvt, dum, 1, dum, 1, work( iwork ),
3011  $ info )
3012 *
3013  END IF
3014 *
3015  ELSE IF( wntuo ) THEN
3016 *
3017 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3018 * N right singular vectors to be computed in VT and
3019 * M left singular vectors to be overwritten on A
3020 *
3021  IF( lwork.GE.2*m*m+max( n+m, 4*m, bdspac ) ) THEN
3022 *
3023 * Sufficient workspace for a fast algorithm
3024 *
3025  iu = 1
3026  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3027 *
3028 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3029 *
3030  ldwrku = lda
3031  ir = iu + ldwrku*m
3032  ldwrkr = lda
3033  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3034 *
3035 * WORK(IU) is LDA by M and WORK(IR) is M by M
3036 *
3037  ldwrku = lda
3038  ir = iu + ldwrku*m
3039  ldwrkr = m
3040  ELSE
3041 *
3042 * WORK(IU) is M by M and WORK(IR) is M by M
3043 *
3044  ldwrku = m
3045  ir = iu + ldwrku*m
3046  ldwrkr = m
3047  END IF
3048  itau = ir + ldwrkr*m
3049  iwork = itau + m
3050 *
3051 * Compute A=L*Q, copying result to VT
3052 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3053 *
3054  CALL sgelqf( m, n, a, lda, work( itau ),
3055  $ work( iwork ), lwork-iwork+1, ierr )
3056  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3057 *
3058 * Generate Q in VT
3059 * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3060 *
3061  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3062  $ work( iwork ), lwork-iwork+1, ierr )
3063 *
3064 * Copy L to WORK(IU), zeroing out above it
3065 *
3066  CALL slacpy( 'L', m, m, a, lda, work( iu ),
3067  $ ldwrku )
3068  CALL slaset( 'U', m-1, m-1, zero, zero,
3069  $ work( iu+ldwrku ), ldwrku )
3070  ie = itau
3071  itauq = ie + m
3072  itaup = itauq + m
3073  iwork = itaup + m
3074 *
3075 * Bidiagonalize L in WORK(IU), copying result to
3076 * WORK(IR)
3077 * (Workspace: need 2*M*M+4*M,
3078 * prefer 2*M*M+3*M+2*M*NB)
3079 *
3080  CALL sgebrd( m, m, work( iu ), ldwrku, s,
3081  $ work( ie ), work( itauq ),
3082  $ work( itaup ), work( iwork ),
3083  $ lwork-iwork+1, ierr )
3084  CALL slacpy( 'L', m, m, work( iu ), ldwrku,
3085  $ work( ir ), ldwrkr )
3086 *
3087 * Generate right bidiagonalizing vectors in WORK(IU)
3088 * (Workspace: need 2*M*M+4*M-1,
3089 * prefer 2*M*M+3*M+(M-1)*NB)
3090 *
3091  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3092  $ work( itaup ), work( iwork ),
3093  $ lwork-iwork+1, ierr )
3094 *
3095 * Generate left bidiagonalizing vectors in WORK(IR)
3096 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3097 *
3098  CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3099  $ work( itauq ), work( iwork ),
3100  $ lwork-iwork+1, ierr )
3101  iwork = ie + m
3102 *
3103 * Perform bidiagonal QR iteration, computing left
3104 * singular vectors of L in WORK(IR) and computing
3105 * right singular vectors of L in WORK(IU)
3106 * (Workspace: need 2*M*M+BDSPAC)
3107 *
3108  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3109  $ work( iu ), ldwrku, work( ir ),
3110  $ ldwrkr, dum, 1, work( iwork ), info )
3111 *
3112 * Multiply right singular vectors of L in WORK(IU) by
3113 * Q in VT, storing result in A
3114 * (Workspace: need M*M)
3115 *
3116  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3117  $ ldwrku, vt, ldvt, zero, a, lda )
3118 *
3119 * Copy right singular vectors of A from A to VT
3120 *
3121  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3122 *
3123 * Copy left singular vectors of A from WORK(IR) to A
3124 *
3125  CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
3126  $ lda )
3127 *
3128  ELSE
3129 *
3130 * Insufficient workspace for a fast algorithm
3131 *
3132  itau = 1
3133  iwork = itau + m
3134 *
3135 * Compute A=L*Q, copying result to VT
3136 * (Workspace: need 2*M, prefer M+M*NB)
3137 *
3138  CALL sgelqf( m, n, a, lda, work( itau ),
3139  $ work( iwork ), lwork-iwork+1, ierr )
3140  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3141 *
3142 * Generate Q in VT
3143 * (Workspace: need M+N, prefer M+N*NB)
3144 *
3145  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3146  $ work( iwork ), lwork-iwork+1, ierr )
3147  ie = itau
3148  itauq = ie + m
3149  itaup = itauq + m
3150  iwork = itaup + m
3151 *
3152 * Zero out above L in A
3153 *
3154  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3155  $ lda )
3156 *
3157 * Bidiagonalize L in A
3158 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3159 *
3160  CALL sgebrd( m, m, a, lda, s, work( ie ),
3161  $ work( itauq ), work( itaup ),
3162  $ work( iwork ), lwork-iwork+1, ierr )
3163 *
3164 * Multiply right bidiagonalizing vectors in A by Q
3165 * in VT
3166 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3167 *
3168  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3169  $ work( itaup ), vt, ldvt,
3170  $ work( iwork ), lwork-iwork+1, ierr )
3171 *
3172 * Generate left bidiagonalizing vectors in A
3173 * (Workspace: need 4*M, prefer 3*M+M*NB)
3174 *
3175  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3176  $ work( iwork ), lwork-iwork+1, ierr )
3177  iwork = ie + m
3178 *
3179 * Perform bidiagonal QR iteration, computing left
3180 * singular vectors of A in A and computing right
3181 * singular vectors of A in VT
3182 * (Workspace: need BDSPAC)
3183 *
3184  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3185  $ ldvt, a, lda, dum, 1, work( iwork ),
3186  $ info )
3187 *
3188  END IF
3189 *
3190  ELSE IF( wntuas ) THEN
3191 *
3192 * Path 9t(N much larger than M, JOBU='S' or 'A',
3193 * JOBVT='A')
3194 * N right singular vectors to be computed in VT and
3195 * M left singular vectors to be computed in U
3196 *
3197  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
3198 *
3199 * Sufficient workspace for a fast algorithm
3200 *
3201  iu = 1
3202  IF( lwork.GE.wrkbl+lda*m ) THEN
3203 *
3204 * WORK(IU) is LDA by M
3205 *
3206  ldwrku = lda
3207  ELSE
3208 *
3209 * WORK(IU) is M by M
3210 *
3211  ldwrku = m
3212  END IF
3213  itau = iu + ldwrku*m
3214  iwork = itau + m
3215 *
3216 * Compute A=L*Q, copying result to VT
3217 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3218 *
3219  CALL sgelqf( m, n, a, lda, work( itau ),
3220  $ work( iwork ), lwork-iwork+1, ierr )
3221  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3222 *
3223 * Generate Q in VT
3224 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3225 *
3226  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3227  $ work( iwork ), lwork-iwork+1, ierr )
3228 *
3229 * Copy L to WORK(IU), zeroing out above it
3230 *
3231  CALL slacpy( 'L', m, m, a, lda, work( iu ),
3232  $ ldwrku )
3233  CALL slaset( 'U', m-1, m-1, zero, zero,
3234  $ work( iu+ldwrku ), ldwrku )
3235  ie = itau
3236  itauq = ie + m
3237  itaup = itauq + m
3238  iwork = itaup + m
3239 *
3240 * Bidiagonalize L in WORK(IU), copying result to U
3241 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3242 *
3243  CALL sgebrd( m, m, work( iu ), ldwrku, s,
3244  $ work( ie ), work( itauq ),
3245  $ work( itaup ), work( iwork ),
3246  $ lwork-iwork+1, ierr )
3247  CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
3248  $ ldu )
3249 *
3250 * Generate right bidiagonalizing vectors in WORK(IU)
3251 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3252 *
3253  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3254  $ work( itaup ), work( iwork ),
3255  $ lwork-iwork+1, ierr )
3256 *
3257 * Generate left bidiagonalizing vectors in U
3258 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3259 *
3260  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3261  $ work( iwork ), lwork-iwork+1, ierr )
3262  iwork = ie + m
3263 *
3264 * Perform bidiagonal QR iteration, computing left
3265 * singular vectors of L in U and computing right
3266 * singular vectors of L in WORK(IU)
3267 * (Workspace: need M*M+BDSPAC)
3268 *
3269  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3270  $ work( iu ), ldwrku, u, ldu, dum, 1,
3271  $ work( iwork ), info )
3272 *
3273 * Multiply right singular vectors of L in WORK(IU) by
3274 * Q in VT, storing result in A
3275 * (Workspace: need M*M)
3276 *
3277  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3278  $ ldwrku, vt, ldvt, zero, a, lda )
3279 *
3280 * Copy right singular vectors of A from A to VT
3281 *
3282  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3283 *
3284  ELSE
3285 *
3286 * Insufficient workspace for a fast algorithm
3287 *
3288  itau = 1
3289  iwork = itau + m
3290 *
3291 * Compute A=L*Q, copying result to VT
3292 * (Workspace: need 2*M, prefer M+M*NB)
3293 *
3294  CALL sgelqf( m, n, a, lda, work( itau ),
3295  $ work( iwork ), lwork-iwork+1, ierr )
3296  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3297 *
3298 * Generate Q in VT
3299 * (Workspace: need M+N, prefer M+N*NB)
3300 *
3301  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3302  $ work( iwork ), lwork-iwork+1, ierr )
3303 *
3304 * Copy L to U, zeroing out above it
3305 *
3306  CALL slacpy( 'L', m, m, a, lda, u, ldu )
3307  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3308  $ ldu )
3309  ie = itau
3310  itauq = ie + m
3311  itaup = itauq + m
3312  iwork = itaup + m
3313 *
3314 * Bidiagonalize L in U
3315 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3316 *
3317  CALL sgebrd( m, m, u, ldu, s, work( ie ),
3318  $ work( itauq ), work( itaup ),
3319  $ work( iwork ), lwork-iwork+1, ierr )
3320 *
3321 * Multiply right bidiagonalizing vectors in U by Q
3322 * in VT
3323 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3324 *
3325  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3326  $ work( itaup ), vt, ldvt,
3327  $ work( iwork ), lwork-iwork+1, ierr )
3328 *
3329 * Generate left bidiagonalizing vectors in U
3330 * (Workspace: need 4*M, prefer 3*M+M*NB)
3331 *
3332  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3333  $ work( iwork ), lwork-iwork+1, ierr )
3334  iwork = ie + m
3335 *
3336 * Perform bidiagonal QR iteration, computing left
3337 * singular vectors of A in U and computing right
3338 * singular vectors of A in VT
3339 * (Workspace: need BDSPAC)
3340 *
3341  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3342  $ ldvt, u, ldu, dum, 1, work( iwork ),
3343  $ info )
3344 *
3345  END IF
3346 *
3347  END IF
3348 *
3349  END IF
3350 *
3351  ELSE
3352 *
3353 * N .LT. MNTHR
3354 *
3355 * Path 10t(N greater than M, but not much larger)
3356 * Reduce to bidiagonal form without LQ decomposition
3357 *
3358  ie = 1
3359  itauq = ie + m
3360  itaup = itauq + m
3361  iwork = itaup + m
3362 *
3363 * Bidiagonalize A
3364 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3365 *
3366  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3367  $ work( itaup ), work( iwork ), lwork-iwork+1,
3368  $ ierr )
3369  IF( wntuas ) THEN
3370 *
3371 * If left singular vectors desired in U, copy result to U
3372 * and generate left bidiagonalizing vectors in U
3373 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3374 *
3375  CALL slacpy( 'L', m, m, a, lda, u, ldu )
3376  CALL sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3377  $ work( iwork ), lwork-iwork+1, ierr )
3378  END IF
3379  IF( wntvas ) THEN
3380 *
3381 * If right singular vectors desired in VT, copy result to
3382 * VT and generate right bidiagonalizing vectors in VT
3383 * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3384 *
3385  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3386  IF( wntva )
3387  $ nrvt = n
3388  IF( wntvs )
3389  $ nrvt = m
3390  CALL sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3391  $ work( iwork ), lwork-iwork+1, ierr )
3392  END IF
3393  IF( wntuo ) THEN
3394 *
3395 * If left singular vectors desired in A, generate left
3396 * bidiagonalizing vectors in A
3397 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3398 *
3399  CALL sorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3400  $ work( iwork ), lwork-iwork+1, ierr )
3401  END IF
3402  IF( wntvo ) THEN
3403 *
3404 * If right singular vectors desired in A, generate right
3405 * bidiagonalizing vectors in A
3406 * (Workspace: need 4*M, prefer 3*M+M*NB)
3407 *
3408  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
3409  $ work( iwork ), lwork-iwork+1, ierr )
3410  END IF
3411  iwork = ie + m
3412  IF( wntuas .OR. wntuo )
3413  $ nru = m
3414  IF( wntun )
3415  $ nru = 0
3416  IF( wntvas .OR. wntvo )
3417  $ ncvt = n
3418  IF( wntvn )
3419  $ ncvt = 0
3420  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3421 *
3422 * Perform bidiagonal QR iteration, if desired, computing
3423 * left singular vectors in U and computing right singular
3424 * vectors in VT
3425 * (Workspace: need BDSPAC)
3426 *
3427  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3428  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3429  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3430 *
3431 * Perform bidiagonal QR iteration, if desired, computing
3432 * left singular vectors in U and computing right singular
3433 * vectors in A
3434 * (Workspace: need BDSPAC)
3435 *
3436  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3437  $ u, ldu, dum, 1, work( iwork ), info )
3438  ELSE
3439 *
3440 * Perform bidiagonal QR iteration, if desired, computing
3441 * left singular vectors in A and computing right singular
3442 * vectors in VT
3443 * (Workspace: need BDSPAC)
3444 *
3445  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3446  $ ldvt, a, lda, dum, 1, work( iwork ), info )
3447  END IF
3448 *
3449  END IF
3450 *
3451  END IF
3452 *
3453 * If SBDSQR failed to converge, copy unconverged superdiagonals
3454 * to WORK( 2:MINMN )
3455 *
3456  IF( info.NE.0 ) THEN
3457  IF( ie.GT.2 ) THEN
3458  DO 50 i = 1, minmn - 1
3459  work( i+1 ) = work( i+ie-1 )
3460  50 CONTINUE
3461  END IF
3462  IF( ie.LT.2 ) THEN
3463  DO 60 i = minmn - 1, 1, -1
3464  work( i+1 ) = work( i+ie-1 )
3465  60 CONTINUE
3466  END IF
3467  END IF
3468 *
3469 * Undo scaling if necessary
3470 *
3471  IF( iscl.EQ.1 ) THEN
3472  IF( anrm.GT.bignum )
3473  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3474  $ ierr )
3475  IF( info.NE.0 .AND. anrm.GT.bignum )
3476  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3477  $ minmn, ierr )
3478  IF( anrm.LT.smlnum )
3479  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3480  $ ierr )
3481  IF( info.NE.0 .AND. anrm.LT.smlnum )
3482  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3483  $ minmn, ierr )
3484  END IF
3485 *
3486 * Return optimal workspace in WORK(1)
3487 *
3488  work( 1 ) = maxwrk
3489 *
3490  RETURN
3491 *
3492 * End of SGESVD
3493 *
subroutine sgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
SGEBRD
Definition: sgebrd.f:207
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: slaset.f:112
real function slange(NORM, M, N, A, LDA, WORK)
SLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition: slange.f:116
subroutine sorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGBR
Definition: sorgbr.f:159
subroutine sbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, WORK, INFO)
SBDSQR
Definition: sbdsqr.f:232
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:55
subroutine sgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGELQF
Definition: sgelqf.f:137
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:141
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:105
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:189
real function slamch(CMACH)
SLAMCH
Definition: slamch.f:69
subroutine sgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGEQRF
Definition: sgeqrf.f:138
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
Definition: tstiee.f:83
subroutine sorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGQR
Definition: sorgqr.f:130
subroutine sormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
SORMBR
Definition: sormbr.f:198
subroutine sorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGLQ
Definition: sorglq.f:129

Here is the call graph for this function:

Here is the caller graph for this function: