zgesdd function

void zgesdd(
  1. String JOBZ,
  2. int M,
  3. int N,
  4. Matrix<Complex> A_,
  5. int LDA,
  6. Array<double> S_,
  7. Matrix<Complex> U_,
  8. int LDU,
  9. Matrix<Complex> VT_,
  10. int LDVT,
  11. Array<Complex> WORK_,
  12. int LWORK,
  13. Array<double> RWORK_,
  14. Array<int> IWORK_,
  15. Box<int> INFO,
)

Implementation

void zgesdd(
  final String JOBZ,
  final int M,
  final int N,
  final Matrix<Complex> A_,
  final int LDA,
  final Array<double> S_,
  final Matrix<Complex> U_,
  final int LDU,
  final Matrix<Complex> VT_,
  final int LDVT,
  final Array<Complex> WORK_,
  final int LWORK,
  final Array<double> RWORK_,
  final Array<int> IWORK_,
  final Box<int> INFO,
) {
  final A = A_.having(ld: LDA);
  final U = U_.having(ld: LDU);
  final VT = VT_.having(ld: LDVT);
  final S = S_.having();
  final WORK = WORK_.having();
  final RWORK = RWORK_.having();
  final IWORK = IWORK_.having();
  const ZERO = 0.0, ONE = 1.0;
  bool LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS;
  int BLK,
      CHUNK = 0,
      I,
      IE = 0,
      IL,
      IR,
      IRU,
      IRVT,
      ISCL,
      ITAU,
      ITAUP,
      ITAUQ,
      IU,
      IVT,
      LDWKVT,
      LDWRKL,
      LDWRKR,
      LDWRKU,
      MAXWRK,
      MINMN,
      MINWRK,
      MNTHR1,
      MNTHR2,
      NRWORK,
      NWORK,
      WRKBL;
  int LWORK_ZGEBRD_MN,
      LWORK_ZGEBRD_MM,
      LWORK_ZGEBRD_NN,
      LWORK_ZGELQF_MN,
      LWORK_ZGEQRF_MN,
      LWORK_ZUNGBR_P_MN,
      LWORK_ZUNGBR_P_NN,
      LWORK_ZUNGBR_Q_MN,
      LWORK_ZUNGBR_Q_MM,
      LWORK_ZUNGLQ_MN,
      LWORK_ZUNGLQ_NN,
      LWORK_ZUNGQR_MM,
      LWORK_ZUNGQR_MN,
      LWORK_ZUNMBR_PRC_MM,
      LWORK_ZUNMBR_QLN_MM,
      LWORK_ZUNMBR_PRC_MN,
      LWORK_ZUNMBR_QLN_MN,
      LWORK_ZUNMBR_PRC_NN,
      LWORK_ZUNMBR_QLN_NN;
  double ANRM, BIGNUM, EPS, SMLNUM;
  final IDUM = Array<int>(1);
  final DUM = Array<double>(1);
  final CDUM = Array<Complex>(1);
  final IERR = Box(0);

  // Test the input arguments

  INFO.value = 0;
  MINMN = min(M, N);
  MNTHR1 = MINMN * 17.0 ~/ 9.0;
  MNTHR2 = MINMN * 5.0 ~/ 3.0;
  WNTQA = lsame(JOBZ, 'A');
  WNTQS = lsame(JOBZ, 'S');
  WNTQAS = WNTQA || WNTQS;
  WNTQO = lsame(JOBZ, 'O');
  WNTQN = lsame(JOBZ, 'N');
  LQUERY = (LWORK == -1);
  MINWRK = 1;
  MAXWRK = 1;

  if (!(WNTQA || WNTQS || WNTQO || WNTQN)) {
    INFO.value = -1;
  } else if (M < 0) {
    INFO.value = -2;
  } else if (N < 0) {
    INFO.value = -3;
  } else if (LDA < max(1, M)) {
    INFO.value = -5;
  } else if (LDU < 1 || (WNTQAS && LDU < M) || (WNTQO && M < N && LDU < M)) {
    INFO.value = -8;
  } else if (LDVT < 1 ||
      (WNTQA && LDVT < N) ||
      (WNTQS && LDVT < MINMN) ||
      (WNTQO && M >= N && LDVT < N)) {
    INFO.value = -10;
  }

  // Compute workspace
  //   Note: Comments in the code beginning "Workspace:" describe the
  //   minimal amount of workspace allocated at that point in the code,
  //   as well as the preferred amount for good performance.
  //   CWorkspace refers to complex workspace, and RWorkspace to
  //   real workspace. NB refers to the optimal block size for the
  //   immediately following subroutine, as returned by ILAENV.)

  if (INFO.value == 0) {
    MINWRK = 1;
    MAXWRK = 1;
    if (M >= N && MINMN > 0) {
      // There is no complex work space needed for bidiagonal SVD
      // The real work space needed for bidiagonal SVD (dbdsdc) is
      // BDSPAC = 3*N*N + 4*N for singular values and vectors;
      // BDSPAC = 4*N         for singular values only;
      // not including e, RU, and RVT matrices.

      // Compute space preferred for each routine
      zgebrd(M, N, CDUM(1).asMatrix(M), M, DUM(1), DUM(1), CDUM(1), CDUM(1),
          CDUM(1), -1, IERR);
      LWORK_ZGEBRD_MN = CDUM[1].toInt();

      zgebrd(N, N, CDUM(1).asMatrix(N), N, DUM(1), DUM(1), CDUM(1), CDUM(1),
          CDUM(1), -1, IERR);
      LWORK_ZGEBRD_NN = CDUM[1].toInt();

      zgeqrf(M, N, CDUM(1).asMatrix(M), M, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZGEQRF_MN = CDUM[1].toInt();

      zungbr('P', N, N, N, CDUM(1).asMatrix(N), N, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZUNGBR_P_NN = CDUM[1].toInt();

      zungbr('Q', M, M, N, CDUM(1).asMatrix(M), M, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZUNGBR_Q_MM = CDUM[1].toInt();

      zungbr('Q', M, N, N, CDUM(1).asMatrix(M), M, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZUNGBR_Q_MN = CDUM[1].toInt();

      zungqr(M, M, N, CDUM(1).asMatrix(M), M, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZUNGQR_MM = CDUM[1].toInt();

      zungqr(M, N, N, CDUM(1).asMatrix(M), M, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZUNGQR_MN = CDUM[1].toInt();

      zunmbr('P', 'R', 'C', N, N, N, CDUM(1).asMatrix(N), N, CDUM(1),
          CDUM(1).asMatrix(N), N, CDUM(1), -1, IERR);
      LWORK_ZUNMBR_PRC_NN = CDUM[1].toInt();

      zunmbr('Q', 'L', 'N', M, M, N, CDUM(1).asMatrix(M), M, CDUM(1),
          CDUM(1).asMatrix(M), M, CDUM(1), -1, IERR);
      LWORK_ZUNMBR_QLN_MM = CDUM[1].toInt();

      zunmbr('Q', 'L', 'N', M, N, N, CDUM(1).asMatrix(M), M, CDUM(1),
          CDUM(1).asMatrix(M), M, CDUM(1), -1, IERR);
      LWORK_ZUNMBR_QLN_MN = CDUM[1].toInt();

      zunmbr('Q', 'L', 'N', N, N, N, CDUM(1).asMatrix(N), N, CDUM(1),
          CDUM(1).asMatrix(N), N, CDUM(1), -1, IERR);
      LWORK_ZUNMBR_QLN_NN = CDUM[1].toInt();

      if (M >= MNTHR1) {
        if (WNTQN) {
          // Path 1 (M >> N, JOBZ='N')

          MAXWRK = N + LWORK_ZGEQRF_MN;
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZGEBRD_NN);
          MINWRK = 3 * N;
        } else if (WNTQO) {
          // Path 2 (M >> N, JOBZ='O')

          WRKBL = N + LWORK_ZGEQRF_MN;
          WRKBL = max(WRKBL, N + LWORK_ZUNGQR_MN);
          WRKBL = max(WRKBL, 2 * N + LWORK_ZGEBRD_NN);
          WRKBL = max(WRKBL, 2 * N + LWORK_ZUNMBR_QLN_NN);
          WRKBL = max(WRKBL, 2 * N + LWORK_ZUNMBR_PRC_NN);
          MAXWRK = M * N + N * N + WRKBL;
          MINWRK = 2 * N * N + 3 * N;
        } else if (WNTQS) {
          // Path 3 (M >> N, JOBZ='S')

          WRKBL = N + LWORK_ZGEQRF_MN;
          WRKBL = max(WRKBL, N + LWORK_ZUNGQR_MN);
          WRKBL = max(WRKBL, 2 * N + LWORK_ZGEBRD_NN);
          WRKBL = max(WRKBL, 2 * N + LWORK_ZUNMBR_QLN_NN);
          WRKBL = max(WRKBL, 2 * N + LWORK_ZUNMBR_PRC_NN);
          MAXWRK = N * N + WRKBL;
          MINWRK = N * N + 3 * N;
        } else if (WNTQA) {
          // Path 4 (M >> N, JOBZ='A')

          WRKBL = N + LWORK_ZGEQRF_MN;
          WRKBL = max(WRKBL, N + LWORK_ZUNGQR_MM);
          WRKBL = max(WRKBL, 2 * N + LWORK_ZGEBRD_NN);
          WRKBL = max(WRKBL, 2 * N + LWORK_ZUNMBR_QLN_NN);
          WRKBL = max(WRKBL, 2 * N + LWORK_ZUNMBR_PRC_NN);
          MAXWRK = N * N + WRKBL;
          MINWRK = N * N + max(3 * N, N + M);
        }
      } else if (M >= MNTHR2) {
        // Path 5 (M >> N, but not as much as MNTHR1)

        MAXWRK = 2 * N + LWORK_ZGEBRD_MN;
        MINWRK = 2 * N + M;
        if (WNTQO) {
          // Path 5o (M >> N, JOBZ='O')
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNGBR_P_NN);
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNGBR_Q_MN);
          MAXWRK += M * N;
          MINWRK += N * N;
        } else if (WNTQS) {
          // Path 5s (M >> N, JOBZ='S')
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNGBR_P_NN);
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNGBR_Q_MN);
        } else if (WNTQA) {
          // Path 5a (M >> N, JOBZ='A')
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNGBR_P_NN);
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNGBR_Q_MM);
        }
      } else {
        // Path 6 (M >= N, but not much larger)

        MAXWRK = 2 * N + LWORK_ZGEBRD_MN;
        MINWRK = 2 * N + M;
        if (WNTQO) {
          // Path 6o (M >= N, JOBZ='O')
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNMBR_PRC_NN);
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNMBR_QLN_MN);
          MAXWRK += M * N;
          MINWRK += N * N;
        } else if (WNTQS) {
          // Path 6s (M >= N, JOBZ='S')
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNMBR_QLN_MN);
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNMBR_PRC_NN);
        } else if (WNTQA) {
          // Path 6a (M >= N, JOBZ='A')
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNMBR_QLN_MM);
          MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNMBR_PRC_NN);
        }
      }
    } else if (MINMN > 0) {
      // There is no complex work space needed for bidiagonal SVD
      // The real work space needed for bidiagonal SVD (dbdsdc) is
      // BDSPAC = 3*M*M + 4*M for singular values and vectors;
      // BDSPAC = 4*M         for singular values only;
      // not including e, RU, and RVT matrices.

      // Compute space preferred for each routine
      zgebrd(M, N, CDUM(1).asMatrix(M), M, DUM(1), DUM(1), CDUM(1), CDUM(1),
          CDUM(1), -1, IERR);
      LWORK_ZGEBRD_MN = CDUM[1].toInt();

      zgebrd(M, M, CDUM(1).asMatrix(M), M, DUM(1), DUM(1), CDUM(1), CDUM(1),
          CDUM(1), -1, IERR);
      LWORK_ZGEBRD_MM = CDUM[1].toInt();

      zgelqf(M, N, CDUM(1).asMatrix(M), M, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZGELQF_MN = CDUM[1].toInt();

      zungbr('P', M, N, M, CDUM(1).asMatrix(M), M, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZUNGBR_P_MN = CDUM[1].toInt();

      zungbr('P', N, N, M, CDUM(1).asMatrix(N), N, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZUNGBR_P_NN = CDUM[1].toInt();

      zungbr('Q', M, M, N, CDUM(1).asMatrix(M), M, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZUNGBR_Q_MM = CDUM[1].toInt();

      zunglq(M, N, M, CDUM(1).asMatrix(M), M, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZUNGLQ_MN = CDUM[1].toInt();

      zunglq(N, N, M, CDUM(1).asMatrix(N), N, CDUM(1), CDUM(1), -1, IERR);
      LWORK_ZUNGLQ_NN = CDUM[1].toInt();

      zunmbr('P', 'R', 'C', M, M, M, CDUM(1).asMatrix(M), M, CDUM(1),
          CDUM(1).asMatrix(M), M, CDUM(1), -1, IERR);
      LWORK_ZUNMBR_PRC_MM = CDUM[1].toInt();

      zunmbr('P', 'R', 'C', M, N, M, CDUM(1).asMatrix(M), M, CDUM(1),
          CDUM(1).asMatrix(M), M, CDUM(1), -1, IERR);
      LWORK_ZUNMBR_PRC_MN = CDUM[1].toInt();

      zunmbr('P', 'R', 'C', N, N, M, CDUM(1).asMatrix(N), N, CDUM(1),
          CDUM(1).asMatrix(N), N, CDUM(1), -1, IERR);
      LWORK_ZUNMBR_PRC_NN = CDUM[1].toInt();

      zunmbr('Q', 'L', 'N', M, M, M, CDUM(1).asMatrix(M), M, CDUM(1),
          CDUM(1).asMatrix(M), M, CDUM(1), -1, IERR);
      LWORK_ZUNMBR_QLN_MM = CDUM[1].toInt();

      if (N >= MNTHR1) {
        if (WNTQN) {
          // Path 1t (N >> M, JOBZ='N')

          MAXWRK = M + LWORK_ZGELQF_MN;
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZGEBRD_MM);
          MINWRK = 3 * M;
        } else if (WNTQO) {
          // Path 2t (N >> M, JOBZ='O')

          WRKBL = M + LWORK_ZGELQF_MN;
          WRKBL = max(WRKBL, M + LWORK_ZUNGLQ_MN);
          WRKBL = max(WRKBL, 2 * M + LWORK_ZGEBRD_MM);
          WRKBL = max(WRKBL, 2 * M + LWORK_ZUNMBR_QLN_MM);
          WRKBL = max(WRKBL, 2 * M + LWORK_ZUNMBR_PRC_MM);
          MAXWRK = M * N + M * M + WRKBL;
          MINWRK = 2 * M * M + 3 * M;
        } else if (WNTQS) {
          // Path 3t (N >> M, JOBZ='S')

          WRKBL = M + LWORK_ZGELQF_MN;
          WRKBL = max(WRKBL, M + LWORK_ZUNGLQ_MN);
          WRKBL = max(WRKBL, 2 * M + LWORK_ZGEBRD_MM);
          WRKBL = max(WRKBL, 2 * M + LWORK_ZUNMBR_QLN_MM);
          WRKBL = max(WRKBL, 2 * M + LWORK_ZUNMBR_PRC_MM);
          MAXWRK = M * M + WRKBL;
          MINWRK = M * M + 3 * M;
        } else if (WNTQA) {
          // Path 4t (N >> M, JOBZ='A')

          WRKBL = M + LWORK_ZGELQF_MN;
          WRKBL = max(WRKBL, M + LWORK_ZUNGLQ_NN);
          WRKBL = max(WRKBL, 2 * M + LWORK_ZGEBRD_MM);
          WRKBL = max(WRKBL, 2 * M + LWORK_ZUNMBR_QLN_MM);
          WRKBL = max(WRKBL, 2 * M + LWORK_ZUNMBR_PRC_MM);
          MAXWRK = M * M + WRKBL;
          MINWRK = M * M + max(3 * M, M + N);
        }
      } else if (N >= MNTHR2) {
        // Path 5t (N >> M, but not as much as MNTHR1)

        MAXWRK = 2 * M + LWORK_ZGEBRD_MN;
        MINWRK = 2 * M + N;
        if (WNTQO) {
          // Path 5to (N >> M, JOBZ='O')
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNGBR_Q_MM);
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNGBR_P_MN);
          MAXWRK += M * N;
          MINWRK += M * M;
        } else if (WNTQS) {
          // Path 5ts (N >> M, JOBZ='S')
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNGBR_Q_MM);
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNGBR_P_MN);
        } else if (WNTQA) {
          // Path 5ta (N >> M, JOBZ='A')
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNGBR_Q_MM);
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNGBR_P_NN);
        }
      } else {
        // Path 6t (N > M, but not much larger)

        MAXWRK = 2 * M + LWORK_ZGEBRD_MN;
        MINWRK = 2 * M + N;
        if (WNTQO) {
          // Path 6to (N > M, JOBZ='O')
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNMBR_QLN_MM);
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNMBR_PRC_MN);
          MAXWRK += M * N;
          MINWRK += M * M;
        } else if (WNTQS) {
          // Path 6ts (N > M, JOBZ='S')
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNMBR_QLN_MM);
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNMBR_PRC_MN);
        } else if (WNTQA) {
          // Path 6ta (N > M, JOBZ='A')
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNMBR_QLN_MM);
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNMBR_PRC_NN);
        }
      }
    }
    MAXWRK = max(MAXWRK, MINWRK);
  }
  if (INFO.value == 0) {
    WORK[1] = droundup_lwork(MAXWRK).toComplex();
    if (LWORK < MINWRK && !LQUERY) {
      INFO.value = -12;
    }
  }

  if (INFO.value != 0) {
    xerbla('ZGESDD', -INFO.value);
    return;
  } else if (LQUERY) {
    return;
  }

  // Quick return if possible

  if (M == 0 || N == 0) {
    return;
  }

  // Get machine constants

  EPS = dlamch('P');
  SMLNUM = sqrt(dlamch('S')) / EPS;
  BIGNUM = ONE / SMLNUM;

  // Scale A if max element outside range [SMLNUM,BIGNUM]

  ANRM = zlange('M', M, N, A, LDA, DUM);
  if (disnan(ANRM)) {
    INFO.value = -4;
    return;
  }
  ISCL = 0;
  if (ANRM > ZERO && ANRM < SMLNUM) {
    ISCL = 1;
    zlascl('G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR);
  } else if (ANRM > BIGNUM) {
    ISCL = 1;
    zlascl('G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR);
  }

  if (M >= N) {
    // A has at least as many rows as columns. If A has sufficiently
    // more rows than columns, first reduce using the QR
    // decomposition (if sufficient workspace available)

    if (M >= MNTHR1) {
      if (WNTQN) {
        // Path 1 (M >> N, JOBZ='N')
        // No singular vectors to be computed

        ITAU = 1;
        NWORK = ITAU + N;

        // Compute A=Q*R
        // CWorkspace: need   N [tau] + N    [work]
        // CWorkspace: prefer N [tau] + N*NB [work]
        // RWorkspace: need   0

        zgeqrf(M, N, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Zero out below R

        zlaset('L', N - 1, N - 1, Complex.zero, Complex.zero, A(2, 1), LDA);
        IE = 1;
        ITAUQ = 1;
        ITAUP = ITAUQ + N;
        NWORK = ITAUP + N;

        // Bidiagonalize R in A
        // CWorkspace: need   2*N [tauq, taup] + N      [work]
        // CWorkspace: prefer 2*N [tauq, taup] + 2*N*NB [work]
        // RWorkspace: need   N [e]

        zgebrd(N, N, A, LDA, S, RWORK(IE), WORK(ITAUQ), WORK(ITAUP),
            WORK(NWORK), LWORK - NWORK + 1, IERR);
        NRWORK = IE + N;

        // Perform bidiagonal SVD, compute singular values only
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + BDSPAC

        dbdsdc('U', 'N', N, S, RWORK(IE), DUM.asMatrix(1), 1, DUM.asMatrix(1),
            1, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);
      } else if (WNTQO) {
        // Path 2 (M >> N, JOBZ='O')
        // N left singular vectors to be overwritten on A and
        // N right singular vectors to be computed in VT

        IU = 1;

        // WORK(IU) is N by N

        LDWRKU = N;
        IR = IU + LDWRKU * N;
        if (LWORK >= M * N + N * N + 3 * N) {
          // WORK(IR) is M by N

          LDWRKR = M;
        } else {
          LDWRKR = (LWORK - N * N - 3 * N) ~/ N;
        }
        ITAU = IR + LDWRKR * N;
        NWORK = ITAU + N;

        // Compute A=Q*R
        // CWorkspace: need   N*N [U] + N*N [R] + N [tau] + N    [work]
        // CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work]
        // RWorkspace: need   0

        zgeqrf(M, N, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Copy R to WORK( IR ), zeroing out below it

        zlacpy('U', N, N, A, LDA, WORK(IR).asMatrix(LDWRKR), LDWRKR);
        zlaset('L', N - 1, N - 1, Complex.zero, Complex.zero,
            WORK(IR + 1).asMatrix(LDWRKR), LDWRKR);

        // Generate Q in A
        // CWorkspace: need   N*N [U] + N*N [R] + N [tau] + N    [work]
        // CWorkspace: prefer N*N [U] + N*N [R] + N [tau] + N*NB [work]
        // RWorkspace: need   0

        zungqr(
            M, N, N, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);
        IE = 1;
        ITAUQ = ITAU;
        ITAUP = ITAUQ + N;
        NWORK = ITAUP + N;

        // Bidiagonalize R in WORK(IR)
        // CWorkspace: need   N*N [U] + N*N [R] + 2*N [tauq, taup] + N      [work]
        // CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + 2*N*NB [work]
        // RWorkspace: need   N [e]

        zgebrd(N, N, WORK(IR).asMatrix(LDWRKR), LDWRKR, S, RWORK(IE),
            WORK(ITAUQ), WORK(ITAUP), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Perform bidiagonal SVD, computing left singular vectors
        // of R in WORK(IRU) and computing right singular vectors
        // of R in WORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + BDSPAC

        IRU = IE + N;
        IRVT = IRU + N * N;
        NRWORK = IRVT + N * N;
        dbdsdc('U', 'I', N, S, RWORK(IE), RWORK(IRU).asMatrix(N), N,
            RWORK(IRVT).asMatrix(N), N, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
        // Overwrite WORK(IU) by the left singular vectors of R
        // CWorkspace: need   N*N [U] + N*N [R] + 2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacp2('F', N, N, RWORK(IRU).asMatrix(N), N, WORK(IU).asMatrix(LDWRKU),
            LDWRKU);
        zunmbr(
            'Q',
            'L',
            'N',
            N,
            N,
            N,
            WORK(IR).asMatrix(LDWRKR),
            LDWRKR,
            WORK(ITAUQ),
            WORK(IU).asMatrix(LDWRKU),
            LDWRKU,
            WORK(NWORK),
            LWORK - NWORK + 1,
            IERR);

        // Copy real matrix RWORK(IRVT) to complex matrix VT
        // Overwrite VT by the right singular vectors of R
        // CWorkspace: need   N*N [U] + N*N [R] + 2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer N*N [U] + N*N [R] + 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacp2('F', N, N, RWORK(IRVT).asMatrix(N), N, VT, LDVT);
        zunmbr('P', 'R', 'C', N, N, N, WORK(IR).asMatrix(LDWRKR), LDWRKR,
            WORK(ITAUP), VT, LDVT, WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Multiply Q in A by left singular vectors of R in
        // WORK(IU), storing result in WORK(IR) and copying to A
        // CWorkspace: need   N*N [U] + N*N [R]
        // CWorkspace: prefer N*N [U] + M*N [R]
        // RWorkspace: need   0

        for (I = 1; I <= M; I += LDWRKR) {
          CHUNK = min(M - I + 1, LDWRKR);
          zgemm(
              'N',
              'N',
              CHUNK,
              N,
              N,
              Complex.one,
              A(I, 1),
              LDA,
              WORK(IU).asMatrix(LDWRKU),
              LDWRKU,
              Complex.zero,
              WORK(IR).asMatrix(LDWRKR),
              LDWRKR);
          zlacpy(
              'F', CHUNK, N, WORK(IR).asMatrix(LDWRKR), LDWRKR, A(I, 1), LDA);
        }
      } else if (WNTQS) {
        // Path 3 (M >> N, JOBZ='S')
        // N left singular vectors to be computed in U and
        // N right singular vectors to be computed in VT

        IR = 1;

        // WORK(IR) is N by N

        LDWRKR = N;
        ITAU = IR + LDWRKR * N;
        NWORK = ITAU + N;

        // Compute A=Q*R
        // CWorkspace: need   N*N [R] + N [tau] + N    [work]
        // CWorkspace: prefer N*N [R] + N [tau] + N*NB [work]
        // RWorkspace: need   0

        zgeqrf(M, N, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Copy R to WORK(IR), zeroing out below it

        zlacpy('U', N, N, A, LDA, WORK(IR).asMatrix(LDWRKR), LDWRKR);
        zlaset('L', N - 1, N - 1, Complex.zero, Complex.zero,
            WORK(IR + 1).asMatrix(LDWRKR), LDWRKR);

        // Generate Q in A
        // CWorkspace: need   N*N [R] + N [tau] + N    [work]
        // CWorkspace: prefer N*N [R] + N [tau] + N*NB [work]
        // RWorkspace: need   0

        zungqr(
            M, N, N, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);
        IE = 1;
        ITAUQ = ITAU;
        ITAUP = ITAUQ + N;
        NWORK = ITAUP + N;

        // Bidiagonalize R in WORK(IR)
        // CWorkspace: need   N*N [R] + 2*N [tauq, taup] + N      [work]
        // CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + 2*N*NB [work]
        // RWorkspace: need   N [e]

        zgebrd(N, N, WORK(IR).asMatrix(LDWRKR), LDWRKR, S, RWORK(IE),
            WORK(ITAUQ), WORK(ITAUP), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + BDSPAC

        IRU = IE + N;
        IRVT = IRU + N * N;
        NRWORK = IRVT + N * N;
        dbdsdc('U', 'I', N, S, RWORK(IE), RWORK(IRU).asMatrix(N), N,
            RWORK(IRVT).asMatrix(N), N, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRU) to complex matrix U
        // Overwrite U by left singular vectors of R
        // CWorkspace: need   N*N [R] + 2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacp2('F', N, N, RWORK(IRU).asMatrix(N), N, U, LDU);
        zunmbr('Q', 'L', 'N', N, N, N, WORK(IR).asMatrix(LDWRKR), LDWRKR,
            WORK(ITAUQ), U, LDU, WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Copy real matrix RWORK(IRVT) to complex matrix VT
        // Overwrite VT by right singular vectors of R
        // CWorkspace: need   N*N [R] + 2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer N*N [R] + 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacp2('F', N, N, RWORK(IRVT).asMatrix(N), N, VT, LDVT);
        zunmbr('P', 'R', 'C', N, N, N, WORK(IR).asMatrix(LDWRKR), LDWRKR,
            WORK(ITAUP), VT, LDVT, WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Multiply Q in A by left singular vectors of R in
        // WORK(IR), storing result in U
        // CWorkspace: need   N*N [R]
        // RWorkspace: need   0

        zlacpy('F', N, N, U, LDU, WORK(IR).asMatrix(LDWRKR), LDWRKR);
        zgemm('N', 'N', M, N, N, Complex.one, A, LDA, WORK(IR).asMatrix(LDWRKR),
            LDWRKR, Complex.zero, U, LDU);
      } else if (WNTQA) {
        // Path 4 (M >> N, JOBZ='A')
        // M left singular vectors to be computed in U and
        // N right singular vectors to be computed in VT

        IU = 1;

        // WORK(IU) is N by N

        LDWRKU = N;
        ITAU = IU + LDWRKU * N;
        NWORK = ITAU + N;

        // Compute A=Q*R, copying result to U
        // CWorkspace: need   N*N [U] + N [tau] + N    [work]
        // CWorkspace: prefer N*N [U] + N [tau] + N*NB [work]
        // RWorkspace: need   0

        zgeqrf(M, N, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);
        zlacpy('L', M, N, A, LDA, U, LDU);

        // Generate Q in U
        // CWorkspace: need   N*N [U] + N [tau] + M    [work]
        // CWorkspace: prefer N*N [U] + N [tau] + M*NB [work]
        // RWorkspace: need   0

        zungqr(
            M, M, N, U, LDU, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Produce R in A, zeroing out below it

        zlaset('L', N - 1, N - 1, Complex.zero, Complex.zero, A(2, 1), LDA);
        IE = 1;
        ITAUQ = ITAU;
        ITAUP = ITAUQ + N;
        NWORK = ITAUP + N;

        // Bidiagonalize R in A
        // CWorkspace: need   N*N [U] + 2*N [tauq, taup] + N      [work]
        // CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + 2*N*NB [work]
        // RWorkspace: need   N [e]

        zgebrd(N, N, A, LDA, S, RWORK(IE), WORK(ITAUQ), WORK(ITAUP),
            WORK(NWORK), LWORK - NWORK + 1, IERR);
        IRU = IE + N;
        IRVT = IRU + N * N;
        NRWORK = IRVT + N * N;

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + BDSPAC

        dbdsdc('U', 'I', N, S, RWORK(IE), RWORK(IRU).asMatrix(N), N,
            RWORK(IRVT).asMatrix(N), N, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
        // Overwrite WORK(IU) by left singular vectors of R
        // CWorkspace: need   N*N [U] + 2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacp2('F', N, N, RWORK(IRU).asMatrix(N), N, WORK(IU).asMatrix(LDWRKU),
            LDWRKU);
        zunmbr(
            'Q',
            'L',
            'N',
            N,
            N,
            N,
            A,
            LDA,
            WORK(ITAUQ),
            WORK(IU).asMatrix(LDWRKU),
            LDWRKU,
            WORK(NWORK),
            LWORK - NWORK + 1,
            IERR);

        // Copy real matrix RWORK(IRVT) to complex matrix VT
        // Overwrite VT by right singular vectors of R
        // CWorkspace: need   N*N [U] + 2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer N*N [U] + 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacp2('F', N, N, RWORK(IRVT).asMatrix(N), N, VT, LDVT);
        zunmbr('P', 'R', 'C', N, N, N, A, LDA, WORK(ITAUP), VT, LDVT,
            WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Multiply Q in U by left singular vectors of R in
        // WORK(IU), storing result in A
        // CWorkspace: need   N*N [U]
        // RWorkspace: need   0

        zgemm('N', 'N', M, N, N, Complex.one, U, LDU, WORK(IU).asMatrix(LDWRKU),
            LDWRKU, Complex.zero, A, LDA);

        // Copy left singular vectors of A from A to U

        zlacpy('F', M, N, A, LDA, U, LDU);
      }
    } else if (M >= MNTHR2) {
      // MNTHR2 <= M < MNTHR1

      // Path 5 (M >> N, but not as much as MNTHR1)
      // Reduce to bidiagonal form without QR decomposition, use
      // ZUNGBR and matrix multiplication to compute singular vectors

      IE = 1;
      NRWORK = IE + N;
      ITAUQ = 1;
      ITAUP = ITAUQ + N;
      NWORK = ITAUP + N;

      // Bidiagonalize A
      // CWorkspace: need   2*N [tauq, taup] + M        [work]
      // CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work]
      // RWorkspace: need   N [e]

      zgebrd(M, N, A, LDA, S, RWORK(IE), WORK(ITAUQ), WORK(ITAUP), WORK(NWORK),
          LWORK - NWORK + 1, IERR);
      if (WNTQN) {
        // Path 5n (M >> N, JOBZ='N')
        // Compute singular values only
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + BDSPAC

        dbdsdc('U', 'N', N, S, RWORK(IE), DUM.asMatrix(1), 1, DUM.asMatrix(1),
            1, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);
      } else if (WNTQO) {
        IU = NWORK;
        IRU = NRWORK;
        IRVT = IRU + N * N;
        NRWORK = IRVT + N * N;

        // Path 5o (M >> N, JOBZ='O')
        // Copy A to VT, generate P**H
        // CWorkspace: need   2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacpy('U', N, N, A, LDA, VT, LDVT);
        zungbr('P', N, N, N, VT, LDVT, WORK(ITAUP), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Generate Q in A
        // CWorkspace: need   2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zungbr('Q', M, N, N, A, LDA, WORK(ITAUQ), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        if (LWORK >= M * N + 3 * N) {
          // WORK( IU ) is M by N

          LDWRKU = M;
        } else {
          // WORK(IU) is LDWRKU by N

          LDWRKU = (LWORK - 3 * N) ~/ N;
        }
        NWORK = IU + LDWRKU * N;

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + BDSPAC

        dbdsdc('U', 'I', N, S, RWORK(IE), RWORK(IRU).asMatrix(N), N,
            RWORK(IRVT).asMatrix(N), N, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Multiply real matrix RWORK(IRVT) by P**H in VT,
        // storing the result in WORK(IU), copying to VT
        // CWorkspace: need   2*N [tauq, taup] + N*N [U]
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork]

        zlarcm(N, N, RWORK(IRVT).asMatrix(N), N, VT, LDVT,
            WORK(IU).asMatrix(LDWRKU), LDWRKU, RWORK(NRWORK));
        zlacpy('F', N, N, WORK(IU).asMatrix(LDWRKU), LDWRKU, VT, LDVT);

        // Multiply Q in A by real matrix RWORK(IRU), storing the
        // result in WORK(IU), copying to A
        // CWorkspace: need   2*N [tauq, taup] + N*N [U]
        // CWorkspace: prefer 2*N [tauq, taup] + M*N [U]
        // RWorkspace: need   N [e] + N*N [RU] + 2*N*N [rwork]
        // RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here

        NRWORK = IRVT;
        for (I = 1; I <= M; I += LDWRKU) {
          CHUNK = min(M - I + 1, LDWRKU);
          zlacrm(CHUNK, N, A(I, 1), LDA, RWORK(IRU).asMatrix(N), N,
              WORK(IU).asMatrix(LDWRKU), LDWRKU, RWORK(NRWORK));
          zlacpy(
              'F', CHUNK, N, WORK(IU).asMatrix(LDWRKU), LDWRKU, A(I, 1), LDA);
        }
      } else if (WNTQS) {
        // Path 5s (M >> N, JOBZ='S')
        // Copy A to VT, generate P**H
        // CWorkspace: need   2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacpy('U', N, N, A, LDA, VT, LDVT);
        zungbr('P', N, N, N, VT, LDVT, WORK(ITAUP), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Copy A to U, generate Q
        // CWorkspace: need   2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacpy('L', M, N, A, LDA, U, LDU);
        zungbr('Q', M, N, N, U, LDU, WORK(ITAUQ), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + BDSPAC

        IRU = NRWORK;
        IRVT = IRU + N * N;
        NRWORK = IRVT + N * N;
        dbdsdc('U', 'I', N, S, RWORK(IE), RWORK(IRU).asMatrix(N), N,
            RWORK(IRVT).asMatrix(N), N, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Multiply real matrix RWORK(IRVT) by P**H in VT,
        // storing the result in A, copying to VT
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork]

        zlarcm(
            N, N, RWORK(IRVT).asMatrix(N), N, VT, LDVT, A, LDA, RWORK(NRWORK));
        zlacpy('F', N, N, A, LDA, VT, LDVT);

        // Multiply Q in U by real matrix RWORK(IRU), storing the
        // result in A, copying to U
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here

        NRWORK = IRVT;
        zlacrm(M, N, U, LDU, RWORK(IRU).asMatrix(N), N, A, LDA, RWORK(NRWORK));
        zlacpy('F', M, N, A, LDA, U, LDU);
      } else {
        // Path 5a (M >> N, JOBZ='A')
        // Copy A to VT, generate P**H
        // CWorkspace: need   2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacpy('U', N, N, A, LDA, VT, LDVT);
        zungbr('P', N, N, N, VT, LDVT, WORK(ITAUP), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Copy A to U, generate Q
        // CWorkspace: need   2*N [tauq, taup] + M    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacpy('L', M, N, A, LDA, U, LDU);
        zungbr('Q', M, M, N, U, LDU, WORK(ITAUQ), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + BDSPAC

        IRU = NRWORK;
        IRVT = IRU + N * N;
        NRWORK = IRVT + N * N;
        dbdsdc('U', 'I', N, S, RWORK(IE), RWORK(IRU).asMatrix(N), N,
            RWORK(IRVT).asMatrix(N), N, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Multiply real matrix RWORK(IRVT) by P**H in VT,
        // storing the result in A, copying to VT
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + 2*N*N [rwork]

        zlarcm(
            N, N, RWORK(IRVT).asMatrix(N), N, VT, LDVT, A, LDA, RWORK(NRWORK));
        zlacpy('F', N, N, A, LDA, VT, LDVT);

        // Multiply Q in U by real matrix RWORK(IRU), storing the
        // result in A, copying to U
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here

        NRWORK = IRVT;
        zlacrm(M, N, U, LDU, RWORK(IRU).asMatrix(N), N, A, LDA, RWORK(NRWORK));
        zlacpy('F', M, N, A, LDA, U, LDU);
      }
    } else {
      // M < MNTHR2

      // Path 6 (M >= N, but not much larger)
      // Reduce to bidiagonal form without QR decomposition
      // Use ZUNMBR to compute singular vectors

      IE = 1;
      NRWORK = IE + N;
      ITAUQ = 1;
      ITAUP = ITAUQ + N;
      NWORK = ITAUP + N;

      // Bidiagonalize A
      // CWorkspace: need   2*N [tauq, taup] + M        [work]
      // CWorkspace: prefer 2*N [tauq, taup] + (M+N)*NB [work]
      // RWorkspace: need   N [e]

      zgebrd(M, N, A, LDA, S, RWORK(IE), WORK(ITAUQ), WORK(ITAUP), WORK(NWORK),
          LWORK - NWORK + 1, IERR);
      if (WNTQN) {
        // Path 6n (M >= N, JOBZ='N')
        // Compute singular values only
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + BDSPAC

        dbdsdc('U', 'N', N, S, RWORK(IE), DUM.asMatrix(1), 1, DUM.asMatrix(1),
            1, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);
      } else if (WNTQO) {
        IU = NWORK;
        IRU = NRWORK;
        IRVT = IRU + N * N;
        NRWORK = IRVT + N * N;
        if (LWORK >= M * N + 3 * N) {
          // WORK( IU ) is M by N

          LDWRKU = M;
        } else {
          // WORK( IU ) is LDWRKU by N

          LDWRKU = (LWORK - 3 * N) ~/ N;
        }
        NWORK = IU + LDWRKU * N;

        // Path 6o (M >= N, JOBZ='O')
        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + BDSPAC

        dbdsdc('U', 'I', N, S, RWORK(IE), RWORK(IRU).asMatrix(N), N,
            RWORK(IRVT).asMatrix(N), N, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRVT) to complex matrix VT
        // Overwrite VT by right singular vectors of A
        // CWorkspace: need   2*N [tauq, taup] + N*N [U] + N    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work]
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT]

        zlacp2('F', N, N, RWORK(IRVT).asMatrix(N), N, VT, LDVT);
        zunmbr('P', 'R', 'C', N, N, N, A, LDA, WORK(ITAUP), VT, LDVT,
            WORK(NWORK), LWORK - NWORK + 1, IERR);

        if (LWORK >= M * N + 3 * N) {
          // Path 6o-fast
          // Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
          // Overwrite WORK(IU) by left singular vectors of A, copying
          // to A
          // CWorkspace: need   2*N [tauq, taup] + M*N [U] + N    [work]
          // CWorkspace: prefer 2*N [tauq, taup] + M*N [U] + N*NB [work]
          // RWorkspace: need   N [e] + N*N [RU]

          zlaset('F', M, N, Complex.zero, Complex.zero,
              WORK(IU).asMatrix(LDWRKU), LDWRKU);
          zlacp2('F', N, N, RWORK(IRU).asMatrix(N), N,
              WORK(IU).asMatrix(LDWRKU), LDWRKU);
          zunmbr(
              'Q',
              'L',
              'N',
              M,
              N,
              N,
              A,
              LDA,
              WORK(ITAUQ),
              WORK(IU).asMatrix(LDWRKU),
              LDWRKU,
              WORK(NWORK),
              LWORK - NWORK + 1,
              IERR);
          zlacpy('F', M, N, WORK(IU).asMatrix(LDWRKU), LDWRKU, A, LDA);
        } else {
          // Path 6o-slow
          // Generate Q in A
          // CWorkspace: need   2*N [tauq, taup] + N*N [U] + N    [work]
          // CWorkspace: prefer 2*N [tauq, taup] + N*N [U] + N*NB [work]
          // RWorkspace: need   0

          zungbr('Q', M, N, N, A, LDA, WORK(ITAUQ), WORK(NWORK),
              LWORK - NWORK + 1, IERR);

          // Multiply Q in A by real matrix RWORK(IRU), storing the
          // result in WORK(IU), copying to A
          // CWorkspace: need   2*N [tauq, taup] + N*N [U]
          // CWorkspace: prefer 2*N [tauq, taup] + M*N [U]
          // RWorkspace: need   N [e] + N*N [RU] + 2*N*N [rwork]
          // RWorkspace: prefer N [e] + N*N [RU] + 2*M*N [rwork] < N + 5*N*N since M < 2*N here

          NRWORK = IRVT;
          for (I = 1; I <= M; I += LDWRKU) {
            CHUNK = min(M - I + 1, LDWRKU);
            zlacrm(CHUNK, N, A(I, 1), LDA, RWORK(IRU).asMatrix(N), N,
                WORK(IU).asMatrix(LDWRKU), LDWRKU, RWORK(NRWORK));
            zlacpy(
                'F', CHUNK, N, WORK(IU).asMatrix(LDWRKU), LDWRKU, A(I, 1), LDA);
          }
        }
      } else if (WNTQS) {
        // Path 6s (M >= N, JOBZ='S')
        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + BDSPAC

        IRU = NRWORK;
        IRVT = IRU + N * N;
        NRWORK = IRVT + N * N;
        dbdsdc('U', 'I', N, S, RWORK(IE), RWORK(IRU).asMatrix(N), N,
            RWORK(IRVT).asMatrix(N), N, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRU) to complex matrix U
        // Overwrite U by left singular vectors of A
        // CWorkspace: need   2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT]

        zlaset('F', M, N, Complex.zero, Complex.zero, U, LDU);
        zlacp2('F', N, N, RWORK(IRU).asMatrix(N), N, U, LDU);
        zunmbr('Q', 'L', 'N', M, N, N, A, LDA, WORK(ITAUQ), U, LDU, WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Copy real matrix RWORK(IRVT) to complex matrix VT
        // Overwrite VT by right singular vectors of A
        // CWorkspace: need   2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT]

        zlacp2('F', N, N, RWORK(IRVT).asMatrix(N), N, VT, LDVT);
        zunmbr('P', 'R', 'C', N, N, N, A, LDA, WORK(ITAUP), VT, LDVT,
            WORK(NWORK), LWORK - NWORK + 1, IERR);
      } else {
        // Path 6a (M >= N, JOBZ='A')
        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT] + BDSPAC

        IRU = NRWORK;
        IRVT = IRU + N * N;
        NRWORK = IRVT + N * N;
        dbdsdc('U', 'I', N, S, RWORK(IE), RWORK(IRU).asMatrix(N), N,
            RWORK(IRVT).asMatrix(N), N, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Set the right corner of U to identity matrix

        zlaset('F', M, M, Complex.zero, Complex.zero, U, LDU);
        if (M > N) {
          zlaset('F', M - N, M - N, Complex.zero, Complex.one, U(N + 1, N + 1),
              LDU);
        }

        // Copy real matrix RWORK(IRU) to complex matrix U
        // Overwrite U by left singular vectors of A
        // CWorkspace: need   2*N [tauq, taup] + M    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + M*NB [work]
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT]

        zlacp2('F', N, N, RWORK(IRU).asMatrix(N), N, U, LDU);
        zunmbr('Q', 'L', 'N', M, M, N, A, LDA, WORK(ITAUQ), U, LDU, WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Copy real matrix RWORK(IRVT) to complex matrix VT
        // Overwrite VT by right singular vectors of A
        // CWorkspace: need   2*N [tauq, taup] + N    [work]
        // CWorkspace: prefer 2*N [tauq, taup] + N*NB [work]
        // RWorkspace: need   N [e] + N*N [RU] + N*N [RVT]

        zlacp2('F', N, N, RWORK(IRVT).asMatrix(N), N, VT, LDVT);
        zunmbr('P', 'R', 'C', N, N, N, A, LDA, WORK(ITAUP), VT, LDVT,
            WORK(NWORK), LWORK - NWORK + 1, IERR);
      }
    }
  } else {
    // A has more columns than rows. If A has sufficiently more
    // columns than rows, first reduce using the LQ decomposition (if
    // sufficient workspace available)

    if (N >= MNTHR1) {
      if (WNTQN) {
        // Path 1t (N >> M, JOBZ='N')
        // No singular vectors to be computed

        ITAU = 1;
        NWORK = ITAU + M;

        // Compute A=L*Q
        // CWorkspace: need   M [tau] + M    [work]
        // CWorkspace: prefer M [tau] + M*NB [work]
        // RWorkspace: need   0

        zgelqf(M, N, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Zero out above L

        zlaset('U', M - 1, M - 1, Complex.zero, Complex.zero, A(1, 2), LDA);
        IE = 1;
        ITAUQ = 1;
        ITAUP = ITAUQ + M;
        NWORK = ITAUP + M;

        // Bidiagonalize L in A
        // CWorkspace: need   2*M [tauq, taup] + M      [work]
        // CWorkspace: prefer 2*M [tauq, taup] + 2*M*NB [work]
        // RWorkspace: need   M [e]

        zgebrd(M, M, A, LDA, S, RWORK(IE), WORK(ITAUQ), WORK(ITAUP),
            WORK(NWORK), LWORK - NWORK + 1, IERR);
        NRWORK = IE + M;

        // Perform bidiagonal SVD, compute singular values only
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + BDSPAC

        dbdsdc('U', 'N', M, S, RWORK(IE), DUM.asMatrix(1), 1, DUM.asMatrix(1),
            1, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);
      } else if (WNTQO) {
        // Path 2t (N >> M, JOBZ='O')
        // M right singular vectors to be overwritten on A and
        // M left singular vectors to be computed in U

        IVT = 1;
        LDWKVT = M;

        // WORK(IVT) is M by M

        IL = IVT + LDWKVT * M;
        if (LWORK >= M * N + M * M + 3 * M) {
          // WORK(IL) M by N

          LDWRKL = M;
          CHUNK = N;
        } else {
          // WORK(IL) is M by CHUNK

          LDWRKL = M;
          CHUNK = (LWORK - M * M - 3 * M) ~/ M;
        }
        ITAU = IL + LDWRKL * CHUNK;
        NWORK = ITAU + M;

        // Compute A=L*Q
        // CWorkspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]
        // CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
        // RWorkspace: need   0

        zgelqf(M, N, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Copy L to WORK(IL), zeroing about above it

        zlacpy('L', M, M, A, LDA, WORK(IL).asMatrix(LDWRKL), LDWRKL);
        zlaset('U', M - 1, M - 1, Complex.zero, Complex.zero,
            WORK(IL + LDWRKL).asMatrix(LDWRKL), LDWRKL);

        // Generate Q in A
        // CWorkspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]
        // CWorkspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
        // RWorkspace: need   0

        zunglq(
            M, N, M, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);
        IE = 1;
        ITAUQ = ITAU;
        ITAUP = ITAUQ + M;
        NWORK = ITAUP + M;

        // Bidiagonalize L in WORK(IL)
        // CWorkspace: need   M*M [VT] + M*M [L] + 2*M [tauq, taup] + M      [work]
        // CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + 2*M*NB [work]
        // RWorkspace: need   M [e]

        zgebrd(M, M, WORK(IL).asMatrix(LDWRKL), LDWRKL, S, RWORK(IE),
            WORK(ITAUQ), WORK(ITAUP), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RU] + M*M [RVT] + BDSPAC

        IRU = IE + M;
        IRVT = IRU + M * M;
        NRWORK = IRVT + M * M;
        dbdsdc('U', 'I', M, S, RWORK(IE), RWORK(IRU).asMatrix(M), M,
            RWORK(IRVT).asMatrix(M), M, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
        // Overwrite WORK(IU) by the left singular vectors of L
        // CWorkspace: need   M*M [VT] + M*M [L] + 2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacp2('F', M, M, RWORK(IRU).asMatrix(M), M, U, LDU);
        zunmbr('Q', 'L', 'N', M, M, M, WORK(IL).asMatrix(LDWRKL), LDWRKL,
            WORK(ITAUQ), U, LDU, WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
        // Overwrite WORK(IVT) by the right singular vectors of L
        // CWorkspace: need   M*M [VT] + M*M [L] + 2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer M*M [VT] + M*M [L] + 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacp2('F', M, M, RWORK(IRVT).asMatrix(M), M,
            WORK(IVT).asMatrix(LDWKVT), LDWKVT);
        zunmbr(
            'P',
            'R',
            'C',
            M,
            M,
            M,
            WORK(IL).asMatrix(LDWRKL),
            LDWRKL,
            WORK(ITAUP),
            WORK(IVT).asMatrix(LDWKVT),
            LDWKVT,
            WORK(NWORK),
            LWORK - NWORK + 1,
            IERR);

        // Multiply right singular vectors of L in WORK(IL) by Q
        // in A, storing result in WORK(IL) and copying to A
        // CWorkspace: need   M*M [VT] + M*M [L]
        // CWorkspace: prefer M*M [VT] + M*N [L]
        // RWorkspace: need   0

        for (I = 1; I <= N; I += CHUNK) {
          BLK = min(N - I + 1, CHUNK);
          zgemm('N', 'N', M, BLK, M, Complex.one, WORK(IVT).asMatrix(M), M,
              A(1, I), LDA, Complex.zero, WORK(IL).asMatrix(LDWRKL), LDWRKL);
          zlacpy('F', M, BLK, WORK(IL).asMatrix(LDWRKL), LDWRKL, A(1, I), LDA);
        }
      } else if (WNTQS) {
        // Path 3t (N >> M, JOBZ='S')
        // M right singular vectors to be computed in VT and
        // M left singular vectors to be computed in U

        IL = 1;

        // WORK(IL) is M by M

        LDWRKL = M;
        ITAU = IL + LDWRKL * M;
        NWORK = ITAU + M;

        // Compute A=L*Q
        // CWorkspace: need   M*M [L] + M [tau] + M    [work]
        // CWorkspace: prefer M*M [L] + M [tau] + M*NB [work]
        // RWorkspace: need   0

        zgelqf(M, N, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Copy L to WORK(IL), zeroing out above it

        zlacpy('L', M, M, A, LDA, WORK(IL).asMatrix(LDWRKL), LDWRKL);
        zlaset('U', M - 1, M - 1, Complex.zero, Complex.zero,
            WORK(IL + LDWRKL).asMatrix(LDWRKL), LDWRKL);

        // Generate Q in A
        // CWorkspace: need   M*M [L] + M [tau] + M    [work]
        // CWorkspace: prefer M*M [L] + M [tau] + M*NB [work]
        // RWorkspace: need   0

        zunglq(
            M, N, M, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);
        IE = 1;
        ITAUQ = ITAU;
        ITAUP = ITAUQ + M;
        NWORK = ITAUP + M;

        // Bidiagonalize L in WORK(IL)
        // CWorkspace: need   M*M [L] + 2*M [tauq, taup] + M      [work]
        // CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + 2*M*NB [work]
        // RWorkspace: need   M [e]

        zgebrd(M, M, WORK(IL).asMatrix(LDWRKL), LDWRKL, S, RWORK(IE),
            WORK(ITAUQ), WORK(ITAUP), WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RU] + M*M [RVT] + BDSPAC

        IRU = IE + M;
        IRVT = IRU + M * M;
        NRWORK = IRVT + M * M;
        dbdsdc('U', 'I', M, S, RWORK(IE), RWORK(IRU).asMatrix(M), M,
            RWORK(IRVT).asMatrix(M), M, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRU) to complex matrix U
        // Overwrite U by left singular vectors of L
        // CWorkspace: need   M*M [L] + 2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacp2('F', M, M, RWORK(IRU).asMatrix(M), M, U, LDU);
        zunmbr('Q', 'L', 'N', M, M, M, WORK(IL).asMatrix(LDWRKL), LDWRKL,
            WORK(ITAUQ), U, LDU, WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Copy real matrix RWORK(IRVT) to complex matrix VT
        // Overwrite VT by left singular vectors of L
        // CWorkspace: need   M*M [L] + 2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer M*M [L] + 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacp2('F', M, M, RWORK(IRVT).asMatrix(M), M, VT, LDVT);
        zunmbr('P', 'R', 'C', M, M, M, WORK(IL).asMatrix(LDWRKL), LDWRKL,
            WORK(ITAUP), VT, LDVT, WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Copy VT to WORK(IL), multiply right singular vectors of L
        // in WORK(IL) by Q in A, storing result in VT
        // CWorkspace: need   M*M [L]
        // RWorkspace: need   0

        zlacpy('F', M, M, VT, LDVT, WORK(IL).asMatrix(LDWRKL), LDWRKL);
        zgemm('N', 'N', M, N, M, Complex.one, WORK(IL).asMatrix(LDWRKL), LDWRKL,
            A, LDA, Complex.zero, VT, LDVT);
      } else if (WNTQA) {
        // Path 4t (N >> M, JOBZ='A')
        // N right singular vectors to be computed in VT and
        // M left singular vectors to be computed in U

        IVT = 1;

        // WORK(IVT) is M by M

        LDWKVT = M;
        ITAU = IVT + LDWKVT * M;
        NWORK = ITAU + M;

        // Compute A=L*Q, copying result to VT
        // CWorkspace: need   M*M [VT] + M [tau] + M    [work]
        // CWorkspace: prefer M*M [VT] + M [tau] + M*NB [work]
        // RWorkspace: need   0

        zgelqf(M, N, A, LDA, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1, IERR);
        zlacpy('U', M, N, A, LDA, VT, LDVT);

        // Generate Q in VT
        // CWorkspace: need   M*M [VT] + M [tau] + N    [work]
        // CWorkspace: prefer M*M [VT] + M [tau] + N*NB [work]
        // RWorkspace: need   0

        zunglq(N, N, M, VT, LDVT, WORK(ITAU), WORK(NWORK), LWORK - NWORK + 1,
            IERR);

        // Produce L in A, zeroing out above it

        zlaset('U', M - 1, M - 1, Complex.zero, Complex.zero, A(1, 2), LDA);
        IE = 1;
        ITAUQ = ITAU;
        ITAUP = ITAUQ + M;
        NWORK = ITAUP + M;

        // Bidiagonalize L in A
        // CWorkspace: need   M*M [VT] + 2*M [tauq, taup] + M      [work]
        // CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + 2*M*NB [work]
        // RWorkspace: need   M [e]

        zgebrd(M, M, A, LDA, S, RWORK(IE), WORK(ITAUQ), WORK(ITAUP),
            WORK(NWORK), LWORK - NWORK + 1, IERR);

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RU] + M*M [RVT] + BDSPAC

        IRU = IE + M;
        IRVT = IRU + M * M;
        NRWORK = IRVT + M * M;
        dbdsdc('U', 'I', M, S, RWORK(IE), RWORK(IRU).asMatrix(M), M,
            RWORK(IRVT).asMatrix(M), M, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRU) to complex matrix U
        // Overwrite U by left singular vectors of L
        // CWorkspace: need   M*M [VT] + 2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacp2('F', M, M, RWORK(IRU).asMatrix(M), M, U, LDU);
        zunmbr('Q', 'L', 'N', M, M, M, A, LDA, WORK(ITAUQ), U, LDU, WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
        // Overwrite WORK(IVT) by right singular vectors of L
        // CWorkspace: need   M*M [VT] + 2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer M*M [VT] + 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacp2('F', M, M, RWORK(IRVT).asMatrix(M), M,
            WORK(IVT).asMatrix(LDWKVT), LDWKVT);
        zunmbr(
            'P',
            'R',
            'C',
            M,
            M,
            M,
            A,
            LDA,
            WORK(ITAUP),
            WORK(IVT).asMatrix(LDWKVT),
            LDWKVT,
            WORK(NWORK),
            LWORK - NWORK + 1,
            IERR);

        // Multiply right singular vectors of L in WORK(IVT) by
        // Q in VT, storing result in A
        // CWorkspace: need   M*M [VT]
        // RWorkspace: need   0

        zgemm('N', 'N', M, N, M, Complex.one, WORK(IVT).asMatrix(LDWKVT),
            LDWKVT, VT, LDVT, Complex.zero, A, LDA);

        // Copy right singular vectors of A from A to VT

        zlacpy('F', M, N, A, LDA, VT, LDVT);
      }
    } else if (N >= MNTHR2) {
      // MNTHR2 <= N < MNTHR1

      // Path 5t (N >> M, but not as much as MNTHR1)
      // Reduce to bidiagonal form without QR decomposition, use
      // ZUNGBR and matrix multiplication to compute singular vectors

      IE = 1;
      NRWORK = IE + M;
      ITAUQ = 1;
      ITAUP = ITAUQ + M;
      NWORK = ITAUP + M;

      // Bidiagonalize A
      // CWorkspace: need   2*M [tauq, taup] + N        [work]
      // CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work]
      // RWorkspace: need   M [e]

      zgebrd(M, N, A, LDA, S, RWORK(IE), WORK(ITAUQ), WORK(ITAUP), WORK(NWORK),
          LWORK - NWORK + 1, IERR);

      if (WNTQN) {
        // Path 5tn (N >> M, JOBZ='N')
        // Compute singular values only
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + BDSPAC

        dbdsdc('L', 'N', M, S, RWORK(IE), DUM.asMatrix(1), 1, DUM.asMatrix(1),
            1, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);
      } else if (WNTQO) {
        IRVT = NRWORK;
        IRU = IRVT + M * M;
        NRWORK = IRU + M * M;
        IVT = NWORK;

        // Path 5to (N >> M, JOBZ='O')
        // Copy A to U, generate Q
        // CWorkspace: need   2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacpy('L', M, M, A, LDA, U, LDU);
        zungbr('Q', M, M, N, U, LDU, WORK(ITAUQ), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Generate P**H in A
        // CWorkspace: need   2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zungbr('P', M, N, M, A, LDA, WORK(ITAUP), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        LDWKVT = M;
        if (LWORK >= M * N + 3 * M) {
          // WORK( IVT ) is M by N

          NWORK = IVT + LDWKVT * N;
          CHUNK = N;
        } else {
          // WORK( IVT ) is M by CHUNK

          CHUNK = (LWORK - 3 * M) ~/ M;
          NWORK = IVT + LDWKVT * CHUNK;
        }

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU] + BDSPAC

        dbdsdc('L', 'I', M, S, RWORK(IE), RWORK(IRU).asMatrix(M), M,
            RWORK(IRVT).asMatrix(M), M, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Multiply Q in U by real matrix RWORK(IRVT)
        // storing the result in WORK(IVT), copying to U
        // CWorkspace: need   2*M [tauq, taup] + M*M [VT]
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork]

        zlacrm(M, M, U, LDU, RWORK(IRU).asMatrix(M), M,
            WORK(IVT).asMatrix(LDWKVT), LDWKVT, RWORK(NRWORK));
        zlacpy('F', M, M, WORK(IVT).asMatrix(LDWKVT), LDWKVT, U, LDU);

        // Multiply RWORK(IRVT) by P**H in A, storing the
        // result in WORK(IVT), copying to A
        // CWorkspace: need   2*M [tauq, taup] + M*M [VT]
        // CWorkspace: prefer 2*M [tauq, taup] + M*N [VT]
        // RWorkspace: need   M [e] + M*M [RVT] + 2*M*M [rwork]
        // RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here

        NRWORK = IRU;
        for (I = 1; I <= N; I += CHUNK) {
          BLK = min(N - I + 1, CHUNK);
          zlarcm(M, BLK, RWORK(IRVT).asMatrix(M), M, A(1, I), LDA,
              WORK(IVT).asMatrix(LDWKVT), LDWKVT, RWORK(NRWORK));
          zlacpy('F', M, BLK, WORK(IVT).asMatrix(LDWKVT), LDWKVT, A(1, I), LDA);
        }
      } else if (WNTQS) {
        // Path 5ts (N >> M, JOBZ='S')
        // Copy A to U, generate Q
        // CWorkspace: need   2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacpy('L', M, M, A, LDA, U, LDU);
        zungbr('Q', M, M, N, U, LDU, WORK(ITAUQ), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Copy A to VT, generate P**H
        // CWorkspace: need   2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacpy('U', M, N, A, LDA, VT, LDVT);
        zungbr('P', M, N, M, VT, LDVT, WORK(ITAUP), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU] + BDSPAC

        IRVT = NRWORK;
        IRU = IRVT + M * M;
        NRWORK = IRU + M * M;
        dbdsdc('L', 'I', M, S, RWORK(IE), RWORK(IRU).asMatrix(M), M,
            RWORK(IRVT).asMatrix(M), M, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Multiply Q in U by real matrix RWORK(IRU), storing the
        // result in A, copying to U
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork]

        zlacrm(M, M, U, LDU, RWORK(IRU).asMatrix(M), M, A, LDA, RWORK(NRWORK));
        zlacpy('F', M, M, A, LDA, U, LDU);

        // Multiply real matrix RWORK(IRVT) by P**H in VT,
        // storing the result in A, copying to VT
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here

        NRWORK = IRU;
        zlarcm(
            M, N, RWORK(IRVT).asMatrix(M), M, VT, LDVT, A, LDA, RWORK(NRWORK));
        zlacpy('F', M, N, A, LDA, VT, LDVT);
      } else {
        // Path 5ta (N >> M, JOBZ='A')
        // Copy A to U, generate Q
        // CWorkspace: need   2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   0

        zlacpy('L', M, M, A, LDA, U, LDU);
        zungbr('Q', M, M, N, U, LDU, WORK(ITAUQ), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Copy A to VT, generate P**H
        // CWorkspace: need   2*M [tauq, taup] + N    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + N*NB [work]
        // RWorkspace: need   0

        zlacpy('U', M, N, A, LDA, VT, LDVT);
        zungbr('P', N, N, M, VT, LDVT, WORK(ITAUP), WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU] + BDSPAC

        IRVT = NRWORK;
        IRU = IRVT + M * M;
        NRWORK = IRU + M * M;
        dbdsdc('L', 'I', M, S, RWORK(IE), RWORK(IRU).asMatrix(M), M,
            RWORK(IRVT).asMatrix(M), M, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Multiply Q in U by real matrix RWORK(IRU), storing the
        // result in A, copying to U
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU] + 2*M*M [rwork]

        zlacrm(M, M, U, LDU, RWORK(IRU).asMatrix(M), M, A, LDA, RWORK(NRWORK));
        zlacpy('F', M, M, A, LDA, U, LDU);

        // Multiply real matrix RWORK(IRVT) by P**H in VT,
        // storing the result in A, copying to VT
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here

        NRWORK = IRU;
        zlarcm(
            M, N, RWORK(IRVT).asMatrix(M), M, VT, LDVT, A, LDA, RWORK(NRWORK));
        zlacpy('F', M, N, A, LDA, VT, LDVT);
      }
    } else {
      // N < MNTHR2

      // Path 6t (N > M, but not much larger)
      // Reduce to bidiagonal form without LQ decomposition
      // Use ZUNMBR to compute singular vectors

      IE = 1;
      NRWORK = IE + M;
      ITAUQ = 1;
      ITAUP = ITAUQ + M;
      NWORK = ITAUP + M;

      // Bidiagonalize A
      // CWorkspace: need   2*M [tauq, taup] + N        [work]
      // CWorkspace: prefer 2*M [tauq, taup] + (M+N)*NB [work]
      // RWorkspace: need   M [e]

      zgebrd(M, N, A, LDA, S, RWORK(IE), WORK(ITAUQ), WORK(ITAUP), WORK(NWORK),
          LWORK - NWORK + 1, IERR);
      if (WNTQN) {
        // Path 6tn (N > M, JOBZ='N')
        // Compute singular values only
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + BDSPAC

        dbdsdc('L', 'N', M, S, RWORK(IE), DUM.asMatrix(1), 1, DUM.asMatrix(1),
            1, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);
      } else if (WNTQO) {
        // Path 6to (N > M, JOBZ='O')
        LDWKVT = M;
        IVT = NWORK;
        if (LWORK >= M * N + 3 * M) {
          // WORK( IVT ) is M by N

          zlaset('F', M, N, Complex.zero, Complex.zero,
              WORK(IVT).asMatrix(LDWKVT), LDWKVT);
          NWORK = IVT + LDWKVT * N;
        } else {
          // WORK( IVT ) is M by CHUNK

          CHUNK = (LWORK - 3 * M) ~/ M;
          NWORK = IVT + LDWKVT * CHUNK;
        }

        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU] + BDSPAC

        IRVT = NRWORK;
        IRU = IRVT + M * M;
        NRWORK = IRU + M * M;
        dbdsdc('L', 'I', M, S, RWORK(IE), RWORK(IRU).asMatrix(M), M,
            RWORK(IRVT).asMatrix(M), M, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRU) to complex matrix U
        // Overwrite U by left singular vectors of A
        // CWorkspace: need   2*M [tauq, taup] + M*M [VT] + M    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work]
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU]

        zlacp2('F', M, M, RWORK(IRU).asMatrix(M), M, U, LDU);
        zunmbr('Q', 'L', 'N', M, M, N, A, LDA, WORK(ITAUQ), U, LDU, WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        if (LWORK >= M * N + 3 * M) {
          // Path 6to-fast
          // Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
          // Overwrite WORK(IVT) by right singular vectors of A,
          // copying to A
          // CWorkspace: need   2*M [tauq, taup] + M*N [VT] + M    [work]
          // CWorkspace: prefer 2*M [tauq, taup] + M*N [VT] + M*NB [work]
          // RWorkspace: need   M [e] + M*M [RVT]

          zlacp2('F', M, M, RWORK(IRVT).asMatrix(M), M,
              WORK(IVT).asMatrix(LDWKVT), LDWKVT);
          zunmbr(
              'P',
              'R',
              'C',
              M,
              N,
              M,
              A,
              LDA,
              WORK(ITAUP),
              WORK(IVT).asMatrix(LDWKVT),
              LDWKVT,
              WORK(NWORK),
              LWORK - NWORK + 1,
              IERR);
          zlacpy('F', M, N, WORK(IVT).asMatrix(LDWKVT), LDWKVT, A, LDA);
        } else {
          // Path 6to-slow
          // Generate P**H in A
          // CWorkspace: need   2*M [tauq, taup] + M*M [VT] + M    [work]
          // CWorkspace: prefer 2*M [tauq, taup] + M*M [VT] + M*NB [work]
          // RWorkspace: need   0

          zungbr('P', M, N, M, A, LDA, WORK(ITAUP), WORK(NWORK),
              LWORK - NWORK + 1, IERR);

          // Multiply Q in A by real matrix RWORK(IRU), storing the
          // result in WORK(IU), copying to A
          // CWorkspace: need   2*M [tauq, taup] + M*M [VT]
          // CWorkspace: prefer 2*M [tauq, taup] + M*N [VT]
          // RWorkspace: need   M [e] + M*M [RVT] + 2*M*M [rwork]
          // RWorkspace: prefer M [e] + M*M [RVT] + 2*M*N [rwork] < M + 5*M*M since N < 2*M here

          NRWORK = IRU;
          for (I = 1; I <= N; I += CHUNK) {
            BLK = min(N - I + 1, CHUNK);
            zlarcm(M, BLK, RWORK(IRVT).asMatrix(M), M, A(1, I), LDA,
                WORK(IVT).asMatrix(LDWKVT), LDWKVT, RWORK(NRWORK));
            zlacpy(
                'F', M, BLK, WORK(IVT).asMatrix(LDWKVT), LDWKVT, A(1, I), LDA);
          }
        }
      } else if (WNTQS) {
        // Path 6ts (N > M, JOBZ='S')
        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU] + BDSPAC

        IRVT = NRWORK;
        IRU = IRVT + M * M;
        NRWORK = IRU + M * M;
        dbdsdc('L', 'I', M, S, RWORK(IE), RWORK(IRU).asMatrix(M), M,
            RWORK(IRVT).asMatrix(M), M, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRU) to complex matrix U
        // Overwrite U by left singular vectors of A
        // CWorkspace: need   2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU]

        zlacp2('F', M, M, RWORK(IRU).asMatrix(M), M, U, LDU);
        zunmbr('Q', 'L', 'N', M, M, N, A, LDA, WORK(ITAUQ), U, LDU, WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Copy real matrix RWORK(IRVT) to complex matrix VT
        // Overwrite VT by right singular vectors of A
        // CWorkspace: need   2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   M [e] + M*M [RVT]

        zlaset('F', M, N, Complex.zero, Complex.zero, VT, LDVT);
        zlacp2('F', M, M, RWORK(IRVT).asMatrix(M), M, VT, LDVT);
        zunmbr('P', 'R', 'C', M, N, M, A, LDA, WORK(ITAUP), VT, LDVT,
            WORK(NWORK), LWORK - NWORK + 1, IERR);
      } else {
        // Path 6ta (N > M, JOBZ='A')
        // Perform bidiagonal SVD, computing left singular vectors
        // of bidiagonal matrix in RWORK(IRU) and computing right
        // singular vectors of bidiagonal matrix in RWORK(IRVT)
        // CWorkspace: need   0
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU] + BDSPAC

        IRVT = NRWORK;
        IRU = IRVT + M * M;
        NRWORK = IRU + M * M;

        dbdsdc('L', 'I', M, S, RWORK(IE), RWORK(IRU).asMatrix(M), M,
            RWORK(IRVT).asMatrix(M), M, DUM, IDUM, RWORK(NRWORK), IWORK, INFO);

        // Copy real matrix RWORK(IRU) to complex matrix U
        // Overwrite U by left singular vectors of A
        // CWorkspace: need   2*M [tauq, taup] + M    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + M*NB [work]
        // RWorkspace: need   M [e] + M*M [RVT] + M*M [RU]

        zlacp2('F', M, M, RWORK(IRU).asMatrix(M), M, U, LDU);
        zunmbr('Q', 'L', 'N', M, M, N, A, LDA, WORK(ITAUQ), U, LDU, WORK(NWORK),
            LWORK - NWORK + 1, IERR);

        // Set all of VT to identity matrix

        zlaset('F', N, N, Complex.zero, Complex.one, VT, LDVT);

        // Copy real matrix RWORK(IRVT) to complex matrix VT
        // Overwrite VT by right singular vectors of A
        // CWorkspace: need   2*M [tauq, taup] + N    [work]
        // CWorkspace: prefer 2*M [tauq, taup] + N*NB [work]
        // RWorkspace: need   M [e] + M*M [RVT]

        zlacp2('F', M, M, RWORK(IRVT).asMatrix(M), M, VT, LDVT);
        zunmbr('P', 'R', 'C', N, N, M, A, LDA, WORK(ITAUP), VT, LDVT,
            WORK(NWORK), LWORK - NWORK + 1, IERR);
      }
    }
  }

  // Undo scaling if necessary

  if (ISCL == 1) {
    if (ANRM > BIGNUM) {
      dlascl('G', 0, 0, BIGNUM, ANRM, MINMN, 1, S.asMatrix(MINMN), MINMN, IERR);
    }
    if (INFO.value != 0 && ANRM > BIGNUM) {
      dlascl('G', 0, 0, BIGNUM, ANRM, MINMN - 1, 1, RWORK(IE).asMatrix(MINMN),
          MINMN, IERR);
    }
    if (ANRM < SMLNUM) {
      dlascl('G', 0, 0, SMLNUM, ANRM, MINMN, 1, S.asMatrix(MINMN), MINMN, IERR);
    }
    if (INFO.value != 0 && ANRM < SMLNUM) {
      dlascl('G', 0, 0, SMLNUM, ANRM, MINMN - 1, 1, RWORK(IE).asMatrix(MINMN),
          MINMN, IERR);
    }
  }

  // Return optimal workspace in WORK(1)

  WORK[1] = droundup_lwork(MAXWRK).toComplex();
}