zgejsv function

void zgejsv(
String JOBA,
String JOBU,
String JOBV,
String JOBR,
String JOBT,
String JOBP,
int M,
int N,
Matrix<Complex> A_,
int LDA,
Array<double> SVA_,
Matrix<Complex> U_,
int LDU,
Matrix<Complex> V_,
int LDV,
Array<Complex> CWORK_,
int LWORK,
Array<double> RWORK_,
int LRWORK,
Array<int> IWORK_,
Box<int> INFO,
)
Implementation

void zgejsv(
  final String JOBA,
  final String JOBU,
  final String JOBV,
  final String JOBR,
  final String JOBT,
  final String JOBP,
  final int M,
  final int N,
  final Matrix<Complex> A_,
  final int LDA,
  final Array<double> SVA_,
  final Matrix<Complex> U_,
  final int LDU,
  final Matrix<Complex> V_,
  final int LDV,
  final Array<Complex> CWORK_,
  final int LWORK,
  final Array<double> RWORK_,
  final int LRWORK,
  final Array<int> IWORK_,
  final Box<int> INFO,
) {
  final A = A_.having(ld: LDA);
  final U = U_.having(ld: LDU);
  final V = V_.having(ld: LDV);
  final SVA = SVA_.having(length: N);
  final CWORK = CWORK_.having();
  final RWORK = RWORK_.having(length: LRWORK);
  final IWORK = IWORK_.having();

  const ZERO = 0.0, ONE = 1.0;
  Complex CTEMP;
  double AATMAX,
      AATMIN,
      BIG,
      BIG1,
      COND_OK,
      CONDR1,
      CONDR2,
      ENTRA,
      ENTRAT,
      EPSLN,
      MAXPRJ,
      SCALEM,
      SCONDA,
      SFMIN,
      SMALL,
      USCAL1,
      USCAL2;
  int N1 = 0, NR, NUMRANK, p, q, WARNING;
  bool ALMORT,
      DEFR,
      ERREST,
      GOSCAL,
      JRACC,
      KILL,
      LQUERY,
      LSVEC,
      L2ABER,
      L2KILL,
      L2PERT,
      L2RANK,
      L2TRAN,
      NOSCAL,
      ROWPIV,
      RSVEC,
      TRANSP;

  int OPTWRK = 0, MINWRK = 0, MINRWRK = 0, MINIWRK = 0;
  int LWCON,
      LWLQF,
      LWQP3,
      LWQRF,
      LWUNMLQ,
      LWUNMQR,
      LWUNMQRM,
      LWSVDJ,
      LWSVDJV,
      LRWQP3,
      LRWCON,
      LRWSVDJ,
      IWOFF = 0;
  int LWRK_ZGELQF = 0,
      LWRK_ZGEQP3 = 0,
      LWRK_ZGEQP3N,
      LWRK_ZGEQRF = 0,
      LWRK_ZGESVJ = 0,
      LWRK_ZGESVJV,
      LWRK_ZGESVJU,
      LWRK_ZUNMLQ,
      LWRK_ZUNMQR,
      LWRK_ZUNMQRM;
  final CDUMMY = Array<Complex>(1);
  final RDUMMY = Array<double>(1);
  final IERR = Box(0);
  final AAPP = Box(0.0), AAQQ = Box(0.0), XSC = Box(0.0), TEMP1 = Box(0.0);

  // Test the input arguments
  LSVEC = lsame(JOBU, 'U') || lsame(JOBU, 'F');
  JRACC = lsame(JOBV, 'J');
  RSVEC = lsame(JOBV, 'V') || JRACC;
  ROWPIV = lsame(JOBA, 'F') || lsame(JOBA, 'G');
  L2RANK = lsame(JOBA, 'R');
  L2ABER = lsame(JOBA, 'A');
  ERREST = lsame(JOBA, 'E') || lsame(JOBA, 'G');
  L2TRAN = lsame(JOBT, 'T') && (M == N);
  L2KILL = lsame(JOBR, 'R');
  DEFR = lsame(JOBR, 'N');
  L2PERT = lsame(JOBP, 'P');

  LQUERY = (LWORK == -1) || (LRWORK == -1);

  if (!(ROWPIV || L2RANK || L2ABER || ERREST || lsame(JOBA, 'C'))) {
    INFO.value = -1;
  } else if (!(LSVEC ||
      lsame(JOBU, 'N') ||
      (lsame(JOBU, 'W') && RSVEC && L2TRAN))) {
    INFO.value = -2;
  } else if (!(RSVEC ||
      lsame(JOBV, 'N') ||
      (lsame(JOBV, 'W') && LSVEC && L2TRAN))) {
    INFO.value = -3;
  } else if (!(L2KILL || DEFR)) {
    INFO.value = -4;
  } else if (!(lsame(JOBT, 'T') || lsame(JOBT, 'N'))) {
    INFO.value = -5;
  } else if (!(L2PERT || lsame(JOBP, 'N'))) {
    INFO.value = -6;
  } else if (M < 0) {
    INFO.value = -7;
  } else if ((N < 0) || (N > M)) {
    INFO.value = -8;
  } else if (LDA < M) {
    INFO.value = -10;
  } else if (LSVEC && (LDU < M)) {
    INFO.value = -13;
  } else if (RSVEC && (LDV < N)) {
    INFO.value = -15;
  } else {
    INFO.value = 0;
  }

  if (INFO.value == 0) {
    // compute the minimal and the optimal workspace lengths
    // [[The expressions for computing the minimal and the optimal
    // values of LCWORK, LRWORK are written with a lot of redundancy and
    // can be simplified. However, this verbose form is useful for
    // maintenance and modifications of the code.]]

    // minimal workspace length for ZGEQP3 of an M x N matrix,
    //  ZGEQRF of an N x N matrix, ZGELQF of an N x N matrix,
    //  ZUNMLQ for computing N x N matrix, ZUNMQR for computing N x N
    //  matrix, ZUNMQR for computing M x N matrix, respectively.
    LWQP3 = N + 1;
    LWQRF = max(1, N);
    LWLQF = max(1, N);
    LWUNMLQ = max(1, N);
    LWUNMQR = max(1, N);
    LWUNMQRM = max(1, M);
    // minimal workspace length for ZPOCON of an N x N matrix
    LWCON = 2 * N;
    // minimal workspace length for ZGESVJ of an N x N matrix,
    // without and with explicit accumulation of Jacobi rotations
    LWSVDJ = max(2 * N, 1);
    LWSVDJV = max(2 * N, 1);
    // minimal REAL workspace length for ZGEQP3, ZPOCON, ZGESVJ
    LRWQP3 = 2 * N;
    LRWCON = N;
    LRWSVDJ = N;
    if (LQUERY) {
      zgeqp3(M, N, A, LDA, IWORK, CDUMMY, CDUMMY, -1, RDUMMY, IERR);
      LWRK_ZGEQP3 = CDUMMY[1].toInt();
      zgeqrf(N, N, A, LDA, CDUMMY, CDUMMY, -1, IERR);
      LWRK_ZGEQRF = CDUMMY[1].toInt();
      zgelqf(N, N, A, LDA, CDUMMY, CDUMMY, -1, IERR);
      LWRK_ZGELQF = CDUMMY[1].toInt();
    }
    MINWRK = 2;
    OPTWRK = 2;
    MINIWRK = N;
    if (!(LSVEC || RSVEC)) {
      // minimal and optimal sizes of the complex workspace if
      // only the singular values are requested
      if (ERREST) {
        MINWRK = max(max(N + LWQP3, pow(N, 2).toInt().toInt() + LWCON),
            max(N + LWQRF, LWSVDJ));
      } else {
        MINWRK = max(N + LWQP3, max(N + LWQRF, LWSVDJ));
      }
      if (LQUERY) {
        zgesvj('L', 'N', 'N', N, N, A, LDA, SVA, N, V, LDV, CDUMMY, -1, RDUMMY,
            -1, IERR);
        LWRK_ZGESVJ = CDUMMY[1].toInt();
        if (ERREST) {
          OPTWRK = max(max(N + LWRK_ZGEQP3, pow(N, 2).toInt() + LWCON),
              max(N + LWRK_ZGEQRF, LWRK_ZGESVJ));
        } else {
          OPTWRK = max(N + LWRK_ZGEQP3, max(N + LWRK_ZGEQRF, LWRK_ZGESVJ));
        }
      }
      if (L2TRAN || ROWPIV) {
        if (ERREST) {
          MINRWRK = max(7, max(max(2 * M, LRWQP3), max(LRWCON, LRWSVDJ)));
        } else {
          MINRWRK = max(max(7, 2 * M), max(LRWQP3, LRWSVDJ));
        }
      } else {
        if (ERREST) {
          MINRWRK = max(max(7, LRWQP3), max(LRWCON, LRWSVDJ));
        } else {
          MINRWRK = max(7, max(LRWQP3, LRWSVDJ));
        }
      }
      if (ROWPIV || L2TRAN) MINIWRK += M;
    } else if (RSVEC && !LSVEC) {
      // minimal and optimal sizes of the complex workspace if the
      // singular values and the right singular vectors are requested
      if (ERREST) {
        MINWRK = max(max(N + LWQP3, max(LWCON, LWSVDJ)),
            max(max(N + LWLQF, 2 * N + LWQRF), max(N + LWSVDJ, N + LWUNMLQ)));
      } else {
        MINWRK = max(max(N + LWQP3, LWSVDJ),
            max(max(N + LWLQF, 2 * N + LWQRF), max(N + LWSVDJ, N + LWUNMLQ)));
      }
      if (LQUERY) {
        zgesvj('L', 'U', 'N', N, N, U, LDU, SVA, N, A, LDA, CDUMMY, -1, RDUMMY,
            -1, IERR);
        LWRK_ZGESVJ = CDUMMY[1].toInt();
        zunmlq('L', 'C', N, N, N, A, LDA, CDUMMY, V, LDV, CDUMMY, -1, IERR);
        LWRK_ZUNMLQ = CDUMMY[1].toInt();
        if (ERREST) {
          OPTWRK = max(
              max(N + LWRK_ZGEQP3, max(LWCON, LWRK_ZGESVJ)),
              max(max(N + LWRK_ZGELQF, 2 * N + LWRK_ZGEQRF),
                  max(N + LWRK_ZGESVJ, N + LWRK_ZUNMLQ)));
        } else {
          OPTWRK = max(
              max(N + LWRK_ZGEQP3, LWRK_ZGESVJ),
              max(max(N + LWRK_ZGELQF, 2 * N + LWRK_ZGEQRF),
                  max(N + LWRK_ZGESVJ, N + LWRK_ZUNMLQ)));
        }
      }
      if (L2TRAN || ROWPIV) {
        if (ERREST) {
          MINRWRK = max(7, max(max(2 * M, LRWQP3), max(LRWSVDJ, LRWCON)));
        } else {
          MINRWRK = max(max(7, 2 * M), max(LRWQP3, LRWSVDJ));
        }
      } else {
        if (ERREST) {
          MINRWRK = max(max(7, LRWQP3), max(LRWSVDJ, LRWCON));
        } else {
          MINRWRK = max(7, max(LRWQP3, LRWSVDJ));
        }
      }
      if (ROWPIV || L2TRAN) MINIWRK += M;
    } else if (LSVEC && !RSVEC) {
      // minimal and optimal sizes of the complex workspace if the
      // singular values and the left singular vectors are requested
      if (ERREST) {
        MINWRK =
            N + max(LWQP3, max(max(LWCON, N + LWQRF), max(LWSVDJ, LWUNMQRM)));
      } else {
        MINWRK = N + max(max(LWQP3, N + LWQRF), max(LWSVDJ, LWUNMQRM));
      }
      if (LQUERY) {
        zgesvj('L', 'U', 'N', N, N, U, LDU, SVA, N, A, LDA, CDUMMY, -1, RDUMMY,
            -1, IERR);
        LWRK_ZGESVJ = CDUMMY[1].toInt();
        zunmqr('L', 'N', M, N, N, A, LDA, CDUMMY, U, LDU, CDUMMY, -1, IERR);
        LWRK_ZUNMQRM = CDUMMY[1].toInt();
        if (ERREST) {
          OPTWRK = N +
              max(
                  LWRK_ZGEQP3,
                  max(max(LWCON, N + LWRK_ZGEQRF),
                      max(LWRK_ZGESVJ, LWRK_ZUNMQRM)));
        } else {
          OPTWRK = N +
              max(max(LWRK_ZGEQP3, N + LWRK_ZGEQRF),
                  max(LWRK_ZGESVJ, LWRK_ZUNMQRM));
        }
      }
      if (L2TRAN || ROWPIV) {
        if (ERREST) {
          MINRWRK = max(7, max(max(2 * M, LRWQP3), max(LRWSVDJ, LRWCON)));
        } else {
          MINRWRK = max(max(7, 2 * M), max(LRWQP3, LRWSVDJ));
        }
      } else {
        if (ERREST) {
          MINRWRK = max(max(7, LRWQP3), max(LRWSVDJ, LRWCON));
        } else {
          MINRWRK = max(7, max(LRWQP3, LRWSVDJ));
        }
      }
      if (ROWPIV || L2TRAN) MINIWRK += M;
    } else {
      // minimal and optimal sizes of the complex workspace if the
      // full SVD is requested
      if (!JRACC) {
        if (ERREST) {
          MINWRK = max(
              max(
                  N + LWQP3,
                  max(max(N + LWCON, 2 * N + pow(N, 2).toInt() + LWCON),
                      max(2 * N + LWQRF, 2 * N + LWQP3))),
              max(
                  max(
                      max(
                          2 * N + pow(N, 2).toInt() + N + LWLQF,
                          2 * N +
                              pow(N, 2).toInt() +
                              N +
                              pow(N, 2).toInt() +
                              LWCON),
                      max(2 * N + pow(N, 2).toInt() + N + LWSVDJ,
                          2 * N + pow(N, 2).toInt() + N + LWSVDJV)),
                  max(
                      max(2 * N + pow(N, 2).toInt() + N + LWUNMQR,
                          2 * N + pow(N, 2).toInt() + N + LWUNMLQ),
                      max(N + pow(N, 2).toInt() + LWSVDJ, N + LWUNMQRM))));
        } else {
          MINWRK = max(
              max(max(N + LWQP3, 2 * N + pow(N, 2).toInt() + LWCON),
                  max(2 * N + LWQRF, 2 * N + LWQP3)),
              max(
                  max(
                      max(
                          2 * N + pow(N, 2).toInt() + N + LWLQF,
                          2 * N +
                              pow(N, 2).toInt() +
                              N +
                              pow(N, 2).toInt() +
                              LWCON),
                      max(2 * N + pow(N, 2).toInt() + N + LWSVDJ,
                          2 * N + pow(N, 2).toInt() + N + LWSVDJV)),
                  max(
                      max(2 * N + pow(N, 2).toInt() + N + LWUNMQR,
                          2 * N + pow(N, 2).toInt() + N + LWUNMLQ),
                      max(N + pow(N, 2).toInt() + LWSVDJ, N + LWUNMQRM))));
        }
        MINIWRK += N;
        if (ROWPIV || L2TRAN) MINIWRK += M;
      } else {
        if (ERREST) {
          MINWRK = max(
              max(N + LWQP3, N + LWCON),
              max(max(2 * N + LWQRF, 2 * N + pow(N, 2).toInt() + LWSVDJV),
                  max(2 * N + pow(N, 2).toInt() + N + LWUNMQR, N + LWUNMQRM)));
        } else {
          MINWRK = max(
              N + LWQP3,
              max(max(2 * N + LWQRF, 2 * N + pow(N, 2).toInt() + LWSVDJV),
                  max(2 * N + pow(N, 2).toInt() + N + LWUNMQR, N + LWUNMQRM)));
        }
        if (ROWPIV || L2TRAN) MINIWRK += M;
      }
      if (LQUERY) {
        zunmqr('L', 'N', M, N, N, A, LDA, CDUMMY, U, LDU, CDUMMY, -1, IERR);
        LWRK_ZUNMQRM = CDUMMY[1].toInt();
        zunmqr('L', 'N', N, N, N, A, LDA, CDUMMY, U, LDU, CDUMMY, -1, IERR);
        LWRK_ZUNMQR = CDUMMY[1].toInt();
        if (!JRACC) {
          zgeqp3(N, N, A, LDA, IWORK, CDUMMY, CDUMMY, -1, RDUMMY, IERR);
          LWRK_ZGEQP3N = CDUMMY[1].toInt();
          zgesvj('L', 'U', 'N', N, N, U, LDU, SVA, N, V, LDV, CDUMMY, -1,
              RDUMMY, -1, IERR);
          LWRK_ZGESVJ = CDUMMY[1].toInt();
          zgesvj('U', 'U', 'N', N, N, U, LDU, SVA, N, V, LDV, CDUMMY, -1,
              RDUMMY, -1, IERR);
          LWRK_ZGESVJU = CDUMMY[1].toInt();
          zgesvj('L', 'U', 'V', N, N, U, LDU, SVA, N, V, LDV, CDUMMY, -1,
              RDUMMY, -1, IERR);
          LWRK_ZGESVJV = CDUMMY[1].toInt();
          zunmlq('L', 'C', N, N, N, A, LDA, CDUMMY, V, LDV, CDUMMY, -1, IERR);
          LWRK_ZUNMLQ = CDUMMY[1].toInt();
          if (ERREST) {
            OPTWRK = max(
                max(
                    N + LWRK_ZGEQP3,
                    max(max(N + LWCON, 2 * N + pow(N, 2).toInt() + LWCON),
                        max(2 * N + LWRK_ZGEQRF, 2 * N + LWRK_ZGEQP3N))),
                max(
                    max(
                        max(
                            2 * N + pow(N, 2).toInt() + N + LWRK_ZGELQF,
                            2 * N +
                                pow(N, 2).toInt() +
                                N +
                                pow(N, 2).toInt() +
                                LWCON),
                        max(2 * N + pow(N, 2).toInt() + N + LWRK_ZGESVJ,
                            2 * N + pow(N, 2).toInt() + N + LWRK_ZGESVJV)),
                    max(
                        max(2 * N + pow(N, 2).toInt() + N + LWRK_ZUNMQR,
                            2 * N + pow(N, 2).toInt() + N + LWRK_ZUNMLQ),
                        max(N + pow(N, 2).toInt() + LWRK_ZGESVJU,
                            N + LWRK_ZUNMQRM))));
          } else {
            OPTWRK = max(
                max(max(N + LWRK_ZGEQP3, 2 * N + pow(N, 2).toInt() + LWCON),
                    max(2 * N + LWRK_ZGEQRF, 2 * N + LWRK_ZGEQP3N)),
                max(
                    max(
                        max(
                            2 * N + pow(N, 2).toInt() + N + LWRK_ZGELQF,
                            2 * N +
                                pow(N, 2).toInt() +
                                N +
                                pow(N, 2).toInt() +
                                LWCON),
                        max(2 * N + pow(N, 2).toInt() + N + LWRK_ZGESVJ,
                            2 * N + pow(N, 2).toInt() + N + LWRK_ZGESVJV)),
                    max(
                        max(2 * N + pow(N, 2).toInt() + N + LWRK_ZUNMQR,
                            2 * N + pow(N, 2).toInt() + N + LWRK_ZUNMLQ),
                        max(N + pow(N, 2).toInt() + LWRK_ZGESVJU,
                            N + LWRK_ZUNMQRM))));
          }
        } else {
          zgesvj('L', 'U', 'V', N, N, U, LDU, SVA, N, V, LDV, CDUMMY, -1,
              RDUMMY, -1, IERR);
          LWRK_ZGESVJV = CDUMMY[1].toInt();
          zunmqr('L', 'N', N, N, N, CDUMMY.asMatrix(N), N, CDUMMY, V, LDV,
              CDUMMY, -1, IERR);
          LWRK_ZUNMQR = CDUMMY[1].toInt();
          zunmqr('L', 'N', M, N, N, A, LDA, CDUMMY, U, LDU, CDUMMY, -1, IERR);
          LWRK_ZUNMQRM = CDUMMY[1].toInt();
          if (ERREST) {
            OPTWRK = max(
                max(N + LWRK_ZGEQP3, max(N + LWCON, 2 * N + LWRK_ZGEQRF)),
                max(
                    max(2 * N + pow(N, 2).toInt(),
                        2 * N + pow(N, 2).toInt() + LWRK_ZGESVJV),
                    max(2 * N + pow(N, 2).toInt() + N + LWRK_ZUNMQR,
                        N + LWRK_ZUNMQRM)));
          } else {
            OPTWRK = max(
                max(N + LWRK_ZGEQP3, 2 * N + LWRK_ZGEQRF),
                max(
                    max(2 * N + pow(N, 2).toInt(),
                        2 * N + pow(N, 2).toInt() + LWRK_ZGESVJV),
                    max(2 * N + pow(N, 2).toInt() + N + LWRK_ZUNMQR,
                        N + LWRK_ZUNMQRM)));
          }
        }
      }
      if (L2TRAN || ROWPIV) {
        MINRWRK = max(7, max(max(2 * M, LRWQP3), max(LRWSVDJ, LRWCON)));
      } else {
        MINRWRK = max(max(7, LRWQP3), max(LRWSVDJ, LRWCON));
      }
    }
    MINWRK = max(2, MINWRK);
    OPTWRK = max(MINWRK, OPTWRK);
    if (LWORK < MINWRK && !LQUERY) INFO.value = -17;
    if (LRWORK < MINRWRK && !LQUERY) INFO.value = -19;
  }

  if (INFO.value != 0) {
    xerbla('ZGEJSV', -INFO.value);
    return;
  } else if (LQUERY) {
    CWORK[1] = OPTWRK.toComplex();
    CWORK[2] = MINWRK.toComplex();
    RWORK[1] = MINRWRK.toDouble();
    IWORK[1] = max(4, MINIWRK);
    return;
  }

  // Quick return for void matrix (Y3K safe)
  if ((M == 0) || (N == 0)) {
    for (var i = 1; i <= 4; i++) {
      IWORK[i] = 0;
    }
    for (var i = 1; i <= 7; i++) {
      RWORK[i] = 0;
    }
    return;
  }

  // Determine whether the matrix U should be M x N or M x M

  if (LSVEC) {
    N1 = N;
    if (lsame(JOBU, 'F')) N1 = M;
  }

  // Set numerical parameters

  // !    NOTE: Make sure dlamch() does not fail on the target architecture.
  EPSLN = dlamch('Epsilon');
  SFMIN = dlamch('SafeMinimum');
  SMALL = SFMIN / EPSLN;
  BIG = dlamch('O');
  // BIG   = ONE / SFMIN

  // Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N

  // (!)  If necessary, scale SVA() to protect the largest norm from
  // overflow. It is possible that this scaling pushes the smallest
  // column norm left from the underflow threshold (extreme case).
  SCALEM = ONE / sqrt(M * N);
  NOSCAL = true;
  GOSCAL = true;
  for (p = 1; p <= N; p++) {
    AAPP.value = ZERO;
    AAQQ.value = ONE;
    zlassq(M, A(1, p).asArray(), 1, AAPP, AAQQ);
    if (AAPP.value > BIG) {
      INFO.value = -9;
      xerbla('ZGEJSV', -INFO.value);
      return;
    }
    AAQQ.value = sqrt(AAQQ.value);
    if ((AAPP.value < (BIG / AAQQ.value)) && NOSCAL) {
      SVA[p] = AAPP.value * AAQQ.value;
    } else {
      NOSCAL = false;
      SVA[p] = AAPP.value * (AAQQ.value * SCALEM);
      if (GOSCAL) {
        GOSCAL = false;
        dscal(p - 1, SCALEM, SVA, 1);
      }
    }
  }

  if (NOSCAL) SCALEM = ONE;

  AAPP.value = ZERO;
  AAQQ.value = BIG;
  for (p = 1; p <= N; p++) {
    AAPP.value = max(AAPP.value, SVA[p]);
    if (SVA[p] != ZERO) AAQQ.value = min(AAQQ.value, SVA[p]);
  }

  // Quick return for zero M x N matrix
  if (AAPP.value == ZERO) {
    if (LSVEC) zlaset('G', M, N1, Complex.zero, Complex.one, U, LDU);
    if (RSVEC) zlaset('G', N, N, Complex.zero, Complex.one, V, LDV);
    RWORK[1] = ONE;
    RWORK[2] = ONE;
    if (ERREST) RWORK[3] = ONE;
    if (LSVEC && RSVEC) {
      RWORK[4] = ONE;
      RWORK[5] = ONE;
    }
    if (L2TRAN) {
      RWORK[6] = ZERO;
      RWORK[7] = ZERO;
    }
    IWORK[1] = 0;
    IWORK[2] = 0;
    IWORK[3] = 0;
    IWORK[4] = -1;
    return;
  }

  // Issue warning if denormalized column norms detected. Override the
  // high relative accuracy request. Issue licence to kill nonzero columns
  // (set them to zero) whose norm is less than sigma_max / BIG (roughly).
  WARNING = 0;
  if (AAQQ.value <= SFMIN) {
    L2RANK = true;
    L2KILL = true;
    WARNING = 1;
  }

  // Quick return for one-column matrix
  if (N == 1) {
    if (LSVEC) {
      zlascl('G', 0, 0, SVA[1], SCALEM, M, 1, A(1, 1), LDA, IERR);
      zlacpy('A', M, 1, A, LDA, U, LDU);
      // computing all M left singular vectors of the M x 1 matrix
      if (N1 != N) {
        zgeqrf(M, N, U, LDU, CWORK, CWORK(N + 1), LWORK - N, IERR);
        zungqr(M, N1, 1, U, LDU, CWORK, CWORK(N + 1), LWORK - N, IERR);
        zcopy(M, A(1, 1).asArray(), 1, U(1, 1).asArray(), 1);
      }
    }
    if (RSVEC) {
      V[1][1] = Complex.one;
    }
    if (SVA[1] < (BIG * SCALEM)) {
      SVA[1] /= SCALEM;
      SCALEM = ONE;
    }
    RWORK[1] = ONE / SCALEM;
    RWORK[2] = ONE;
    if (SVA[1] != ZERO) {
      IWORK[1] = 1;
      if ((SVA[1] / SCALEM) >= SFMIN) {
        IWORK[2] = 1;
      } else {
        IWORK[2] = 0;
      }
    } else {
      IWORK[1] = 0;
      IWORK[2] = 0;
    }
    IWORK[3] = 0;
    IWORK[4] = -1;
    if (ERREST) RWORK[3] = ONE;
    if (LSVEC && RSVEC) {
      RWORK[4] = ONE;
      RWORK[5] = ONE;
    }
    if (L2TRAN) {
      RWORK[6] = ZERO;
      RWORK[7] = ZERO;
    }
    return;
  }

  TRANSP = false;

  AATMAX = -ONE;
  AATMIN = BIG;
  if (ROWPIV || L2TRAN) {
    // Compute the row norms, needed to determine row pivoting sequence
    // (in the case of heavily row weighted A, row pivoting is strongly
    // advised) and to collect information needed to compare the
    // structures of A * A^* and A^* * A (in the case L2TRAN == true ).
    if (L2TRAN) {
      for (p = 1; p <= M; p++) {
        XSC.value = ZERO;
        TEMP1.value = ONE;
        zlassq(N, A(p, 1).asArray(), LDA, XSC, TEMP1);
        // ZLASSQ gets both the ell_2 and the ell_infinity norm
        // in one pass through the vector
        RWORK[M + p] = XSC.value * SCALEM;
        RWORK[p] = XSC.value * (SCALEM * sqrt(TEMP1.value));
        AATMAX = max(AATMAX, RWORK[p]);
        if (RWORK[p] != ZERO) AATMIN = min(AATMIN, RWORK[p]);
      }
    } else {
      for (p = 1; p <= M; p++) {
        RWORK[M + p] = SCALEM * A[p][izamax(N, A(p, 1).asArray(), LDA)].abs();
        AATMAX = max(AATMAX, RWORK[M + p]);
        AATMIN = min(AATMIN, RWORK[M + p]);
      }
    }
  }

  // For square matrix A try to determine whether A^*  would be better
  // input for the preconditioned Jacobi SVD, with faster convergence.
  // The decision is based on an O(N) function of the vector of column
  // and row norms of A, based on the Shannon entropy. This should give
  // the right choice in most cases when the difference actually matters.
  // It may fail and pick the slower converging side.
  ENTRA = ZERO;
  ENTRAT = ZERO;
  if (L2TRAN) {
    XSC.value = ZERO;
    TEMP1.value = ONE;
    dlassq(N, SVA, 1, XSC, TEMP1);
    TEMP1.value = ONE / TEMP1.value;

    ENTRA = ZERO;
    for (p = 1; p <= N; p++) {
      BIG1 = (pow(SVA[p] / XSC.value, 2)) * TEMP1.value;
      if (BIG1 != ZERO) ENTRA += BIG1 * log(BIG1);
    }
    ENTRA = -ENTRA / log(N);

    // Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex.
    // It is derived from the diagonal of  A^* * A.  Do the same with the
    // diagonal of A * A^*, compute the entropy of the corresponding
    // probability distribution. Note that A * A^* and A^* * A have the
    // same trace.
    ENTRAT = ZERO;
    for (p = 1; p <= M; p++) {
      BIG1 = (pow(RWORK[p] / XSC.value, 2)) * TEMP1.value;
      if (BIG1 != ZERO) ENTRAT += BIG1 * log(BIG1);
    }
    ENTRAT = -ENTRAT / log(M);

    // Analyze the entropies and decide A or A^*. Smaller entropy
    // usually means better input for the algorithm.
    TRANSP = (ENTRAT < ENTRA);

    // If A^* is better than A, take the adjoint of A. This is allowed
    // only for square matrices, M=N.
    if (TRANSP) {
      // In an optimal implementation, this trivial transpose
      // should be replaced with faster transpose.
      for (p = 1; p <= N - 1; p++) {
        A[p][p] = A[p][p].conjugate();
        for (q = p + 1; q <= N; q++) {
          CTEMP = A[q][p].conjugate();
          A[q][p] = A[p][q].conjugate();
          A[p][q] = CTEMP;
        }
      }
      A[N][N] = A[N][N].conjugate();
      for (p = 1; p <= N; p++) {
        RWORK[M + p] = SVA[p];
        SVA[p] = RWORK[p];
        // previously computed row 2-norms are now column 2-norms
        // of the transposed matrix
      }
      TEMP1.value = AAPP.value;
      AAPP.value = AATMAX;
      AATMAX = TEMP1.value;
      TEMP1.value = AAQQ.value;
      AAQQ.value = AATMIN;
      AATMIN = TEMP1.value;
      KILL = LSVEC;
      LSVEC = RSVEC;
      RSVEC = KILL;
      if (LSVEC) N1 = N;

      ROWPIV = true;
    }
  }
  // END IF L2TRAN

  // Scale the matrix so that its maximal singular value remains less
  // than sqrt(BIG) -- the matrix is scaled so that its maximal column
  // has Euclidean norm equal to sqrt(BIG/N). The only reason to keep
  // sqrt(BIG) instead of BIG is the fact that ZGEJSV uses LAPACK and
  // BLAS routines that, in some implementations, are not capable of
  // working in the full interval [SFMIN,BIG] and that they may provoke
  // overflows in the intermediate results. If the singular values spread
  // from SFMIN to BIG, then ZGESVJ will compute them. So, in that case,
  // one should use ZGESVJ instead of ZGEJSV.
  // >> change in the April 2016 update: allow bigger range, i.e. the
  // largest column is allowed up to BIG/N and ZGESVJ will do the rest.
  BIG1 = sqrt(BIG);
  TEMP1.value = sqrt(BIG / N);

  dlascl('G', 0, 0, AAPP.value, TEMP1.value, N, 1, SVA.asMatrix(N), N, IERR);
  if (AAQQ.value > (AAPP.value * SFMIN)) {
    AAQQ.value = (AAQQ.value / AAPP.value) * TEMP1.value;
  } else {
    AAQQ.value = (AAQQ.value * TEMP1.value) / AAPP.value;
  }
  TEMP1.value *= SCALEM;
  zlascl('G', 0, 0, AAPP.value, TEMP1.value, M, N, A, LDA, IERR);

  // To undo scaling at the end of this procedure, multiply the
  // computed singular values with USCAL2 / USCAL1.
  USCAL1 = TEMP1.value;
  USCAL2 = AAPP.value;

  if (L2KILL) {
    // L2KILL enforces computation of nonzero singular values in
    // the restricted range of condition number of the initial A,
    // sigma_max(A) / sigma_min(A) approx. sqrt(BIG)/sqrt(SFMIN).
    XSC.value = sqrt(SFMIN);
  } else {
    XSC.value = SMALL;

    // Now, if the condition number of A is too big,
    // sigma_max(A) / sigma_min(A) > sqrt(BIG/N) * EPSLN / SFMIN,
    // as a precaution measure, the full SVD is computed using ZGESVJ
    // with accumulated Jacobi rotations. This provides numerically
    // more robust computation, at the cost of slightly increased run
    // time. Depending on the concrete implementation of BLAS and LAPACK
    // (i.e. how they behave in presence of extreme ill-conditioning) the
    // implementor may decide to remove this switch.
    if ((AAQQ.value < sqrt(SFMIN)) && LSVEC && RSVEC) {
      JRACC = true;
    }
  }
  if (AAQQ.value < XSC.value) {
    for (p = 1; p <= N; p++) {
      if (SVA[p] < XSC.value) {
        zlaset('A', M, 1, Complex.zero, Complex.zero, A(1, p), LDA);
        SVA[p] = ZERO;
      }
    }
  }

  // Preconditioning using QR factorization with pivoting
  if (ROWPIV) {
    // Optional row permutation (Bjoerck row pivoting):
    // A result by Cox and Higham shows that the Bjoerck's
    // row pivoting combined with standard column pivoting
    // has similar effect as Powell-Reid complete pivoting.
    // The ell-infinity norms of A are made nonincreasing.
    if ((LSVEC && RSVEC) && !JRACC) {
      IWOFF = 2 * N;
    } else {
      IWOFF = N;
    }
    for (p = 1; p <= M - 1; p++) {
      q = idamax(M - p + 1, RWORK(M + p), 1) + p - 1;
      IWORK[IWOFF + p] = q;
      if (p != q) {
        TEMP1.value = RWORK[M + p];
        RWORK[M + p] = RWORK[M + q];
        RWORK[M + q] = TEMP1.value;
      }
    }
    zlaswp(N, A, LDA, 1, M - 1, IWORK(IWOFF + 1), 1);
  }

  // End of the preparation phase (scaling, optional sorting and
  // transposing, optional flushing of small columns).

  // Preconditioning

  // If the full SVD is needed, the right singular vectors are computed
  // from a matrix equation, and for that we need theoretical analysis
  // of the Businger-Golub pivoting. So we use ZGEQP3 as the first RR QRF.
  // In all other cases the first RR QRF can be chosen by other criteria
  // (eg speed by replacing global with restricted window pivoting, such
  // as in xGEQPX from TOMS # 782). Good results will be obtained using
  // xGEQPX with properly (!) chosen numerical parameters.
  // Any improvement of ZGEQP3 improves overall performance of ZGEJSV.

  // A * P1 = Q1 * [ R1^* 0]^*:
  for (p = 1; p <= N; p++) {
    // all columns are free columns
    IWORK[p] = 0;
  }
  zgeqp3(M, N, A, LDA, IWORK, CWORK, CWORK(N + 1), LWORK - N, RWORK, IERR);

  // The upper triangular matrix R1 from the first QRF is inspected for
  // rank deficiency and possibilities for deflation, or possible
  // ill-conditioning. Depending on the user specified flag L2RANK,
  // the procedure explores possibilities to reduce the numerical
  // rank by inspecting the computed upper triangular factor. If
  // L2RANK or L2ABER are up, then ZGEJSV will compute the SVD of
  // A + dA, where ||dA|| <= f(M,N)*EPSLN.
  NR = 1;
  if (L2ABER) {
    // Standard absolute error bound suffices. All sigma_i with
    // sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
    // aggressive enforcement of lower numerical rank by introducing a
    // backward error of the order of N*EPSLN*||A||.
    TEMP1.value = sqrt(N) * EPSLN;
    for (p = 2; p <= N; p++) {
      if (A[p][p].abs() >= (TEMP1.value * A[1][1].abs())) {
        NR++;
      } else {
        break;
      }
    }
  } else if (L2RANK) {
    // similarly as above, only slightly more gentle (less aggressive).
    // Sudden drop on the diagonal of R1 is used as the criterion for
    // close-to-rank-deficient.
    TEMP1.value = sqrt(SFMIN);
    for (p = 2; p <= N; p++) {
      if ((A[p][p].abs() < (EPSLN * A[p - 1][p - 1].abs())) ||
          (A[p][p].abs() < SMALL) ||
          (L2KILL && (A[p][p].abs() < TEMP1.value))) break;
      NR++;
    }
  } else {
    // The goal is high relative accuracy. However, if the matrix
    // has high scaled condition number the relative accuracy is in
    // general not feasible. Later on, a condition number estimator
    // will be deployed to estimate the scaled condition number.
    // Here we just remove the underflowed part of the triangular
    // factor. This prevents the situation in which the code is
    // working hard to get the accuracy not warranted by the data.
    TEMP1.value = sqrt(SFMIN);
    for (p = 2; p <= N; p++) {
      if ((A[p][p].abs() < SMALL) ||
          (L2KILL && (A[p][p].abs() < TEMP1.value))) {
        break;
      }
      NR++;
    }
  }

  ALMORT = false;
  if (NR == N) {
    MAXPRJ = ONE;
    for (p = 2; p <= N; p++) {
      TEMP1.value = A[p][p].abs() / SVA[IWORK[p]];
      MAXPRJ = min(MAXPRJ, TEMP1.value);
    }
    if (pow(MAXPRJ, 2) >= ONE - N * EPSLN) ALMORT = true;
  }

  SCONDA = -ONE;
  CONDR1 = -ONE;
  CONDR2 = -ONE;

  if (ERREST) {
    if (N == NR) {
      if (RSVEC) {
        // V is available as workspace
        zlacpy('U', N, N, A, LDA, V, LDV);
        for (p = 1; p <= N; p++) {
          TEMP1.value = SVA[IWORK[p]];
          zdscal(p, ONE / TEMP1.value, V(1, p).asArray(), 1);
        }
        if (LSVEC) {
          zpocon('U', N, V, LDV, ONE, TEMP1, CWORK(N + 1), RWORK, IERR);
        } else {
          zpocon('U', N, V, LDV, ONE, TEMP1, CWORK, RWORK, IERR);
        }
      } else if (LSVEC) {
        // U is available as workspace
        zlacpy('U', N, N, A, LDA, U, LDU);
        for (p = 1; p <= N; p++) {
          TEMP1.value = SVA[IWORK[p]];
          zdscal(p, ONE / TEMP1.value, U(1, p).asArray(), 1);
        }
        zpocon('U', N, U, LDU, ONE, TEMP1, CWORK(N + 1), RWORK, IERR);
      } else {
        zlacpy('U', N, N, A, LDA, CWORK.asMatrix(N), N);
        // []            CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N )
        // Change: here index shifted by N to the left, CWORK(1:N)
        // not needed for SIGMA only computation
        for (p = 1; p <= N; p++) {
          TEMP1.value = SVA[IWORK[p]];
          // []               CALL ZDSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 )
          zdscal(p, ONE / TEMP1.value, CWORK((p - 1) * N + 1), 1);
        }
        // the columns of R are scaled to have unit Euclidean lengths.
        // []               CALL ZPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1,
        // []     $              CWORK(N+N*N+1), RWORK, IERR )
        zpocon('U', N, CWORK.asMatrix(N), N, ONE, TEMP1, CWORK(N * N + 1),
            RWORK, IERR);
      }
      if (TEMP1.value != ZERO) {
        SCONDA = ONE / sqrt(TEMP1.value);
      } else {
        SCONDA = -ONE;
      }
      // SCONDA is an estimate of sqrt(||(R^* * R)^(-1)||_1).
      // N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
    } else {
      SCONDA = -ONE;
    }
  }

  L2PERT = L2PERT && ((A[1][1] / A[NR][NR]).abs() > sqrt(BIG1));
  // If there is no violent scaling, artificial perturbation is not needed.

  // Phase 3:
  if (!(RSVEC || LSVEC)) {
    // Singular Values only

    // transpose A(1:NR,1:N)
    for (p = 1; p <= min(N - 1, NR); p++) {
      zcopy(N - p, A(p, p + 1).asArray(), LDA, A(p + 1, p).asArray(), 1);
      zlacgv(N - p + 1, A(p, p).asArray(), 1);
    }
    if (NR == N) A[N][N] = A[N][N].conjugate();

    // The following two DO-loops introduce small relative perturbation
    // into the strict upper triangle of the lower triangular matrix.
    // Small entries below the main diagonal are also changed.
    // This modification is useful if the computing environment does not
    // provide/allow FLUSH TO ZERO underflow, for it prevents many
    // annoying denormalized numbers in case of strongly scaled matrices.
    // The perturbation is structured so that it does not introduce any
    // new perturbation of the singular values, and it does not destroy
    // the job done by the preconditioner.
    // The licence for this perturbation is in the variable L2PERT, which
    // should be false if FLUSH TO ZERO underflow is active.
    if (!ALMORT) {
      if (L2PERT) {
        // XSC = sqrt(SMALL)
        XSC.value = EPSLN / N;
        for (q = 1; q <= NR; q++) {
          CTEMP = Complex(XSC.value * A[q][q].abs(), ZERO);
          for (p = 1; p <= N; p++) {
            if (((p > q) && (A[p][q].abs() <= TEMP1.value)) || (p < q)) {
              // $                     A[p][q] = TEMP1 * ( A[p][q] / ABS(A[p][q]) )
              A[p][q] = CTEMP;
            }
          }
        }
      } else {
        zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, A(1, 2), LDA);
      }

      // second preconditioning using the QR factorization
      zgeqrf(N, NR, A, LDA, CWORK, CWORK(N + 1), LWORK - N, IERR);

      // and transpose upper to lower triangular
      for (p = 1; p <= NR - 1; p++) {
        zcopy(NR - p, A(p, p + 1).asArray(), LDA, A(p + 1, p).asArray(), 1);
        zlacgv(NR - p + 1, A(p, p).asArray(), 1);
      }
    }

    // Row-cyclic Jacobi SVD algorithm with column pivoting

    // again some perturbation (a "background noise") is added
    // to drown denormals
    if (L2PERT) {
      // XSC = sqrt(SMALL)
      XSC.value = EPSLN / N;
      for (q = 1; q <= NR; q++) {
        CTEMP = Complex(XSC.value * A[q][q].abs(), ZERO);
        for (p = 1; p <= NR; p++) {
          if (((p > q) && (A[p][q].abs() <= TEMP1.value)) || (p < q)) {
            // $                   A[p][q] = TEMP1 * ( A[p][q] / ABS(A[p][q]) )
            A[p][q] = CTEMP;
          }
        }
      }
    } else {
      zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, A(1, 2), LDA);
    }

    // and one-sided Jacobi rotations are started on a lower
    // triangular matrix (plus perturbation which is ignored in
    // the part which destroys triangular form (confusing?!))
    zgesvj('L', 'N', 'N', NR, NR, A, LDA, SVA, N, V, LDV, CWORK, LWORK, RWORK,
        LRWORK, INFO);

    SCALEM = RWORK[1];
    NUMRANK = nint(RWORK[2]);
  } else if ((RSVEC && !LSVEC && !JRACC) || (JRACC && !LSVEC && (NR != N))) {
    // -> Singular Values and Right Singular Vectors <-

    if (ALMORT) {
      // in this case NR equals N
      for (p = 1; p <= NR; p++) {
        zcopy(N - p + 1, A(p, p).asArray(), LDA, V(p, p).asArray(), 1);
        zlacgv(N - p + 1, V(p, p).asArray(), 1);
      }
      zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, V(1, 2), LDV);

      zgesvj('L', 'U', 'N', N, NR, V, LDV, SVA, NR, A, LDA, CWORK, LWORK, RWORK,
          LRWORK, INFO);
      SCALEM = RWORK[1];
      NUMRANK = nint(RWORK[2]);
    } else {
      // two more QR factorizations ( one QRF is not enough, two require
      // accumulated product of Jacobi rotations, three are perfect )
      zlaset('L', NR - 1, NR - 1, Complex.zero, Complex.zero, A(2, 1), LDA);
      zgelqf(NR, N, A, LDA, CWORK, CWORK(N + 1), LWORK - N, IERR);
      zlacpy('L', NR, NR, A, LDA, V, LDV);
      zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, V(1, 2), LDV);
      zgeqrf(
          NR, NR, V, LDV, CWORK(N + 1), CWORK(2 * N + 1), LWORK - 2 * N, IERR);
      for (p = 1; p <= NR; p++) {
        zcopy(NR - p + 1, V(p, p).asArray(), LDV, V(p, p).asArray(), 1);
        zlacgv(NR - p + 1, V(p, p).asArray(), 1);
      }
      zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, V(1, 2), LDV);

      zgesvj('L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U, LDU, CWORK(N + 1),
          LWORK - N, RWORK, LRWORK, INFO);
      SCALEM = RWORK[1];
      NUMRANK = nint(RWORK[2]);
      if (NR < N) {
        zlaset('A', N - NR, NR, Complex.zero, Complex.zero, V(NR + 1, 1), LDV);
        zlaset('A', NR, N - NR, Complex.zero, Complex.zero, V(1, NR + 1), LDV);
        zlaset('A', N - NR, N - NR, Complex.zero, Complex.one,
            V(NR + 1, NR + 1), LDV);
      }

      zunmlq('L', 'C', N, N, NR, A, LDA, CWORK, V, LDV, CWORK(N + 1), LWORK - N,
          IERR);
    }
    // permute the rows of V
    // DO 8991 p = 1, N
    //    CALL ZCOPY( N, V(p,1), LDV, A(IWORK[p],1), LDA )
    // 8991    CONTINUE
    // CALL ZLACPY( 'All', N, N, A, LDA, V, LDV )
    zlapmr(false, N, N, V, LDV, IWORK);

    if (TRANSP) {
      zlacpy('A', N, N, V, LDV, U, LDU);
    }
  } else if (JRACC && !LSVEC && (NR == N)) {
    zlaset('L', N - 1, N - 1, Complex.zero, Complex.zero, A(2, 1), LDA);

    zgesvj('U', 'N', 'V', N, N, A, LDA, SVA, N, V, LDV, CWORK, LWORK, RWORK,
        LRWORK, INFO);
    SCALEM = RWORK[1];
    NUMRANK = nint(RWORK[2]);
    zlapmr(false, N, N, V, LDV, IWORK);
  } else if (LSVEC && !RSVEC) {
    // Singular Values and Left Singular Vectors                 ..

    // second preconditioning step to avoid need to accumulate
    // Jacobi rotations in the Jacobi iterations.
    for (p = 1; p <= NR; p++) {
      zcopy(N - p + 1, A(p, p).asArray(), LDA, U(p, p).asArray(), 1);
      zlacgv(N - p + 1, U(p, p).asArray(), 1);
    }
    zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, U(1, 2), LDU);

    zgeqrf(N, NR, U, LDU, CWORK(N + 1), CWORK(2 * N + 1), LWORK - 2 * N, IERR);

    for (p = 1; p <= NR - 1; p++) {
      zcopy(NR - p, U(p, p + 1).asArray(), LDU, U(p + 1, p).asArray(), 1);
      zlacgv(N - p + 1, U(p, p).asArray(), 1);
    }
    zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, U(1, 2), LDU);

    zgesvj('L', 'U', 'N', NR, NR, U, LDU, SVA, NR, A, LDA, CWORK(N + 1),
        LWORK - N, RWORK, LRWORK, INFO);
    SCALEM = RWORK[1];
    NUMRANK = nint(RWORK[2]);

    if (NR < M) {
      zlaset('A', M - NR, NR, Complex.zero, Complex.zero, U(NR + 1, 1), LDU);
      if (NR < N1) {
        zlaset('A', NR, N1 - NR, Complex.zero, Complex.zero, U(1, NR + 1), LDU);
        zlaset('A', M - NR, N1 - NR, Complex.zero, Complex.one,
            U(NR + 1, NR + 1), LDU);
      }
    }

    zunmqr('L', 'N', M, N1, N, A, LDA, CWORK, U, LDU, CWORK(N + 1), LWORK - N,
        IERR);

    if (ROWPIV) zlaswp(N1, U, LDU, 1, M - 1, IWORK(IWOFF + 1), -1);

    for (p = 1; p <= N1; p++) {
      XSC.value = ONE / dznrm2(M, U(1, p).asArray(), 1);
      zdscal(M, XSC.value, U(1, p).asArray(), 1);
    }

    if (TRANSP) {
      zlacpy('A', N, N, U, LDU, V, LDV);
    }
  } else {
    // Full SVD
    if (!JRACC) {
      if (!ALMORT) {
        // Second Preconditioning Step (QRF [with pivoting])
        // Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
        // equivalent to an LQF CALL. Since in many libraries the QRF
        // seems to be better optimized than the LQF, we do explicit
        // transpose and use the QRF. This is subject to changes in an
        // optimized implementation of ZGEJSV.
        for (p = 1; p <= NR; p++) {
          zcopy(N - p + 1, A(p, p).asArray(), LDA, V(p, p).asArray(), 1);
          zlacgv(N - p + 1, V(p, p).asArray(), 1);
        }

        // the following two loops perturb small entries to avoid
        // denormals in the second QR factorization, where they are
        // as good as zeros. This is done to avoid painfully slow
        // computation with denormals. The relative size of the perturbation
        // is a parameter that can be changed by the implementer.
        // This perturbation device will be obsolete on machines with
        // properly implemented arithmetic.
        // To switch it off, set L2PERT= false To remove it from  the
        // code, remove the action under L2PERT= true , leave the ELSE part.
        // The following two loops should be blocked and fused with the
        // transposed copy above.
        if (L2PERT) {
          XSC.value = sqrt(SMALL);
          for (q = 1; q <= NR; q++) {
            CTEMP = Complex(XSC.value * V[q][q].abs(), ZERO);
            for (p = 1; p <= N; p++) {
              if ((p > q) && (V[p][q].abs() <= TEMP1.value) || (p < q)) {
                // $                   V[p][q] = TEMP1 * ( V[p][q] / ABS(V[p][q]) )
                V[p][q] = CTEMP;
              }
              if (p < q) V[p][q] = -V[p][q];
            }
          }
        } else {
          zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, V(1, 2), LDV);
        }

        // Estimate the row scaled condition number of R1
        // (If R1 is rectangular, N > NR, then the condition number
        // of the leading NR x NR submatrix is estimated.)
        zlacpy('L', NR, NR, V, LDV, CWORK(2 * N + 1).asMatrix(NR), NR);
        for (p = 1; p <= NR; p++) {
          TEMP1.value = dznrm2(NR - p + 1, CWORK(2 * N + (p - 1) * NR + p), 1);
          zdscal(NR - p + 1, ONE / TEMP1.value, CWORK(2 * N + (p - 1) * NR + p),
              1);
        }
        zpocon('L', NR, CWORK(2 * N + 1).asMatrix(NR), NR, ONE, TEMP1,
            CWORK(2 * N + NR * NR + 1), RWORK, IERR);
        CONDR1 = ONE / sqrt(TEMP1.value);
        // here need a second opinion on the condition number
        // then assume worst case scenario
        // R1 is OK for inverse <=> CONDR1 < N
        // more conservative    <=> CONDR1 < sqrt(N)
        COND_OK = sqrt(sqrt(NR));
        // [TP]       COND_OK is a tuning parameter.
        if (CONDR1 < COND_OK) {
          // the second QRF without pivoting. Note: in an optimized
          // implementation, this QRF should be implemented as the QRF
          // of a lower triangular matrix.
          // R1^* = Q2 * R2
          zgeqrf(N, NR, V, LDV, CWORK(N + 1), CWORK(2 * N + 1), LWORK - 2 * N,
              IERR);

          if (L2PERT) {
            XSC.value = sqrt(SMALL) / EPSLN;
            for (p = 2; p <= NR; p++) {
              for (q = 1; q <= p - 1; q++) {
                CTEMP = Complex(
                    XSC.value * min(V[p][p].abs(), V[q][q].abs()), ZERO);
                if (V[q][p].abs() <= TEMP1.value) {
                  // $                     V[q][p] = TEMP1 * ( V[q][p] / ABS(V[q][p]) )
                  V[q][p] = CTEMP;
                }
              }
            }
          }

          if (NR != N) {
            zlacpy('A', N, NR, V, LDV, CWORK(2 * N + 1).asMatrix(N), N);
          }
          // save

          // this transposed copy should be better than naive
          for (p = 1; p <= NR - 1; p++) {
            zcopy(NR - p, V(p, p + 1).asArray(), LDV, V(p + 1, p).asArray(), 1);
            zlacgv(NR - p + 1, V(p, p).asArray(), 1);
          }
          V[NR][NR] = V[NR][NR].conjugate();

          CONDR2 = CONDR1;
        } else {
          // ill-conditioned case: second QRF with pivoting
          // Note that windowed pivoting would be equally good
          // numerically, and more run-time efficient. So, in
          // an optimal implementation, the next call to ZGEQP3
          // should be replaced with eg. CALL ZGEQPX (ACM TOMS #782)
          // with properly (carefully) chosen parameters.

          // R1^* * P2 = Q2 * R2
          for (p = 1; p <= NR; p++) {
            IWORK[N + p] = 0;
          }
          zgeqp3(N, NR, V, LDV, IWORK(N + 1), CWORK(N + 1), CWORK(2 * N + 1),
              LWORK - 2 * N, RWORK, IERR);
// *               CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
// *     $              LWORK-2*N, IERR )
          if (L2PERT) {
            XSC.value = sqrt(SMALL);
            for (p = 2; p <= NR; p++) {
              for (q = 1; q <= p - 1; q++) {
                CTEMP = Complex(
                    XSC.value * min(V[p][p].abs(), V[q][q].abs()), ZERO);
                if (V[q][p].abs() <= TEMP1.value) {
                  // V[q][p] = TEMP1 * ( V[q][p] / ABS(V[q][p]) )
                  V[q][p] = CTEMP;
                }
              }
            }
          }

          zlacpy('A', N, NR, V, LDV, CWORK(2 * N + 1).asMatrix(N), N);

          if (L2PERT) {
            XSC.value = sqrt(SMALL);
            for (p = 2; p <= NR; p++) {
              for (q = 1; q <= p - 1; q++) {
                CTEMP = Complex(
                    XSC.value * min(V[p][p].abs(), V[q][q].abs()), ZERO);
                // V[p][q] = - TEMP1*( V[q][p] / ABS(V[q][p]) )
                V[p][q] = -CTEMP;
              }
            }
          } else {
            zlaset(
                'L', NR - 1, NR - 1, Complex.zero, Complex.zero, V(2, 1), LDV);
          }
          // Now, compute R2 = L3 * Q3, the LQ factorization.
          zgelqf(
              NR,
              NR,
              V,
              LDV,
              CWORK(2 * N + N * NR + 1),
              CWORK(2 * N + N * NR + NR + 1),
              LWORK - 2 * N - N * NR - NR,
              IERR);
          // and estimate the condition number
          zlacpy('L', NR, NR, V, LDV,
              CWORK(2 * N + N * NR + NR + 1).asMatrix(NR), NR);
          for (p = 1; p <= NR; p++) {
            TEMP1.value = dznrm2(p, CWORK(2 * N + N * NR + NR + p), NR);
            zdscal(p, ONE / TEMP1.value, CWORK(2 * N + N * NR + NR + p), NR);
          }
          zpocon('L', NR, CWORK(2 * N + N * NR + NR + 1).asMatrix(NR), NR, ONE,
              TEMP1, CWORK(2 * N + N * NR + NR + NR * NR + 1), RWORK, IERR);
          CONDR2 = ONE / sqrt(TEMP1.value);

          if (CONDR2 >= COND_OK) {
            // save the Householder vectors used for Q3
            // (this overwrites the copy of R2, as it will not be
            // needed in this branch, but it does not overwrite the
            // Huseholder vectors of Q2.).
            zlacpy('U', NR, NR, V, LDV, CWORK(2 * N + 1).asMatrix(N), N);
            // and the rest of the information on Q3 is in
            // WORK(2*N+N*NR+1:2*N+N*NR+N)
          }
        }

        if (L2PERT) {
          XSC.value = sqrt(SMALL);
          for (q = 2; q <= NR; q++) {
            CTEMP = XSC.value.toComplex() * V[q][q];
            for (p = 1; p <= q - 1; p++) {
              // V[p][q] = - TEMP1*( V[p][q] / ABS(V[p][q]) )
              V[p][q] = -CTEMP;
            }
          }
        } else {
          zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, V(1, 2), LDV);
        }

        // Second preconditioning finished; continue with Jacobi SVD
        // The input matrix is lower triangular.

        // Recover the right singular vectors as solution of a well
        // conditioned triangular matrix equation.
        if (CONDR1 < COND_OK) {
          zgesvj(
              'L',
              'U',
              'N',
              NR,
              NR,
              V,
              LDV,
              SVA,
              NR,
              U,
              LDU,
              CWORK(2 * N + N * NR + NR + 1),
              LWORK - 2 * N - N * NR - NR,
              RWORK,
              LRWORK,
              INFO);
          SCALEM = RWORK[1];
          NUMRANK = nint(RWORK[2]);
          for (p = 1; p <= NR; p++) {
            zcopy(NR, V(1, p).asArray(), 1, U(1, p).asArray(), 1);
            zdscal(NR, SVA[p], V(1, p).asArray(), 1);
          }

          // pick the right matrix equation and solve it
          if (NR == N) {
            // best case, R1 is inverted. The solution of this matrix
            // equation is Q2*V2 = the product of the Jacobi rotations
            // used in ZGESVJ, premultiplied with the orthogonal matrix
            // from the second QR factorization.
            ztrsm('L', 'U', 'N', 'N', NR, NR, Complex.one, A, LDA, V, LDV);
          } else {
            // R1 is well conditioned, but non-square. Adjoint of R2
            // is inverted to get the product of the Jacobi rotations
            // used in ZGESVJ. The Q-factor from the second QR
            // factorization is then built in explicitly.
            ztrsm('L', 'U', 'C', 'N', NR, NR, Complex.one,
                CWORK(2 * N + 1).asMatrix(N), N, V, LDV);
            if (NR < N) {
              zlaset('A', N - NR, NR, Complex.zero, Complex.zero, V(NR + 1, 1),
                  LDV);
              zlaset('A', NR, N - NR, Complex.zero, Complex.zero, V(1, NR + 1),
                  LDV);
              zlaset('A', N - NR, N - NR, Complex.zero, Complex.one,
                  V(NR + 1, NR + 1), LDV);
            }
            zunmqr(
                'L',
                'N',
                N,
                N,
                NR,
                CWORK(2 * N + 1).asMatrix(N),
                N,
                CWORK(N + 1),
                V,
                LDV,
                CWORK(2 * N + N * NR + NR + 1),
                LWORK - 2 * N - N * NR - NR,
                IERR);
          }
        } else if (CONDR2 < COND_OK) {
          // The matrix R2 is inverted. The solution of the matrix equation
          // is Q3^* * V3 = the product of the Jacobi rotations (applied to
          // the lower triangular L3 from the LQ factorization of
          // R2=L3*Q3), pre-multiplied with the transposed Q3.
          zgesvj(
              'L',
              'U',
              'N',
              NR,
              NR,
              V,
              LDV,
              SVA,
              NR,
              U,
              LDU,
              CWORK(2 * N + N * NR + NR + 1),
              LWORK - 2 * N - N * NR - NR,
              RWORK,
              LRWORK,
              INFO);
          SCALEM = RWORK[1];
          NUMRANK = nint(RWORK[2]);
          for (p = 1; p <= NR; p++) {
            zcopy(NR, V(1, p).asArray(), 1, U(1, p).asArray(), 1);
            zdscal(NR, SVA[p], U(1, p).asArray(), 1);
          }
          ztrsm('L', 'U', 'N', 'N', NR, NR, Complex.one,
              CWORK(2 * N + 1).asMatrix(N), N, U, LDU);
          // apply the permutation from the second QR factorization
          for (q = 1; q <= NR; q++) {
            for (p = 1; p <= NR; p++) {
              CWORK[2 * N + N * NR + NR + IWORK[N + p]] = U[p][q];
            }
            for (p = 1; p <= NR; p++) {
              U[p][q] = CWORK[2 * N + N * NR + NR + p];
            }
          }
          if (NR < N) {
            zlaset(
                'A', N - NR, NR, Complex.zero, Complex.zero, V(NR + 1, 1), LDV);
            zlaset(
                'A', NR, N - NR, Complex.zero, Complex.zero, V(1, NR + 1), LDV);
            zlaset('A', N - NR, N - NR, Complex.zero, Complex.one,
                V(NR + 1, NR + 1), LDV);
          }
          zunmqr(
              'L',
              'N',
              N,
              N,
              NR,
              CWORK(2 * N + 1).asMatrix(N),
              N,
              CWORK(N + 1),
              V,
              LDV,
              CWORK(2 * N + N * NR + NR + 1),
              LWORK - 2 * N - N * NR - NR,
              IERR);
        } else {
          // Last line of defense.
          // This is a rather pathological case: no scaled condition
          // improvement after two pivoted QR factorizations. Other
          // possibility is that the rank revealing QR factorization
          // or the condition estimator has failed, or the COND_OK
          // is set very close to ONE (which is unnecessary). Normally,
          // this branch should never be executed, but in rare cases of
          // failure of the RRQR or condition estimator, the last line of
          // defense ensures that ZGEJSV completes the task.
          // Compute the full SVD of L3 using ZGESVJ with explicit
          // accumulation of Jacobi rotations.
          zgesvj(
              'L',
              'U',
              'V',
              NR,
              NR,
              V,
              LDV,
              SVA,
              NR,
              U,
              LDU,
              CWORK(2 * N + N * NR + NR + 1),
              LWORK - 2 * N - N * NR - NR,
              RWORK,
              LRWORK,
              INFO);
          SCALEM = RWORK[1];
          NUMRANK = nint(RWORK[2]);
          if (NR < N) {
            zlaset(
                'A', N - NR, NR, Complex.zero, Complex.zero, V(NR + 1, 1), LDV);
            zlaset(
                'A', NR, N - NR, Complex.zero, Complex.zero, V(1, NR + 1), LDV);
            zlaset('A', N - NR, N - NR, Complex.zero, Complex.one,
                V(NR + 1, NR + 1), LDV);
          }
          zunmqr(
              'L',
              'N',
              N,
              N,
              NR,
              CWORK(2 * N + 1).asMatrix(N),
              N,
              CWORK(N + 1),
              V,
              LDV,
              CWORK(2 * N + N * NR + NR + 1),
              LWORK - 2 * N - N * NR - NR,
              IERR);

          zunmlq(
              'L',
              'C',
              NR,
              NR,
              NR,
              CWORK(2 * N + 1).asMatrix(N),
              N,
              CWORK(2 * N + N * NR + 1),
              U,
              LDU,
              CWORK(2 * N + N * NR + NR + 1),
              LWORK - 2 * N - N * NR - NR,
              IERR);
          for (q = 1; q <= NR; q++) {
            for (p = 1; p <= NR; p++) {
              CWORK[2 * N + N * NR + NR + IWORK[N + p]] = U[p][q];
            }
            for (p = 1; p <= NR; p++) {
              U[p][q] = CWORK[2 * N + N * NR + NR + p];
            }
          }
        }

        // Permute the rows of V using the (column) permutation from the
        // first QRF. Also, scale the columns to make them unit in
        // Euclidean norm. This applies to all cases.
        TEMP1.value = sqrt(N) * EPSLN;
        for (q = 1; q <= N; q++) {
          for (p = 1; p <= N; p++) {
            CWORK[2 * N + N * NR + NR + IWORK[p]] = V[p][q];
          }
          for (p = 1; p <= N; p++) {
            V[p][q] = CWORK[2 * N + N * NR + NR + p];
          }
          XSC.value = ONE / dznrm2(N, V(1, q).asArray(), 1);
          if ((XSC.value < (ONE - TEMP1.value)) ||
              (XSC.value > (ONE + TEMP1.value))) {
            zdscal(N, XSC.value, V(1, q).asArray(), 1);
          }
        }
        // At this moment, V contains the right singular vectors of A.
        // Next, assemble the left singular vector matrix U (M x N).
        if (NR < M) {
          zlaset(
              'A', M - NR, NR, Complex.zero, Complex.zero, U(NR + 1, 1), LDU);
          if (NR < N1) {
            zlaset('A', NR, N1 - NR, Complex.zero, Complex.zero, U(1, NR + 1),
                LDU);
            zlaset('A', M - NR, N1 - NR, Complex.zero, Complex.one,
                U(NR + 1, NR + 1), LDU);
          }
        }

        // The Q matrix from the first QRF is built into the left singular
        // matrix U. This applies to all cases.
        zunmqr('L', 'N', M, N1, N, A, LDA, CWORK, U, LDU, CWORK(N + 1),
            LWORK - N, IERR);

        // The columns of U are normalized. The cost is O(M*N) flops.
        TEMP1.value = sqrt(M) * EPSLN;
        for (p = 1; p <= NR; p++) {
          XSC.value = ONE / dznrm2(M, U(1, p).asArray(), 1);
          if ((XSC.value < (ONE - TEMP1.value)) ||
              (XSC.value > (ONE + TEMP1.value))) {
            zdscal(M, XSC.value, U(1, p).asArray(), 1);
          }
        }

        // If the initial QRF is computed with row pivoting, the left
        // singular vectors must be adjusted.
        if (ROWPIV) zlaswp(N1, U, LDU, 1, M - 1, IWORK(IWOFF + 1), -1);
      } else {
        // the initial matrix A has almost orthogonal columns and
        // the second QRF is not needed
        zlacpy('U', N, N, A, LDA, CWORK(N + 1).asMatrix(N), N);
        if (L2PERT) {
          XSC.value = sqrt(SMALL);
          for (p = 2; p <= N; p++) {
            CTEMP = XSC.value.toComplex() * CWORK[N + (p - 1) * N + p];
            for (q = 1; q <= p - 1; q++) {
              // CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) /
              // $                                        ABS(CWORK(N+(p-1)*N+q)) )
              CWORK[N + (q - 1) * N + p] = -CTEMP;
            }
          }
        } else {
          zlaset('L', N - 1, N - 1, Complex.zero, Complex.zero,
              CWORK(N + 2).asMatrix(N), N);
        }

        zgesvj('U', 'U', 'N', N, N, CWORK(N + 1).asMatrix(N), N, SVA, N, U, LDU,
            CWORK(N + N * N + 1), LWORK - N - N * N, RWORK, LRWORK, INFO);

        SCALEM = RWORK[1];
        NUMRANK = nint(RWORK[2]);
        for (p = 1; p <= N; p++) {
          zcopy(N, CWORK(N + (p - 1) * N + 1), 1, U(1, p).asArray(), 1);
          zdscal(N, SVA[p], CWORK(N + (p - 1) * N + 1), 1);
        }

        ztrsm('L', 'U', 'N', 'N', N, N, Complex.one, A, LDA,
            CWORK(N + 1).asMatrix(N), N);
        for (p = 1; p <= N; p++) {
          zcopy(N, CWORK(N + p), N, V(IWORK[p], 1).asArray(), LDV);
        }
        TEMP1.value = sqrt(N) * EPSLN;
        for (p = 1; p <= N; p++) {
          XSC.value = ONE / dznrm2(N, V(1, p).asArray(), 1);
          if ((XSC.value < (ONE - TEMP1.value)) ||
              (XSC.value > (ONE + TEMP1.value))) {
            zdscal(N, XSC.value, V(1, p).asArray(), 1);
          }
        }

        // Assemble the left singular vector matrix U (M x N).
        if (N < M) {
          zlaset('A', M - N, N, Complex.zero, Complex.zero, U(N + 1, 1), LDU);
          if (N < N1) {
            zlaset(
                'A', N, N1 - N, Complex.zero, Complex.zero, U(1, N + 1), LDU);
            zlaset('A', M - N, N1 - N, Complex.zero, Complex.one,
                U(N + 1, N + 1), LDU);
          }
        }
        zunmqr('L', 'N', M, N1, N, A, LDA, CWORK, U, LDU, CWORK(N + 1),
            LWORK - N, IERR);
        TEMP1.value = sqrt(M) * EPSLN;
        for (p = 1; p <= N1; p++) {
          XSC.value = ONE / dznrm2(M, U(1, p).asArray(), 1);
          if ((XSC.value < (ONE - TEMP1.value)) ||
              (XSC.value > (ONE + TEMP1.value))) {
            zdscal(M, XSC.value, U(1, p).asArray(), 1);
          }
        }

        if (ROWPIV) zlaswp(N1, U, LDU, 1, M - 1, IWORK(IWOFF + 1), -1);
      }

      // end of the  >> almost orthogonal case <<  in the full SVD
    } else {
      // This branch deploys a preconditioned Jacobi SVD with explicitly
      // accumulated rotations. It is included as optional, mainly for
      // experimental purposes. It does perform well, and can also be used.
      // In this implementation, this branch will be automatically activated
      // if the  condition number sigma_max(A) / sigma_min(A) is predicted
      // to be greater than the overflow threshold. This is because the
      // a posteriori computation of the singular vectors assumes robust
      // implementation of BLAS and some LAPACK procedures, capable of working
      // in presence of extreme values, e.g. when the singular values spread from
      // the underflow to the overflow threshold.
      for (p = 1; p <= NR; p++) {
        zcopy(N - p + 1, A(p, p).asArray(), LDA, V(p, p).asArray(), 1);
        zlacgv(N - p + 1, V(p, p).asArray(), 1);
      }

      if (L2PERT) {
        XSC.value = sqrt(SMALL / EPSLN);
        for (q = 1; q <= NR; q++) {
          CTEMP = Complex(XSC.value * V[q][q].abs(), ZERO);
          for (p = 1; p <= N; p++) {
            if ((p > q) && (V[p][q].abs() <= TEMP1.value) || (p < q)) {
              // $                V[p][q] = TEMP1 * ( V[p][q] / ABS(V[p][q]) )
              V[p][q] = CTEMP;
            }
            if (p < q) V[p][q] = -V[p][q];
          }
        }
      } else {
        zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, V(1, 2), LDV);
      }
      zgeqrf(
          N, NR, V, LDV, CWORK(N + 1), CWORK(2 * N + 1), LWORK - 2 * N, IERR);
      zlacpy('L', N, NR, V, LDV, CWORK(2 * N + 1).asMatrix(N), N);

      for (p = 1; p <= NR; p++) {
        zcopy(NR - p + 1, V(p, p).asArray(), LDV, U(p, p).asArray(), 1);
        zlacgv(NR - p + 1, U(p, p).asArray(), 1);
      }

      if (L2PERT) {
        XSC.value = sqrt(SMALL / EPSLN);
        for (q = 2; q <= NR; q++) {
          for (p = 1; p <= q - 1; p++) {
            CTEMP =
                Complex(XSC.value * min(U[p][p].abs(), U[q][q].abs()), ZERO);
            // U[p][q] = - TEMP1 * ( U[q][p] / ABS(U[q][p]) )
            U[p][q] = -CTEMP;
          }
        }
      } else {
        zlaset('U', NR - 1, NR - 1, Complex.zero, Complex.zero, U(1, 2), LDU);
      }
      zgesvj(
          'L',
          'U',
          'V',
          NR,
          NR,
          U,
          LDU,
          SVA,
          N,
          V,
          LDV,
          CWORK(2 * N + N * NR + 1),
          LWORK - 2 * N - N * NR,
          RWORK,
          LRWORK,
          INFO);
      SCALEM = RWORK[1];
      NUMRANK = nint(RWORK[2]);

      if (NR < N) {
        zlaset('A', N - NR, NR, Complex.zero, Complex.zero, V(NR + 1, 1), LDV);
        zlaset('A', NR, N - NR, Complex.zero, Complex.zero, V(1, NR + 1), LDV);
        zlaset('A', N - NR, N - NR, Complex.zero, Complex.one,
            V(NR + 1, NR + 1), LDV);
      }
      zunmqr(
          'L',
          'N',
          N,
          N,
          NR,
          CWORK(2 * N + 1).asMatrix(N),
          N,
          CWORK(N + 1),
          V,
          LDV,
          CWORK(2 * N + N * NR + NR + 1),
          LWORK - 2 * N - N * NR - NR,
          IERR);

      // Permute the rows of V using the (column) permutation from the
      // first QRF. Also, scale the columns to make them unit in
      // Euclidean norm. This applies to all cases.
      TEMP1.value = sqrt(N) * EPSLN;
      for (q = 1; q <= N; q++) {
        for (p = 1; p <= N; p++) {
          CWORK[2 * N + N * NR + NR + IWORK[p]] = V[p][q];
        }
        for (p = 1; p <= N; p++) {
          V[p][q] = CWORK[2 * N + N * NR + NR + p];
        }
        XSC.value = ONE / dznrm2(N, V(1, q).asArray(), 1);
        if ((XSC.value < (ONE - TEMP1.value)) ||
            (XSC.value > (ONE + TEMP1.value))) {
          zdscal(N, XSC.value, V(1, q).asArray(), 1);
        }
      }

      // At this moment, V contains the right singular vectors of A.
      // Next, assemble the left singular vector matrix U (M x N).
      if (NR < M) {
        zlaset('A', M - NR, NR, Complex.zero, Complex.zero, U(NR + 1, 1), LDU);
        if (NR < N1) {
          zlaset(
              'A', NR, N1 - NR, Complex.zero, Complex.zero, U(1, NR + 1), LDU);
          zlaset('A', M - NR, N1 - NR, Complex.zero, Complex.one,
              U(NR + 1, NR + 1), LDU);
        }
      }

      zunmqr('L', 'N', M, N1, N, A, LDA, CWORK, U, LDU, CWORK(N + 1), LWORK - N,
          IERR);

      if (ROWPIV) zlaswp(N1, U, LDU, 1, M - 1, IWORK(IWOFF + 1), -1);
    }
    if (TRANSP) {
      // swap U and V because the procedure worked on A^*
      for (p = 1; p <= N; p++) {
        zswap(N, U(1, p).asArray(), 1, V(1, p).asArray(), 1);
      }
    }
  }
  // end of the full SVD

  // Undo scaling, if necessary (and possible)
  if (USCAL2 <= (BIG / SVA[1]) * USCAL1) {
    dlascl('G', 0, 0, USCAL1, USCAL2, NR, 1, SVA.asMatrix(N), N, IERR);
    USCAL1 = ONE;
    USCAL2 = ONE;
  }

  if (NR < N) {
    for (p = NR + 1; p <= N; p++) {
      SVA[p] = ZERO;
    }
  }

  RWORK[1] = USCAL2 * SCALEM;
  RWORK[2] = USCAL1;
  if (ERREST) RWORK[3] = SCONDA;
  if (LSVEC && RSVEC) {
    RWORK[4] = CONDR1;
    RWORK[5] = CONDR2;
  }
  if (L2TRAN) {
    RWORK[6] = ENTRA;
    RWORK[7] = ENTRAT;
  }

  IWORK[1] = NR;
  IWORK[2] = NUMRANK;
  IWORK[3] = WARNING;
  if (TRANSP) {
    IWORK[4] = 1;
  } else {
    IWORK[4] = -1;
  }
}
zgejsv function

Implementation

lapack library