zgelss function

void zgelss(
  1. int M,
  2. int N,
  3. int NRHS,
  4. Matrix<Complex> A_,
  5. int LDA,
  6. Matrix<Complex> B_,
  7. int LDB,
  8. Array<double> S_,
  9. double RCOND,
  10. Box<int> RANK,
  11. Array<Complex> WORK_,
  12. int LWORK,
  13. Array<double> RWORK_,
  14. Box<int> INFO,
)

Implementation

void zgelss(
  final int M,
  final int N,
  final int NRHS,
  final Matrix<Complex> A_,
  final int LDA,
  final Matrix<Complex> B_,
  final int LDB,
  final Array<double> S_,
  final double RCOND,
  final Box<int> RANK,
  final Array<Complex> WORK_,
  final int LWORK,
  final Array<double> RWORK_,
  final Box<int> INFO,
) {
  final A = A_.having(ld: LDA);
  final B = B_.having(ld: LDB);
  final S = S_.having();
  final WORK = WORK_.having();
  final RWORK = RWORK_.having();
  const ZERO = 0.0, ONE = 1.0;
  bool LQUERY;
  int BL,
      CHUNK,
      I,
      IASCL,
      IBSCL,
      IE,
      IL,
      IRWORK,
      ITAU,
      ITAUP,
      ITAUQ,
      IWORK,
      LDWORK,
      MAXMN,
      MAXWRK = 0,
      MINMN,
      MINWRK,
      MM,
      MNTHR = 0;
  int LWORK_ZGEBRD, LWORK_ZUNMBR, LWORK_ZUNGBR, LWORK_ZUNMLQ, LWORK_ZGELQF;
  double ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR;
  final DUM = Array<Complex>(1);

  // Test the input arguments

  INFO.value = 0;
  MINMN = min(M, N);
  MAXMN = max(M, N);
  LQUERY = (LWORK == -1);
  if (M < 0) {
    INFO.value = -1;
  } else if (N < 0) {
    INFO.value = -2;
  } else if (NRHS < 0) {
    INFO.value = -3;
  } else if (LDA < max(1, M)) {
    INFO.value = -5;
  } else if (LDB < max(1, MAXMN)) {
    INFO.value = -7;
  }

  // Compute workspace
  //  (Note: Comments in the code beginning "Workspace:" describe the
  //   minimal amount of workspace needed at that point in the code,
  //   as well as the preferred amount for good performance.
  //   CWorkspace refers to complex workspace, and RWorkspace refers
  //   to real workspace. NB refers to the optimal block size for the
  //   immediately following subroutine, as returned by ILAENV.)

  if (INFO.value == 0) {
    MINWRK = 1;
    MAXWRK = 1;
    if (MINMN > 0) {
      MM = M;
      MNTHR = ilaenv(6, 'ZGELSS', ' ', M, N, NRHS, -1);
      if (M >= N && M >= MNTHR) {
        // Path 1a - overdetermined, with many more rows than
        //           columns

        // Compute space needed for ZGEQRF
        zgeqrf(M, N, A, LDA, DUM(1), DUM(1), -1, INFO);
        // Compute space needed for ZUNMQR
        zunmqr('L', 'C', M, NRHS, N, A, LDA, DUM(1), B, LDB, DUM(1), -1, INFO);
        MM = N;
        MAXWRK = max(MAXWRK, N + N * ilaenv(1, 'ZGEQRF', ' ', M, N, -1, -1));
        MAXWRK =
            max(MAXWRK, N + NRHS * ilaenv(1, 'ZUNMQR', 'LC', M, NRHS, N, -1));
      }
      if (M >= N) {
        // Path 1 - overdetermined or exactly determined

        // Compute space needed for ZGEBRD
        zgebrd(MM, N, A, LDA, S, S, DUM(1), DUM(1), DUM(1), -1, INFO);
        LWORK_ZGEBRD = DUM[1].toInt();
        // Compute space needed for ZUNMBR
        zunmbr('Q', 'L', 'C', MM, NRHS, N, A, LDA, DUM(1), B, LDB, DUM(1), -1,
            INFO);
        LWORK_ZUNMBR = DUM[1].toInt();
        // Compute space needed for ZUNGBR
        zungbr('P', N, N, N, A, LDA, DUM(1), DUM(1), -1, INFO);
        LWORK_ZUNGBR = DUM[1].toInt();
        // Compute total workspace needed
        MAXWRK = max(MAXWRK, 2 * N + LWORK_ZGEBRD);
        MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNMBR);
        MAXWRK = max(MAXWRK, 2 * N + LWORK_ZUNGBR);
        MAXWRK = max(MAXWRK, N * NRHS);
        MINWRK = 2 * N + max(NRHS, M);
      }
      if (N > M) {
        MINWRK = 2 * M + max(NRHS, N);
        if (N >= MNTHR) {
          // Path 2a - underdetermined, with many more columns
          // than rows

          // Compute space needed for ZGELQF
          zgelqf(M, N, A, LDA, DUM(1), DUM(1), -1, INFO);
          LWORK_ZGELQF = DUM[1].toInt();
          // Compute space needed for ZGEBRD
          zgebrd(M, M, A, LDA, S, S, DUM(1), DUM(1), DUM(1), -1, INFO);
          LWORK_ZGEBRD = DUM[1].toInt();
          // Compute space needed for ZUNMBR
          zunmbr('Q', 'L', 'C', M, NRHS, N, A, LDA, DUM(1), B, LDB, DUM(1), -1,
              INFO);
          LWORK_ZUNMBR = DUM[1].toInt();
          // Compute space needed for ZUNGBR
          zungbr('P', M, M, M, A, LDA, DUM(1), DUM(1), -1, INFO);
          LWORK_ZUNGBR = DUM[1].toInt();
          // Compute space needed for ZUNMLQ
          zunmlq(
              'L', 'C', N, NRHS, M, A, LDA, DUM(1), B, LDB, DUM(1), -1, INFO);
          LWORK_ZUNMLQ = DUM[1].toInt();
          // Compute total workspace needed
          MAXWRK = M + LWORK_ZGELQF;
          MAXWRK = max(MAXWRK, 3 * M + M * M + LWORK_ZGEBRD);
          MAXWRK = max(MAXWRK, 3 * M + M * M + LWORK_ZUNMBR);
          MAXWRK = max(MAXWRK, 3 * M + M * M + LWORK_ZUNGBR);
          if (NRHS > 1) {
            MAXWRK = max(MAXWRK, M * M + M + M * NRHS);
          } else {
            MAXWRK = max(MAXWRK, M * M + 2 * M);
          }
          MAXWRK = max(MAXWRK, M + LWORK_ZUNMLQ);
        } else {
          // Path 2 - underdetermined

          // Compute space needed for ZGEBRD
          zgebrd(M, N, A, LDA, S, S, DUM(1), DUM(1), DUM(1), -1, INFO);
          LWORK_ZGEBRD = DUM[1].toInt();
          // Compute space needed for ZUNMBR
          zunmbr('Q', 'L', 'C', M, NRHS, M, A, LDA, DUM(1), B, LDB, DUM(1), -1,
              INFO);
          LWORK_ZUNMBR = DUM[1].toInt();
          // Compute space needed for ZUNGBR
          zungbr('P', M, N, M, A, LDA, DUM(1), DUM(1), -1, INFO);
          LWORK_ZUNGBR = DUM[1].toInt();
          MAXWRK = 2 * M + LWORK_ZGEBRD;
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNMBR);
          MAXWRK = max(MAXWRK, 2 * M + LWORK_ZUNGBR);
          MAXWRK = max(MAXWRK, N * NRHS);
        }
      }
      MAXWRK = max(MINWRK, MAXWRK);
    }
    WORK[1] = MAXWRK.toComplex();

    if (LWORK < MINWRK && !LQUERY) INFO.value = -12;
  }

  if (INFO.value != 0) {
    xerbla('ZGELSS', -INFO.value);
    return;
  } else if (LQUERY) {
    return;
  }

  // Quick return if possible

  if (M == 0 || N == 0) {
    RANK.value = 0;
    return;
  }

  // Get machine parameters

  EPS = dlamch('P');
  SFMIN = dlamch('S');
  SMLNUM = SFMIN / EPS;
  BIGNUM = ONE / SMLNUM;

  // Scale A if max element outside range [SMLNUM,BIGNUM]

  ANRM = zlange('M', M, N, A, LDA, RWORK);
  IASCL = 0;
  if (ANRM > ZERO && ANRM < SMLNUM) {
    // Scale matrix norm up to SMLNUM

    zlascl('G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO);
    IASCL = 1;
  } else if (ANRM > BIGNUM) {
    // Scale matrix norm down to BIGNUM

    zlascl('G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO);
    IASCL = 2;
  } else if (ANRM == ZERO) {
    // Matrix all zero. Return zero solution.

    zlaset('F', max(M, N), NRHS, Complex.zero, Complex.zero, B, LDB);
    dlaset('F', MINMN, 1, ZERO, ZERO, S.asMatrix(MINMN), MINMN);
    RANK.value = 0;
    WORK[1] = MAXWRK.toComplex();
    return;
  }

  // Scale B if max element outside range [SMLNUM,BIGNUM]

  BNRM = zlange('M', M, NRHS, B, LDB, RWORK);
  IBSCL = 0;
  if (BNRM > ZERO && BNRM < SMLNUM) {
    // Scale matrix norm up to SMLNUM

    zlascl('G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO);
    IBSCL = 1;
  } else if (BNRM > BIGNUM) {
    // Scale matrix norm down to BIGNUM

    zlascl('G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO);
    IBSCL = 2;
  }

  // Overdetermined case

  if (M >= N) {
    // Path 1 - overdetermined or exactly determined

    MM = M;
    if (M >= MNTHR) {
      // Path 1a - overdetermined, with many more rows than columns

      MM = N;
      ITAU = 1;
      IWORK = ITAU + N;

      // Compute A=Q*R
      // (CWorkspace: need 2*N, prefer N+N*NB)
      // (RWorkspace: none)

      zgeqrf(M, N, A, LDA, WORK(ITAU), WORK(IWORK), LWORK - IWORK + 1, INFO);

      // Multiply B by transpose(Q)
      // (CWorkspace: need N+NRHS, prefer N+NRHS*NB)
      // (RWorkspace: none)

      zunmqr('L', 'C', M, NRHS, N, A, LDA, WORK(ITAU), B, LDB, WORK(IWORK),
          LWORK - IWORK + 1, INFO);

      // Zero out below R

      if (N > 1) {
        zlaset('L', N - 1, N - 1, Complex.zero, Complex.zero, A(2, 1), LDA);
      }
    }

    IE = 1;
    ITAUQ = 1;
    ITAUP = ITAUQ + N;
    IWORK = ITAUP + N;

    // Bidiagonalize R in A
    // (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB)
    // (RWorkspace: need N)

    zgebrd(MM, N, A, LDA, S, RWORK(IE), WORK(ITAUQ), WORK(ITAUP), WORK(IWORK),
        LWORK - IWORK + 1, INFO);

    // Multiply B by transpose of left bidiagonalizing vectors of R
    // (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB)
    // (RWorkspace: none)

    zunmbr('Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK(ITAUQ), B, LDB, WORK(IWORK),
        LWORK - IWORK + 1, INFO);

    // Generate right bidiagonalizing vectors of R in A
    // (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
    // (RWorkspace: none)

    zungbr('P', N, N, N, A, LDA, WORK(ITAUP), WORK(IWORK), LWORK - IWORK + 1,
        INFO);
    IRWORK = IE + N;

    // Perform bidiagonal QR iteration
    //   multiply B by transpose of left singular vectors
    //   compute right singular vectors in A
    // (CWorkspace: none)
    // (RWorkspace: need BDSPAC)

    zbdsqr('U', N, N, 0, NRHS, S, RWORK(IE), A, LDA, DUM.asMatrix(1), 1, B, LDB,
        RWORK(IRWORK), INFO);
    if (INFO.value != 0) {
      WORK[1] = MAXWRK.toComplex();
      return;
    }

    // Multiply B by reciprocals of singular values

    THR = max(RCOND * S[1], SFMIN);
    if (RCOND < ZERO) THR = max(EPS * S[1], SFMIN);
    RANK.value = 0;
    for (I = 1; I <= N; I++) {
      if (S[I] > THR) {
        zdrscl(NRHS, S[I], B(I, 1).asArray(), LDB);
        RANK.value++;
      } else {
        zlaset('F', 1, NRHS, Complex.zero, Complex.zero, B(I, 1), LDB);
      }
    }

    // Multiply B by right singular vectors
    // (CWorkspace: need N, prefer N*NRHS)
    // (RWorkspace: none)

    if (LWORK >= LDB * NRHS && NRHS > 1) {
      zgemm('C', 'N', N, NRHS, N, Complex.one, A, LDA, B, LDB, Complex.zero,
          WORK.asMatrix(LDB), LDB);
      zlacpy('G', N, NRHS, WORK.asMatrix(LDB), LDB, B, LDB);
    } else if (NRHS > 1) {
      CHUNK = LWORK ~/ N;
      for (I = 1; I <= NRHS; I += CHUNK) {
        BL = min(NRHS - I + 1, CHUNK);
        zgemm('C', 'N', N, BL, N, Complex.one, A, LDA, B(1, I), LDB,
            Complex.zero, WORK.asMatrix(N), N);
        zlacpy('G', N, BL, WORK.asMatrix(N), N, B(1, I), LDB);
      }
    } else if (NRHS == 1) {
      zgemv('C', N, N, Complex.one, A, LDA, B.asArray(), 1, Complex.zero, WORK,
          1);
      zcopy(N, WORK, 1, B.asArray(), 1);
    }
  } else if (N >= MNTHR &&
      LWORK >= 3 * M + M * M + max(M, max(NRHS, N - 2 * M))) {
    // Underdetermined case, M much less than N

    // Path 2a - underdetermined, with many more columns than rows
    // and sufficient workspace for an efficient algorithm

    LDWORK = M;
    if (LWORK >= 3 * M + M * LDA + max(M, max(NRHS, N - 2 * M))) LDWORK = LDA;
    ITAU = 1;
    IWORK = M + 1;

    // Compute A=L*Q
    // (CWorkspace: need 2*M, prefer M+M*NB)
    // (RWorkspace: none)

    zgelqf(M, N, A, LDA, WORK(ITAU), WORK(IWORK), LWORK - IWORK + 1, INFO);
    IL = IWORK;

    // Copy L to WORK(IL), zeroing out above it

    zlacpy('L', M, M, A, LDA, WORK(IL).asMatrix(LDWORK), LDWORK);
    zlaset('U', M - 1, M - 1, Complex.zero, Complex.zero,
        WORK(IL + LDWORK).asMatrix(LDWORK), LDWORK);
    IE = 1;
    ITAUQ = IL + LDWORK * M;
    ITAUP = ITAUQ + M;
    IWORK = ITAUP + M;

    // Bidiagonalize L in WORK(IL)
    // (CWorkspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
    // (RWorkspace: need M)

    zgebrd(M, M, WORK(IL).asMatrix(LDWORK), LDWORK, S, RWORK(IE), WORK(ITAUQ),
        WORK(ITAUP), WORK(IWORK), LWORK - IWORK + 1, INFO);

    // Multiply B by transpose of left bidiagonalizing vectors of L
    // (CWorkspace: need M*M+3*M+NRHS, prefer M*M+3*M+NRHS*NB)
    // (RWorkspace: none)

    zunmbr('Q', 'L', 'C', M, NRHS, M, WORK(IL).asMatrix(LDWORK), LDWORK,
        WORK(ITAUQ), B, LDB, WORK(IWORK), LWORK - IWORK + 1, INFO);

    // Generate right bidiagonalizing vectors of R in WORK(IL)
    // (CWorkspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
    // (RWorkspace: none)

    zungbr('P', M, M, M, WORK(IL).asMatrix(LDWORK), LDWORK, WORK(ITAUP),
        WORK(IWORK), LWORK - IWORK + 1, INFO);
    IRWORK = IE + M;

    // Perform bidiagonal QR iteration, computing right singular
    // vectors of L in WORK(IL) and multiplying B by transpose of
    // left singular vectors
    // (CWorkspace: need M*M)
    // (RWorkspace: need BDSPAC)

    zbdsqr('U', M, M, 0, NRHS, S, RWORK(IE), WORK(IL).asMatrix(LDWORK), LDWORK,
        A, LDA, B, LDB, RWORK(IRWORK), INFO);
    if (INFO.value != 0) {
      WORK[1] = MAXWRK.toComplex();
      return;
    }

    // Multiply B by reciprocals of singular values

    THR = max(RCOND * S[1], SFMIN);
    if (RCOND < ZERO) THR = max(EPS * S[1], SFMIN);
    RANK.value = 0;
    for (I = 1; I <= M; I++) {
      if (S[I] > THR) {
        zdrscl(NRHS, S[I], B(I, 1).asArray(), LDB);
        RANK.value++;
      } else {
        zlaset('F', 1, NRHS, Complex.zero, Complex.zero, B(I, 1), LDB);
      }
    }
    IWORK = IL + M * LDWORK;

    // Multiply B by right singular vectors of L in WORK(IL)
    // (CWorkspace: need M*M+2*M, prefer M*M+M+M*NRHS)
    // (RWorkspace: none)

    if (LWORK >= LDB * NRHS + IWORK - 1 && NRHS > 1) {
      zgemm('C', 'N', M, NRHS, M, Complex.one, WORK(IL).asMatrix(LDWORK),
          LDWORK, B, LDB, Complex.zero, WORK(IWORK).asMatrix(LDB), LDB);
      zlacpy('G', M, NRHS, WORK(IWORK).asMatrix(LDB), LDB, B, LDB);
    } else if (NRHS > 1) {
      CHUNK = (LWORK - IWORK + 1) ~/ M;
      for (I = 1; I <= NRHS; I += CHUNK) {
        BL = min(NRHS - I + 1, CHUNK);
        zgemm('C', 'N', M, BL, M, Complex.one, WORK(IL).asMatrix(LDWORK),
            LDWORK, B(1, I), LDB, Complex.zero, WORK(IWORK).asMatrix(M), M);
        zlacpy('G', M, BL, WORK(IWORK).asMatrix(M), M, B(1, I), LDB);
      }
    } else if (NRHS == 1) {
      zgemv('C', M, M, Complex.one, WORK(IL).asMatrix(LDWORK), LDWORK,
          B(1, 1).asArray(), 1, Complex.zero, WORK(IWORK), 1);
      zcopy(M, WORK(IWORK), 1, B(1, 1).asArray(), 1);
    }

    // Zero out below first M rows of B

    zlaset('F', N - M, NRHS, Complex.zero, Complex.zero, B(M + 1, 1), LDB);
    IWORK = ITAU + M;

    // Multiply transpose(Q) by B
    // (CWorkspace: need M+NRHS, prefer M+NHRS*NB)
    // (RWorkspace: none)

    zunmlq('L', 'C', N, NRHS, M, A, LDA, WORK(ITAU), B, LDB, WORK(IWORK),
        LWORK - IWORK + 1, INFO);
  } else {
    // Path 2 - remaining underdetermined cases

    IE = 1;
    ITAUQ = 1;
    ITAUP = ITAUQ + M;
    IWORK = ITAUP + M;

    // Bidiagonalize A
    // (CWorkspace: need 3*M, prefer 2*M+(M+N)*NB)
    // (RWorkspace: need N)

    zgebrd(M, N, A, LDA, S, RWORK(IE), WORK(ITAUQ), WORK(ITAUP), WORK(IWORK),
        LWORK - IWORK + 1, INFO);

    // Multiply B by transpose of left bidiagonalizing vectors
    // (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB)
    // (RWorkspace: none)

    zunmbr('Q', 'L', 'C', M, NRHS, N, A, LDA, WORK(ITAUQ), B, LDB, WORK(IWORK),
        LWORK - IWORK + 1, INFO);

    // Generate right bidiagonalizing vectors in A
    // (CWorkspace: need 3*M, prefer 2*M+M*NB)
    // (RWorkspace: none)

    zungbr('P', M, N, M, A, LDA, WORK(ITAUP), WORK(IWORK), LWORK - IWORK + 1,
        INFO);
    IRWORK = IE + M;

    // Perform bidiagonal QR iteration,
    //    computing right singular vectors of A in A and
    //    multiplying B by transpose of left singular vectors
    // (CWorkspace: none)
    // (RWorkspace: need BDSPAC)

    zbdsqr('L', M, N, 0, NRHS, S, RWORK(IE), A, LDA, DUM.asMatrix(1), 1, B, LDB,
        RWORK(IRWORK), INFO);
    if (INFO.value != 0) {
      WORK[1] = MAXWRK.toComplex();
      return;
    }

    // Multiply B by reciprocals of singular values

    THR = max(RCOND * S[1], SFMIN);
    if (RCOND < ZERO) THR = max(EPS * S[1], SFMIN);
    RANK.value = 0;
    for (I = 1; I <= M; I++) {
      if (S[I] > THR) {
        zdrscl(NRHS, S[I], B(I, 1).asArray(), LDB);
        RANK.value++;
      } else {
        zlaset('F', 1, NRHS, Complex.zero, Complex.zero, B(I, 1), LDB);
      }
    }

    // Multiply B by right singular vectors of A
    // (CWorkspace: need N, prefer N*NRHS)
    // (RWorkspace: none)

    if (LWORK >= LDB * NRHS && NRHS > 1) {
      zgemm('C', 'N', N, NRHS, M, Complex.one, A, LDA, B, LDB, Complex.zero,
          WORK.asMatrix(LDB), LDB);
      zlacpy('G', N, NRHS, WORK.asMatrix(LDB), LDB, B, LDB);
    } else if (NRHS > 1) {
      CHUNK = LWORK ~/ N;
      for (I = 1; I <= NRHS; I += CHUNK) {
        BL = min(NRHS - I + 1, CHUNK);
        zgemm('C', 'N', N, BL, M, Complex.one, A, LDA, B(1, I), LDB,
            Complex.zero, WORK.asMatrix(N), N);
        zlacpy('F', N, BL, WORK.asMatrix(N), N, B(1, I), LDB);
      }
    } else if (NRHS == 1) {
      zgemv('C', M, N, Complex.one, A, LDA, B.asArray(), 1, Complex.zero, WORK,
          1);
      zcopy(N, WORK, 1, B.asArray(), 1);
    }
  }

  // Undo scaling

  if (IASCL == 1) {
    zlascl('G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO);
    dlascl('G', 0, 0, SMLNUM, ANRM, MINMN, 1, S.asMatrix(MINMN), MINMN, INFO);
  } else if (IASCL == 2) {
    zlascl('G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO);
    dlascl('G', 0, 0, BIGNUM, ANRM, MINMN, 1, S.asMatrix(MINMN), MINMN, INFO);
  }
  if (IBSCL == 1) {
    zlascl('G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO);
  } else if (IBSCL == 2) {
    zlascl('G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO);
  }
  WORK[1] = MAXWRK.toComplex();
}