dgeqp3rk function

void dgeqp3rk(
  1. int M,
  2. int N,
  3. int NRHS,
  4. int KMAX,
  5. Box<double> ABSTOL,
  6. Box<double> RELTOL,
  7. Matrix<double> A_,
  8. int LDA,
  9. Box<int> K,
  10. Box<double> MAXC2NRMK,
  11. Box<double> RELMAXC2NRMK,
  12. Array<int> JPIV_,
  13. Array<double> TAU_,
  14. Array<double> WORK_,
  15. int LWORK,
  16. Array<int> IWORK_,
  17. Box<int> INFO,
)

Implementation

void dgeqp3rk(
  final int M,
  final int N,
  final int NRHS,
  final int KMAX,
  final Box<double> ABSTOL,
  final Box<double> RELTOL,
  final Matrix<double> A_,
  final int LDA,
  final Box<int> K,
  final Box<double> MAXC2NRMK,
  final Box<double> RELMAXC2NRMK,
  final Array<int> JPIV_,
  final Array<double> TAU_,
  final Array<double> WORK_,
  final int LWORK,
  final Array<int> IWORK_,
  final Box<int> INFO,
) {
  final A = A_.having(ld: LDA);
  final WORK = WORK_.having();
  final JPIV = JPIV_.having();
  final TAU = TAU_.having();
  final IWORK = IWORK_.having();
  const INB = 1, INBMIN = 2, IXOVER = 3;
  const ZERO = 0.0, ONE = 1.0, TWO = 2.0;
  int J, NB = 0, NBMIN, NX;
  final IINFO = Box(0);
  final DONE = Box(false);

  // Test input arguments
  INFO.value = 0;
  final LQUERY = (LWORK == -1);
  if (M < 0) {
    INFO.value = -1;
  } else if (N < 0) {
    INFO.value = -2;
  } else if (NRHS < 0) {
    INFO.value = -3;
  } else if (KMAX < 0) {
    INFO.value = -4;
  } else if (disnan(ABSTOL.value)) {
    INFO.value = -5;
  } else if (disnan(RELTOL.value)) {
    INFO.value = -6;
  } else if (LDA < max(1, M)) {
    INFO.value = -8;
  }

  // If the input parameters M, N, NRHS, KMAX, LDA are valid:
  //   a) Test the input workspace size LWORK for the minimum
  //      size requirement IWS.
  //   b) Determine the optimal block size NB and optimal
  //      workspace size LWKOPT to be returned in WORK(1)
  //      in case of (1) LWORK < IWS, (2) LQUERY = true ,
  //      (3) when routine exits.
  // Here, IWS is the miminum workspace required for unblocked code.
  final MINMN = min(M, N);
  final int IWS, LWKOPT;
  if (MINMN == 0) {
    IWS = 1;
    LWKOPT = 1;
  } else {
    // Minimal workspace size in case of using only unblocked
    // BLAS 2 code in DLAQP2RK.
    // 1) DGEQP3RK and DLAQP2RK: 2*N to store full and partial
    //    column 2-norms.
    // 2) DLAQP2RK: N+NRHS-1 to use in WORK array that is used
    //    in DLARF subroutine inside DLAQP2RK to apply an
    //    elementary reflector from the left.
    // TOTAL_WORK_SIZE = 3*N + NRHS - 1
    IWS = 3 * N + NRHS - 1;

    // Assign to NB optimal block size.
    NB = ilaenv(INB, 'DGEQP3RK', ' ', M, N, -1, -1);

    // A formula for the optimal workspace size in case of using
    // both unblocked BLAS 2 in DLAQP2RK and blocked BLAS 3 code
    // in DLAQP3RK.
    // 1) DGEQP3RK, DLAQP2RK, DLAQP3RK: 2*N to store full and
    //    partial column 2-norms.
    // 2) DLAQP2RK: N+NRHS-1 to use in WORK array that is used
    //    in DLARF subroutine to apply an elementary reflector
    //    from the left.
    // 3) DLAQP3RK: NB*(N+NRHS) to use in the work array F that
    //    is used to apply a block reflector from
    //    the left.
    // 4) DLAQP3RK: NB to use in the auxilixary array AUX.
    // Sizes (2) and ((3) + (4)) should intersect, therefore
    // TOTAL_WORK_SIZE = 2*N + NB*( N+NRHS+1 ), given NBMIN=2.
    LWKOPT = 2 * N + NB * (N + NRHS + 1);
  }
  WORK[1] = LWKOPT.toDouble();

  if (INFO.value == 0 && LWORK < IWS && !LQUERY) {
    INFO.value = -15;
  }

  // NOTE: The optimal workspace size is returned in WORK(1), if
  //       the input parameters M, N, NRHS, KMAX, LDA are valid.

  if (INFO.value != 0) {
    xerbla('DGEQP3RK', -INFO.value);
    return;
  } else if (LQUERY) {
    return;
  }

  // Quick return if possible for M=0 or N=0.
  if (MINMN == 0) {
    K.value = 0;
    MAXC2NRMK.value = ZERO;
    RELMAXC2NRMK.value = ZERO;
    WORK[1] = LWKOPT.toDouble();
    return;
  }

  // Initialize column pivot array JPIV.
  for (var J = 1; J <= N; J++) {
    JPIV[J] = J;
  }

  // Initialize storage for partial and exact column 2-norms.
  // a) The elements WORK(1:N) are used to store partial column
  //    2-norms of the matrix A, and may decrease in each computation
  //    step; initialize to the values of complete columns 2-norms.
  // b) The elements WORK(N+1:2*N) are used to store complete column
  //    2-norms of the matrix A, they are not changed during the
  //    computation; initialize the values of complete columns 2-norms.
  for (var J = 1; J <= N; J++) {
    WORK[J] = dnrm2(M, A(1, J).asArray(), 1);
    WORK[N + J] = WORK[J];
  }

  // Compute the pivot column index and the maximum column 2-norm
  // for the whole original matrix stored in A(1:M,1:N).
  final KP1 = idamax(N, WORK(1), 1);
  final MAXC2NRM = WORK[KP1];

  if (disnan(MAXC2NRM)) {
    // Check if the matrix A contains NaN, set INFO parameter
    // to the column number where the first NaN is found and return;
    // from the routine.
    K.value = 0;
    INFO.value = KP1;

    // Set MAXC2NRMK and RELMAXC2NRMK to NaN.
    MAXC2NRMK.value = MAXC2NRM;
    RELMAXC2NRMK.value = MAXC2NRM;

    // Array TAU is not set and contains undefined elements.
    WORK[1] = LWKOPT.toDouble();
    return;
  }

  if (MAXC2NRM == ZERO) {
    // Check is the matrix A is a zero matrix, set array TAU and
    // return from the routine.
    K.value = 0;
    MAXC2NRMK.value = ZERO;
    RELMAXC2NRMK.value = ZERO;

    for (var J = 1; J <= MINMN; J++) {
      TAU[J] = ZERO;
    }

    WORK[1] = LWKOPT.toDouble();
    return;
  }

  final HUGEVAL = dlamch('Overflow');
  if (MAXC2NRM > HUGEVAL) {
    // Check if the matrix A contains +Inf or -Inf, set INFO parameter
    // to the column number, where the first +/-Inf  is found plus N,
    // and continue the computation.
    INFO.value = N + KP1;
  }

  // Quick return if possible for the case when the first
  // stopping criterion is satisfied, i.e. KMAX = 0.
  if (KMAX == 0) {
    K.value = 0;
    MAXC2NRMK.value = MAXC2NRM;
    RELMAXC2NRMK.value = ONE;
    for (var J = 1; J <= MINMN; J++) {
      TAU[J] = ZERO;
    }
    WORK[1] = LWKOPT.toDouble();
    return;
  }

  final EPS = dlamch('Epsilon');

  // Adjust ABSTOL
  if (ABSTOL.value >= ZERO) {
    final SAFMIN = dlamch('Safe minimum');
    ABSTOL.value = max(ABSTOL.value, TWO * SAFMIN);
  }

  // Adjust RELTOL
  if (RELTOL.value >= ZERO) {
    RELTOL.value = max(RELTOL.value, EPS);
  }

  // JMAX is the maximum index of the column to be factorized,
  // which is also limited by the first stopping criterion KMAX.
  final JMAX = min(KMAX, MINMN);

  // Quick return if possible for the case when the second or third
  // stopping criterion for the whole original matrix is satified,
  // i.e. MAXC2NRM <= ABSTOL or RELMAXC2NRM <= RELTOL
  // (which is ONE <= RELTOL).
  if (MAXC2NRM <= ABSTOL.value || ONE <= RELTOL.value) {
    K.value = 0;
    MAXC2NRMK.value = MAXC2NRM;
    RELMAXC2NRMK.value = ONE;

    for (var J = 1; J <= MINMN; J++) {
      TAU[J] = ZERO;
    }

    WORK[1] = LWKOPT.toDouble();
    return;
  }

  // Factorize columns

  // Determine the block size.
  NBMIN = 2;
  NX = 0;

  if ((NB > 1) && (NB < MINMN)) {
    // Determine when to cross over from blocked to unblocked code.
    // (for N less than NX, unblocked code should be used).
    NX = max(0, ilaenv(IXOVER, 'DGEQP3RK', ' ', M, N, -1, -1));

    if (NX < MINMN) {
      // Determine if workspace is large enough for blocked code.
      if (LWORK < LWKOPT) {
        // Not enough workspace to use optimal block size that
        // is currently stored in NB.
        // Reduce NB and determine the minimum value of NB.
        NB = (LWORK - 2 * N) ~/ (N + 1);
        NBMIN = max(2, ilaenv(INBMIN, 'DGEQP3RK', ' ', M, N, -1, -1));
      }
    }
  }

  // DONE is the boolean flag to rerpresent the case when the
  // factorization completed in the block factorization routine,
  // before the end of the block.
  DONE.value = false;

  // J is the column index.
  J = 1;

  // (1) Use blocked code initially.

  // JMAXB is the maximum column index of the block, when the
  // blocked code is used, is also limited by the first stopping
  // criterion KMAX.
  final JMAXB = min(KMAX, MINMN - NX);

  if (NB >= NBMIN && NB < JMAX && JMAXB > 0) {
    // Loop over the column blocks of the matrix A(1:M,1:JMAXB). Here:
    // J   is the column index of a column block;
    // JB  is the column block size to pass to block factorization
    //     routine in a loop step;
    // JBF is the number of columns that were actually factorized
    //     that was returned by the block factorization routine
    //     in a loop step, JBF <= JB;
    // N_SUB is the number of columns in the submatrix;
    // IOFFSET is the number of rows that should not be factorized.
    while (J <= JMAXB) {
      final JB = Box(min(NB, JMAXB - J + 1));
      final JBF = Box(0);
      final N_SUB = N - J + 1;
      final IOFFSET = J - 1;

      // Factorize JB columns among the columns A(J:N).
      dlaqp3rk(
          M,
          N_SUB,
          NRHS,
          IOFFSET,
          JB,
          ABSTOL.value,
          RELTOL.value,
          KP1,
          MAXC2NRM,
          A(1, J),
          LDA,
          DONE,
          JBF,
          MAXC2NRMK,
          RELMAXC2NRMK,
          JPIV(J),
          TAU(J),
          WORK(J),
          WORK(N + J),
          WORK(2 * N + 1),
          WORK(2 * N + JB.value + 1).asMatrix(),
          N + NRHS - J + 1,
          IWORK,
          IINFO);

      // Set INFO on the first occurence of Inf.
      if (IINFO.value > N_SUB && INFO.value == 0) {
        INFO.value = 2 * IOFFSET + IINFO.value;
      }

      if (DONE.value) {
        // Either the submatrix is zero before the end of the
        // column block, or ABSTOL or RELTOL criterion is
        // satisfied before the end of the column block, we can
        // return from the routine. Perform the following before
        // returning:
        //   a) Set the number of factorized columns K,
        //      K = IOFFSET + JBF from the last call of blocked
        //      routine.
        //   NOTE: 1) MAXC2NRMK and RELMAXC2NRMK are returned
        //            by the block factorization routine;
        //         2) The remaining TAUs are set to ZERO by the
        //            block factorization routine.
        K.value = IOFFSET + JBF.value;

        // Set INFO on the first occurrence of NaN, NaN takes
        // prcedence over Inf.
        if (IINFO.value <= N_SUB && IINFO.value > 0) {
          INFO.value = IOFFSET + IINFO.value;
        }

        // Return from the routine.
        WORK[1] = LWKOPT.toDouble();
        return;
      }

      J += JBF.value;
    }
  }

  // Use unblocked code to factor the last or only block.
  // J = JMAX+1 means we factorized the maximum possible number of
  // columns, that is in ELSE clause we need to compute
  // the MAXC2NORM and RELMAXC2NORM to return after we processed
  // the blocks.
  if (J <= JMAX) {
    // N_SUB is the number of columns in the submatrix;
    // IOFFSET is the number of rows that should not be factorized.
    final N_SUB = N - J + 1;
    final IOFFSET = J - 1;
    final KMAX = Box(JMAX - J + 1);
    final KF = Box(0);
    dlaqp2rk(
        M,
        N_SUB,
        NRHS,
        IOFFSET,
        KMAX,
        ABSTOL.value,
        RELTOL.value,
        KP1,
        MAXC2NRM,
        A(1, J),
        LDA,
        KF,
        MAXC2NRMK,
        RELMAXC2NRMK,
        JPIV(J),
        TAU(J),
        WORK(J),
        WORK(N + J),
        WORK(2 * N + 1),
        IINFO);

    // ABSTOL or RELTOL criterion is satisfied when the number of
    // the factorized columns KF is smaller then the  number
    // of columns JMAX-J+1 supplied to be factorized by the
    // unblocked routine, we can return from
    // the routine. Perform the following before returning:
    //    a) Set the number of factorized columns K,
    //    b) MAXC2NRMK and RELMAXC2NRMK are returned by the
    //       unblocked factorization routine above.
    K.value = J - 1 + KF.value;

    // Set INFO on the first exception occurence.

    // Set INFO on the first exception occurence of Inf or NaN,
    // (NaN takes precedence over Inf).

    if (IINFO.value > N_SUB && INFO.value == 0) {
      INFO.value = 2 * IOFFSET + IINFO.value;
    } else if (IINFO.value <= N_SUB && IINFO.value > 0) {
      INFO.value = IOFFSET + IINFO.value;
    }
  } else {
    // Compute the return values for blocked code.

    // Set the number of factorized columns if the unblocked routine
    // was not called.
    K.value = JMAX;

    // If there exits a residual matrix after the blocked code:
    //    1) compute the values of MAXC2NRMK, RELMAXC2NRMK of the
    //       residual matrix, otherwise set them to ZERO;
    //    2) Set TAU(K+1:MINMN) to ZERO.
    if (K.value < MINMN) {
      final JMAXC2NRM = K.value + idamax(N - K.value, WORK(K.value + 1), 1);
      MAXC2NRMK.value = WORK[JMAXC2NRM];
      if (K.value == 0) {
        RELMAXC2NRMK.value = ONE;
      } else {
        RELMAXC2NRMK.value = MAXC2NRMK.value / MAXC2NRM;
      }

      for (var J = K.value + 1; J <= MINMN; J++) {
        TAU[J] = ZERO;
      }
    }

    // END IF( J <= JMAX ) THEN
  }

  WORK[1] = LWKOPT.toDouble();
}