dsytrf_aa_2stage function

void dsytrf_aa_2stage(
  1. String UPLO,
  2. int N,
  3. Matrix<double> A_,
  4. int LDA,
  5. Array<double> TB_,
  6. int LTB,
  7. Array<int> IPIV_,
  8. Array<int> IPIV2_,
  9. Array<double> WORK_,
  10. int LWORK,
  11. Box<int> INFO,
)

Implementation

void dsytrf_aa_2stage(
  final String UPLO,
  final int N,
  final Matrix<double> A_,
  final int LDA,
  final Array<double> TB_,
  final int LTB,
  final Array<int> IPIV_,
  final Array<int> IPIV2_,
  final Array<double> WORK_,
  final int LWORK,
  final Box<int> INFO,
) {
  final A = A_.having(ld: LDA);
  final TB = TB_.having();
  final IPIV = IPIV_.having();
  final IPIV2 = IPIV2_.having();
  final WORK = WORK_.having();
  const ZERO = 0.0, ONE = 1.0;
  bool UPPER, TQUERY, WQUERY;
  int I, J, K, I1, I2, TD;
  int LDTB, NB, KB, JB, NT;
  double PIV;
  final IINFO = Box(0);

  // Test the input parameters.
  INFO.value = 0;
  UPPER = lsame(UPLO, 'U');
  WQUERY = (LWORK == -1);
  TQUERY = (LTB == -1);
  if (!UPPER && !lsame(UPLO, 'L')) {
    INFO.value = -1;
  } else if (N < 0) {
    INFO.value = -2;
  } else if (LDA < max(1, N)) {
    INFO.value = -4;
  } else if (LTB < max(1, 4 * N) && !TQUERY) {
    INFO.value = -6;
  } else if (LWORK < max(1, N) && !WQUERY) {
    INFO.value = -10;
  }

  if (INFO.value != 0) {
    xerbla('DSYTRF_AA_2STAGE', -INFO.value);
    return;
  }

  // Answer the query
  NB = ilaenv(1, 'DSYTRF_AA_2STAGE', UPLO, N, -1, -1, -1);
  if (INFO.value == 0) {
    if (TQUERY) {
      TB[1] = max(1, (3 * NB + 1) * N).toDouble();
    }
    if (WQUERY) {
      WORK[1] = max(1, N * NB).toDouble();
    }
  }
  if (TQUERY || WQUERY) {
    return;
  }

  // Quick return;
  if (N == 0) {
    return;
  }

  // Determine the number of the block size
  LDTB = LTB ~/ N;
  if (LDTB < 3 * NB + 1) {
    NB = (LDTB - 1) ~/ 3;
  }
  if (LWORK < NB * N) {
    NB = LWORK ~/ N;
  }

  // Determine the number of the block columns
  NT = (N + NB - 1) ~/ NB;
  TD = 2 * NB;
  KB = min(NB, N);

  // Initialize vectors/matrices
  for (J = 1; J <= KB; J++) {
    IPIV[J] = J;
  }

  // Save NB
  TB[1] = NB.toDouble();

  if (UPPER) {
    // Factorize A as U**T*D*U using the upper triangle of A
    for (J = 0; J <= NT - 1; J++) {
      // Generate Jth column of W and H
      KB = min(NB, N - J * NB);
      for (I = 1; I <= J - 1; I++) {
        if (I == 1) {
          // H(I,J) = T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
          if (I == (J - 1)) {
            JB = NB + KB;
          } else {
            JB = 2 * NB;
          }
          dgemm(
              'NoTranspose',
              'NoTranspose',
              NB,
              KB,
              JB,
              ONE,
              TB(TD + 1 + (I * NB) * LDTB).asMatrix(LDTB - 1),
              LDTB - 1,
              A((I - 1) * NB + 1, J * NB + 1),
              LDA,
              ZERO,
              WORK(I * NB + 1).asMatrix(N),
              N);
        } else {
          // H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
          if (I == J - 1) {
            JB = 2 * NB + KB;
          } else {
            JB = 3 * NB;
          }
          dgemm(
              'NoTranspose',
              'NoTranspose',
              NB,
              KB,
              JB,
              ONE,
              TB(TD + NB + 1 + ((I - 1) * NB) * LDTB).asMatrix(LDTB - 1),
              LDTB - 1,
              A((I - 2) * NB + 1, J * NB + 1),
              LDA,
              ZERO,
              WORK(I * NB + 1).asMatrix(N),
              N);
        }
      }

      // Compute T(J,J)
      dlacpy('Upper', KB, KB, A(J * NB + 1, J * NB + 1), LDA,
          TB(TD + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1), LDTB - 1);
      if (J > 1) {
        // T(J,J) = U(1:J,J)'*H(1:J)
        dgemm(
            'Transpose',
            'NoTranspose',
            KB,
            KB,
            (J - 1) * NB,
            -ONE,
            A(1, J * NB + 1),
            LDA,
            WORK(NB + 1).asMatrix(N),
            N,
            ONE,
            TB(TD + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1),
            LDTB - 1);
        // T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J)
        dgemm(
            'Transpose',
            'NoTranspose',
            KB,
            NB,
            KB,
            ONE,
            A((J - 1) * NB + 1, J * NB + 1),
            LDA,
            TB(TD + NB + 1 + ((J - 1) * NB) * LDTB).asMatrix(LDTB - 1),
            LDTB - 1,
            ZERO,
            WORK(1).asMatrix(N),
            N);
        dgemm(
            'NoTranspose',
            'NoTranspose',
            KB,
            KB,
            NB,
            -ONE,
            WORK(1).asMatrix(N),
            N,
            A((J - 2) * NB + 1, J * NB + 1),
            LDA,
            ONE,
            TB(TD + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1),
            LDTB - 1);
      }
      if (J > 0) {
        dsygst(1, 'Upper', KB, TB(TD + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1),
            LDTB - 1, A((J - 1) * NB + 1, J * NB + 1), LDA, IINFO);
      }

      // Expand T(J,J) into full format
      for (I = 1; I <= KB; I++) {
        for (K = I + 1; K <= KB; K++) {
          TB[TD + (K - I) + 1 + (J * NB + I - 1) * LDTB] =
              TB[TD - (K - (I + 1)) + (J * NB + K - 1) * LDTB];
        }
      }

      if (J < NT - 1) {
        if (J > 0) {
          // Compute H(J,J)
          if (J == 1) {
            dgemm(
                'NoTranspose',
                'NoTranspose',
                KB,
                KB,
                KB,
                ONE,
                TB(TD + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1),
                LDTB - 1,
                A((J - 1) * NB + 1, J * NB + 1),
                LDA,
                ZERO,
                WORK(J * NB + 1).asMatrix(N),
                N);
          } else {
            dgemm(
                'NoTranspose',
                'NoTranspose',
                KB,
                KB,
                NB + KB,
                ONE,
                TB(TD + NB + 1 + ((J - 1) * NB) * LDTB).asMatrix(LDTB - 1),
                LDTB - 1,
                A((J - 2) * NB + 1, J * NB + 1),
                LDA,
                ZERO,
                WORK(J * NB + 1).asMatrix(N),
                N);
          }

          // Update with the previous column
          dgemm(
              'Transpose',
              'NoTranspose',
              NB,
              N - (J + 1) * NB,
              J * NB,
              -ONE,
              WORK(NB + 1).asMatrix(N),
              N,
              A(1, (J + 1) * NB + 1),
              LDA,
              ONE,
              A(J * NB + 1, (J + 1) * NB + 1),
              LDA);
        }

        // Copy panel to workspace to call DGETRF
        for (K = 1; K <= NB; K++) {
          dcopy(N - (J + 1) * NB, A(J * NB + K, (J + 1) * NB + 1).asArray(),
              LDA, WORK(1 + (K - 1) * N), 1);
        }

        // Factorize panel
        dgetrf(N - (J + 1) * NB, NB, WORK.asMatrix(N), N,
            IPIV((J + 1) * NB + 1), IINFO);
        // IF (IINFO != 0 && INFO == 0) THEN
        //    INFO = IINFO+(J+1)*NB
        // END IF

        // Copy panel back
        for (K = 1; K <= NB; K++) {
          dcopy(N - (J + 1) * NB, WORK(1 + (K - 1) * N), 1,
              A(J * NB + K, (J + 1) * NB + 1).asArray(), LDA);
        }

        // Compute T(J+1, J), zero out for GEMM update
        KB = min(NB, N - (J + 1) * NB);
        dlaset('Full', KB, NB, ZERO, ZERO,
            TB(TD + NB + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1), LDTB - 1);
        dlacpy('Upper', KB, NB, WORK.asMatrix(N), N,
            TB(TD + NB + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1), LDTB - 1);
        if (J > 0) {
          dtrsm(
              'R',
              'U',
              'N',
              'U',
              KB,
              NB,
              ONE,
              A((J - 1) * NB + 1, J * NB + 1),
              LDA,
              TB(TD + NB + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1),
              LDTB - 1);
        }

        // Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM
        // updates
        for (K = 1; K <= NB; K++) {
          for (I = 1; I <= KB; I++) {
            TB[TD - NB + K - I + 1 + (J * NB + NB + I - 1) * LDTB] =
                TB[TD + NB + I - K + 1 + (J * NB + K - 1) * LDTB];
          }
        }
        dlaset(
            'Lower', KB, NB, ZERO, ONE, A(J * NB + 1, (J + 1) * NB + 1), LDA);

        // Apply pivots to trailing submatrix of A
        for (K = 1; K <= KB; K++) {
          // > Adjust ipiv
          IPIV[(J + 1) * NB + K] += (J + 1) * NB;

          I1 = (J + 1) * NB + K;
          I2 = IPIV[(J + 1) * NB + K];
          if (I1 != I2) {
            // > Apply pivots to previous columns of L
            dswap(K - 1, A((J + 1) * NB + 1, I1).asArray(), 1,
                A((J + 1) * NB + 1, I2).asArray(), 1);
            // > Swap A(I1+1:M, I1) with A(I2, I1+1:M)
            if (I2 > (I1 + 1)) {
              dswap(I2 - I1 - 1, A(I1, I1 + 1).asArray(), LDA,
                  A(I1 + 1, I2).asArray(), 1);
            }
            // > Swap A(I2+1:M, I1) with A(I2+1:M, I2)
            if (I2 < N) {
              dswap(N - I2, A(I1, I2 + 1).asArray(), LDA,
                  A(I2, I2 + 1).asArray(), LDA);
            }
            // > Swap A(I1, I1) with A(I2, I2)
            PIV = A[I1][I1];
            A[I1][I1] = A[I2][I2];
            A[I2][I2] = PIV;
            // > Apply pivots to previous columns of L
            if (J > 0) {
              dswap(J * NB, A(1, I1).asArray(), 1, A(1, I2).asArray(), 1);
            }
          }
        }
      }
    }
  } else {
    // Factorize A as L*D*L**T using the lower triangle of A
    for (J = 0; J <= NT - 1; J++) {
      // Generate Jth column of W and H
      KB = min(NB, N - J * NB);
      for (I = 1; I <= J - 1; I++) {
        if (I == 1) {
          // H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)'
          if (I == J - 1) {
            JB = NB + KB;
          } else {
            JB = 2 * NB;
          }
          dgemm(
              'NoTranspose',
              'Transpose',
              NB,
              KB,
              JB,
              ONE,
              TB(TD + 1 + (I * NB) * LDTB).asMatrix(LDTB - 1),
              LDTB - 1,
              A(J * NB + 1, (I - 1) * NB + 1),
              LDA,
              ZERO,
              WORK(I * NB + 1).asMatrix(N),
              N);
        } else {
          // H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)'
          if (I == J - 1) {
            JB = 2 * NB + KB;
          } else {
            JB = 3 * NB;
          }
          dgemm(
              'NoTranspose',
              'Transpose',
              NB,
              KB,
              JB,
              ONE,
              TB(TD + NB + 1 + ((I - 1) * NB) * LDTB).asMatrix(LDTB - 1),
              LDTB - 1,
              A(J * NB + 1, (I - 2) * NB + 1),
              LDA,
              ZERO,
              WORK(I * NB + 1).asMatrix(N),
              N);
        }
      }

      // Compute T(J,J)
      dlacpy('Lower', KB, KB, A(J * NB + 1, J * NB + 1), LDA,
          TB(TD + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1), LDTB - 1);
      if (J > 1) {
        // T(J,J) = L(J,1:J)*H(1:J)
        dgemm(
            'NoTranspose',
            'NoTranspose',
            KB,
            KB,
            (J - 1) * NB,
            -ONE,
            A(J * NB + 1, 1),
            LDA,
            WORK(NB + 1).asMatrix(N),
            N,
            ONE,
            TB(TD + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1),
            LDTB - 1);
        // T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)'
        dgemm(
            'NoTranspose',
            'NoTranspose',
            KB,
            NB,
            KB,
            ONE,
            A(J * NB + 1, (J - 1) * NB + 1),
            LDA,
            TB(TD + NB + 1 + ((J - 1) * NB) * LDTB).asMatrix(LDTB - 1),
            LDTB - 1,
            ZERO,
            WORK(1).asMatrix(N),
            N);
        dgemm(
            'NoTranspose',
            'Transpose',
            KB,
            KB,
            NB,
            -ONE,
            WORK(1).asMatrix(N),
            N,
            A(J * NB + 1, (J - 2) * NB + 1),
            LDA,
            ONE,
            TB(TD + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1),
            LDTB - 1);
      }
      if (J > 0) {
        dsygst(1, 'Lower', KB, TB(TD + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1),
            LDTB - 1, A(J * NB + 1, (J - 1) * NB + 1), LDA, IINFO);
      }

      // Expand T(J,J) into full format
      for (I = 1; I <= KB; I++) {
        for (K = I + 1; K <= KB; K++) {
          TB[TD - (K - (I + 1)) + (J * NB + K - 1) * LDTB] =
              TB[TD + (K - I) + 1 + (J * NB + I - 1) * LDTB];
        }
      }

      if (J < NT - 1) {
        if (J > 0) {
          // Compute H(J,J)
          if (J == 1) {
            dgemm(
                'NoTranspose',
                'Transpose',
                KB,
                KB,
                KB,
                ONE,
                TB(TD + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1),
                LDTB - 1,
                A(J * NB + 1, (J - 1) * NB + 1),
                LDA,
                ZERO,
                WORK(J * NB + 1).asMatrix(N),
                N);
          } else {
            dgemm(
                'NoTranspose',
                'Transpose',
                KB,
                KB,
                NB + KB,
                ONE,
                TB(TD + NB + 1 + ((J - 1) * NB) * LDTB).asMatrix(LDTB - 1),
                LDTB - 1,
                A(J * NB + 1, (J - 2) * NB + 1),
                LDA,
                ZERO,
                WORK(J * NB + 1).asMatrix(N),
                N);
          }

          // Update with the previous column
          dgemm(
              'NoTranspose',
              'NoTranspose',
              N - (J + 1) * NB,
              NB,
              J * NB,
              -ONE,
              A((J + 1) * NB + 1, 1),
              LDA,
              WORK(NB + 1).asMatrix(N),
              N,
              ONE,
              A((J + 1) * NB + 1, J * NB + 1),
              LDA);
        }

        // Factorize panel
        dgetrf(N - (J + 1) * NB, NB, A((J + 1) * NB + 1, J * NB + 1), LDA,
            IPIV((J + 1) * NB + 1), IINFO);
        // IF (IINFO != 0 && INFO == 0) THEN
        //    INFO = IINFO+(J+1)*NB
        // END IF

        // Compute T(J+1, J), zero out for GEMM update
        KB = min(NB, N - (J + 1) * NB);
        dlaset('Full', KB, NB, ZERO, ZERO,
            TB(TD + NB + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1), LDTB - 1);
        dlacpy('Upper', KB, NB, A((J + 1) * NB + 1, J * NB + 1), LDA,
            TB(TD + NB + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1), LDTB - 1);
        if (J > 0) {
          dtrsm(
              'R',
              'L',
              'T',
              'U',
              KB,
              NB,
              ONE,
              A(J * NB + 1, (J - 1) * NB + 1),
              LDA,
              TB(TD + NB + 1 + (J * NB) * LDTB).asMatrix(LDTB - 1),
              LDTB - 1);
        }

        // Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM
        // updates
        for (K = 1; K <= NB; K++) {
          for (I = 1; I <= KB; I++) {
            TB[TD - NB + K - I + 1 + (J * NB + NB + I - 1) * LDTB] =
                TB[TD + NB + I - K + 1 + (J * NB + K - 1) * LDTB];
          }
        }
        dlaset(
            'Upper', KB, NB, ZERO, ONE, A((J + 1) * NB + 1, J * NB + 1), LDA);

        // Apply pivots to trailing submatrix of A
        for (K = 1; K <= KB; K++) {
          // > Adjust ipiv
          IPIV[(J + 1) * NB + K] += (J + 1) * NB;

          I1 = (J + 1) * NB + K;
          I2 = IPIV[(J + 1) * NB + K];
          if (I1 != I2) {
            // > Apply pivots to previous columns of L
            dswap(K - 1, A(I1, (J + 1) * NB + 1).asArray(), LDA,
                A(I2, (J + 1) * NB + 1).asArray(), LDA);
            // > Swap A(I1+1:M, I1) with A(I2, I1+1:M)
            if (I2 > (I1 + 1)) {
              dswap(I2 - I1 - 1, A(I1 + 1, I1).asArray(), 1,
                  A(I2, I1 + 1).asArray(), LDA);
            }
            // > Swap A(I2+1:M, I1) with A(I2+1:M, I2)
            if (I2 < N) {
              dswap(N - I2, A(I2 + 1, I1).asArray(), 1, A(I2 + 1, I2).asArray(),
                  1);
            }
            // > Swap A(I1, I1) with A(I2, I2)
            PIV = A[I1][I1];
            A[I1][I1] = A[I2][I2];
            A[I2][I2] = PIV;
            // > Apply pivots to previous columns of L
            if (J > 0) {
              dswap(J * NB, A(I1, 1).asArray(), LDA, A(I2, 1).asArray(), LDA);
            }
          }
        }

        // Apply pivots to previous columns of L

        // CALL DLASWP( J*NB, A( 1, 1 ), LDA,
        // $                     (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 )
      }
    }
  }

  // Factor the band matrix
  dgbtrf(N, N, NB, NB, TB.asMatrix(LDTB), LDTB, IPIV2, INFO);
}