dtrevc3 function

void dtrevc3(
  1. String SIDE,
  2. String HOWMNY,
  3. Array<bool> SELECT_,
  4. int N,
  5. Matrix<double> T_,
  6. int LDT,
  7. Matrix<double> VL_,
  8. int LDVL,
  9. Matrix<double> VR_,
  10. int LDVR,
  11. int MM,
  12. Box<int> M,
  13. Array<double> WORK_,
  14. int LWORK,
  15. Box<int> INFO,
)

Implementation

void dtrevc3(
  final String SIDE,
  final String HOWMNY,
  final Array<bool> SELECT_,
  final int N,
  final Matrix<double> T_,
  final int LDT,
  final Matrix<double> VL_,
  final int LDVL,
  final Matrix<double> VR_,
  final int LDVR,
  final int MM,
  final Box<int> M,
  final Array<double> WORK_,
  final int LWORK,
  final Box<int> INFO,
) {
  final SELECT = SELECT_.having();
  final T = T_.having(ld: LDT);
  final VL = VL_.having(ld: LDVL);
  final VR = VR_.having(ld: LDVR);
  final WORK = WORK_.having();
  const ZERO = 0.0, ONE = 1.0;
  const NBMIN = 8, NBMAX = 128;
  bool ALLV, BOTHV, LEFTV, LQUERY, OVER, PAIR, RIGHTV, SOMEV;
  int I, II, IP, IS, J, J1, J2, JNXT, K, KI, IV, MAXWRK, NB, KI2;
  double BETA,
      BIGNUM,
      EMAX,
      REC,
      REMAX = 0,
      SMIN,
      SMLNUM,
      ULP,
      UNFL,
      VCRIT,
      VMAX,
      WI,
      WR;
  final X = Matrix<double>(2, 2);
  final ISCOMPLEX = Array<int>(NBMAX);
  final IERR = Box(0);
  final SCALE = Box(0.0), XNORM = Box(0.0);

  // Decode and test the input parameters
  BOTHV = lsame(SIDE, 'B');
  RIGHTV = lsame(SIDE, 'R') || BOTHV;
  LEFTV = lsame(SIDE, 'L') || BOTHV;

  ALLV = lsame(HOWMNY, 'A');
  OVER = lsame(HOWMNY, 'B');
  SOMEV = lsame(HOWMNY, 'S');

  INFO.value = 0;
  NB = ilaenv(1, 'DTREVC', SIDE + HOWMNY, N, -1, -1, -1);
  MAXWRK = max(1, N + 2 * N * NB);
  WORK[1] = MAXWRK.toDouble();
  LQUERY = LWORK == -1;
  if (!RIGHTV && !LEFTV) {
    INFO.value = -1;
  } else if (!ALLV && !OVER && !SOMEV) {
    INFO.value = -2;
  } else if (N < 0) {
    INFO.value = -4;
  } else if (LDT < max(1, N)) {
    INFO.value = -6;
  } else if (LDVL < 1 || (LEFTV && LDVL < N)) {
    INFO.value = -8;
  } else if (LDVR < 1 || (RIGHTV && LDVR < N)) {
    INFO.value = -10;
  } else if (LWORK < max(1, 3 * N) && !LQUERY) {
    INFO.value = -14;
  } else {
    // Set M to the number of columns required to store the selected
    // eigenvectors, standardize the array SELECT if necessary, and
    // test MM.
    if (SOMEV) {
      M.value = 0;
      PAIR = false;
      for (J = 1; J <= N; J++) {
        if (PAIR) {
          PAIR = false;
          SELECT[J] = false;
        } else {
          if (J < N) {
            if (T[J + 1][J] == ZERO) {
              if (SELECT[J]) M.value++;
            } else {
              PAIR = true;
              if (SELECT[J] || SELECT[J + 1]) {
                SELECT[J] = true;
                M.value += 2;
              }
            }
          } else {
            if (SELECT[N]) M.value++;
          }
        }
      }
    } else {
      M.value = N;
    }

    if (MM < M.value) {
      INFO.value = -11;
    }
  }
  if (INFO.value != 0) {
    xerbla('DTREVC3', -INFO.value);
    return;
  } else if (LQUERY) {
    return;
  }

  // Quick return if possible.
  if (N == 0) return;

  // Use blocked version of back-transformation if sufficient workspace.
  // Zero-out the workspace to avoid potential NaN propagation.
  if (OVER && LWORK >= N + 2 * N * NBMIN) {
    NB = (LWORK - N) ~/ (2 * N);
    NB = min(NB, NBMAX);
    dlaset('F', N, 1 + 2 * NB, ZERO, ZERO, WORK.asMatrix(N), N);
  } else {
    NB = 1;
  }

  // Set the constants to control overflow.
  UNFL = dlamch('Safe minimum');
  ULP = dlamch('Precision');
  SMLNUM = UNFL * (N / ULP);
  BIGNUM = (ONE - ULP) / SMLNUM;

  // Compute 1-norm of each column of strictly upper triangular
  // part of T to control overflow in triangular solver.
  WORK[1] = ZERO;
  for (J = 2; J <= N; J++) {
    WORK[J] = ZERO;
    for (I = 1; I <= J - 1; I++) {
      WORK[J] += T[I][J].abs();
    }
  }

  // Index IP is used to specify the real or complex eigenvalue:
  // IP = 0, real eigenvalue,
  // 1, first  of conjugate complex pair: (wr,wi)
  // -1, second of conjugate complex pair: (wr,wi)
  // ISCOMPLEX array stores IP for each column in current block.
  if (RIGHTV) {
    // Compute right eigenvectors.

    // IV is index of column in current block.
    // For complex right vector, uses IV-1 for real part and IV for complex part.
    // Non-blocked version always uses IV=2;
    // blocked     version starts with IV=NB, goes down to 1 or 2.
    // (Note the "0-th" column is used for 1-norms computed above.)
    IV = 2;
    if (NB > 2) {
      IV = NB;
    }

    IP = 0;
    IS = M.value;
    for (KI = N; KI >= 1; KI--) {
      if (IP == -1) {
        // previous iteration (ki+1) was second of conjugate pair,
        // so this ki is first of conjugate pair; skip to end of loop
        IP = 1;
        continue;
      } else if (KI == 1) {
        // last column, so this ki must be real eigenvalue
        IP = 0;
      } else if (T[KI][KI - 1] == ZERO) {
        // zero on sub-diagonal, so this ki is real eigenvalue
        IP = 0;
      } else {
        // non-zero on sub-diagonal, so this ki is second of conjugate pair
        IP = -1;
      }

      if (SOMEV) {
        if (IP == 0) {
          if (!SELECT[KI]) continue;
        } else {
          if (!SELECT[KI - 1]) continue;
        }
      }

      // Compute the KI-th eigenvalue (WR,WI).
      WR = T[KI][KI];
      WI = ZERO;
      if (IP != 0) {
        WI = sqrt(T[KI][KI - 1].abs()) * sqrt(T[KI - 1][KI].abs());
      }
      SMIN = max(ULP * (WR.abs() + WI.abs()), SMLNUM);

      if (IP == 0) {
        // Real right eigenvector
        WORK[KI + IV * N] = ONE;

        // Form right-hand side.
        for (K = 1; K <= KI - 1; K++) {
          WORK[K + IV * N] = -T[K][KI];
        }

        // Solve upper quasi-triangular system:
        // [ T[1:KI-1][1:KI-1] - WR ]*X = SCALE*WORK.
        JNXT = KI - 1;
        for (J = KI - 1; J >= 1; J--) {
          if (J > JNXT) continue;
          J1 = J;
          J2 = J;
          JNXT = J - 1;
          if (J > 1) {
            if (T[J][J - 1] != ZERO) {
              J1 = J - 1;
              JNXT = J - 2;
            }
          }

          if (J1 == J2) {
            // 1-by-1 diagonal block
            dlaln2(
                false,
                1,
                1,
                SMIN,
                ONE,
                T(J, J),
                LDT,
                ONE,
                ONE,
                WORK(J + IV * N).asMatrix(N),
                N,
                WR,
                ZERO,
                X,
                2,
                SCALE,
                XNORM,
                IERR);

            // Scale X[1][1] to avoid overflow when updating
            // the right-hand side.
            if (XNORM.value > ONE) {
              if (WORK[J] > BIGNUM / XNORM.value) {
                X[1][1] /= XNORM.value;
                SCALE.value /= XNORM.value;
              }
            }

            // Scale if necessary
            if (SCALE.value != ONE) dscal(KI, SCALE.value, WORK(1 + IV * N), 1);
            WORK[J + IV * N] = X[1][1];

            // Update right-hand side
            daxpy(J - 1, -X[1][1], T(1, J).asArray(), 1, WORK(1 + IV * N), 1);
          } else {
            // 2-by-2 diagonal block
            dlaln2(
                false,
                2,
                1,
                SMIN,
                ONE,
                T(J - 1, J - 1),
                LDT,
                ONE,
                ONE,
                WORK(J - 1 + IV * N).asMatrix(N),
                N,
                WR,
                ZERO,
                X,
                2,
                SCALE,
                XNORM,
                IERR);

            // Scale X[1][1] and X[2][1] to avoid overflow when
            // updating the right-hand side.
            if (XNORM.value > ONE) {
              BETA = max(WORK[J - 1], WORK[J]);
              if (BETA > BIGNUM / XNORM.value) {
                X[1][1] /= XNORM.value;
                X[2][1] /= XNORM.value;
                SCALE.value /= XNORM.value;
              }
            }

            // Scale if necessary
            if (SCALE.value != ONE) dscal(KI, SCALE.value, WORK(1 + IV * N), 1);
            WORK[J - 1 + IV * N] = X[1][1];
            WORK[J + IV * N] = X[2][1];

            // Update right-hand side
            daxpy(
                J - 2, -X[1][1], T(1, J - 1).asArray(), 1, WORK(1 + IV * N), 1);
            daxpy(J - 2, -X[2][1], T(1, J).asArray(), 1, WORK(1 + IV * N), 1);
          }
        }

        // Copy the vector x or Q*x to VR and normalize.
        if (!OVER) {
          // no back-transform: copy x to VR and normalize.
          dcopy(KI, WORK(1 + IV * N), 1, VR(1, IS).asArray(), 1);

          II = idamax(KI, VR(1, IS).asArray(), 1);
          REMAX = ONE / VR[II][IS].abs();
          dscal(KI, REMAX, VR(1, IS).asArray(), 1);

          for (K = KI + 1; K <= N; K++) {
            VR[K][IS] = ZERO;
          }
        } else if (NB == 1) {
          // version 1: back-transform each vector with GEMV, Q*x.
          if (KI > 1) {
            dgemv('N', N, KI - 1, ONE, VR, LDVR, WORK(1 + IV * N), 1,
                WORK[KI + IV * N], VR(1, KI).asArray(), 1);
          }

          II = idamax(N, VR(1, KI).asArray(), 1);
          REMAX = ONE / VR[II][KI].abs();
          dscal(N, REMAX, VR(1, KI).asArray(), 1);
        } else {
          // version 2: back-transform block of vectors with GEMM
          // zero out below vector
          for (K = KI + 1; K <= N; K++) {
            WORK[K + IV * N] = ZERO;
          }
          ISCOMPLEX[IV] = IP;
          // back-transform and normalization is done below
        }
      } else {
        // Complex right eigenvector.

        // Initial solve
        // [ ( T[KI-1][KI-1] T[KI-1][KI] ) - (WR + I*WI) ]*X = 0.
        // [ ( T[KI][  KI-1] T[KI][  KI] )               ]
        if (T[KI - 1][KI].abs() >= T[KI][KI - 1].abs()) {
          WORK[KI - 1 + (IV - 1) * N] = ONE;
          WORK[KI + IV * N] = WI / T[KI - 1][KI];
        } else {
          WORK[KI - 1 + (IV - 1) * N] = -WI / T[KI][KI - 1];
          WORK[KI + IV * N] = ONE;
        }
        WORK[KI + (IV - 1) * N] = ZERO;
        WORK[KI - 1 + IV * N] = ZERO;

        // Form right-hand side.
        for (K = 1; K <= KI - 2; K++) {
          WORK[K + (IV - 1) * N] = -WORK[KI - 1 + (IV - 1) * N] * T[K][KI - 1];
          WORK[K + IV * N] = -WORK[KI + IV * N] * T[K][KI];
        }

        // Solve upper quasi-triangular system:
        // [ T[1:KI-2][1:KI-2] - (WR+i*WI) ]*X = SCALE*(WORK+i*WORK2)
        JNXT = KI - 2;
        for (J = KI - 2; J >= 1; J--) {
          if (J > JNXT) continue;
          J1 = J;
          J2 = J;
          JNXT = J - 1;
          if (J > 1) {
            if (T[J][J - 1] != ZERO) {
              J1 = J - 1;
              JNXT = J - 2;
            }
          }

          if (J1 == J2) {
            // 1-by-1 diagonal block
            dlaln2(
                false,
                1,
                2,
                SMIN,
                ONE,
                T(J, J),
                LDT,
                ONE,
                ONE,
                WORK(J + (IV - 1) * N).asMatrix(N),
                N,
                WR,
                WI,
                X,
                2,
                SCALE,
                XNORM,
                IERR);

            // Scale X[1][1] and X[1][2] to avoid overflow when
            // updating the right-hand side.
            if (XNORM.value > ONE) {
              if (WORK[J] > BIGNUM / XNORM.value) {
                X[1][1] /= XNORM.value;
                X[1][2] /= XNORM.value;
                SCALE.value /= XNORM.value;
              }
            }

            // Scale if necessary
            if (SCALE.value != ONE) {
              dscal(KI, SCALE.value, WORK(1 + (IV - 1) * N), 1);
              dscal(KI, SCALE.value, WORK(1 + IV * N), 1);
            }
            WORK[J + (IV - 1) * N] = X[1][1];
            WORK[J + IV * N] = X[1][2];

            // Update the right-hand side
            daxpy(J - 1, -X[1][1], T(1, J).asArray(), 1, WORK(1 + (IV - 1) * N),
                1);
            daxpy(J - 1, -X[1][2], T(1, J).asArray(), 1, WORK(1 + IV * N), 1);
          } else {
            // 2-by-2 diagonal block
            dlaln2(
                false,
                2,
                2,
                SMIN,
                ONE,
                T(J - 1, J - 1),
                LDT,
                ONE,
                ONE,
                WORK(J - 1 + (IV - 1) * N).asMatrix(N),
                N,
                WR,
                WI,
                X,
                2,
                SCALE,
                XNORM,
                IERR);

            // Scale X to avoid overflow when updating
            // the right-hand side.
            if (XNORM.value > ONE) {
              BETA = max(WORK[J - 1], WORK[J]);
              if (BETA > BIGNUM / XNORM.value) {
                REC = ONE / XNORM.value;
                X[1][1] *= REC;
                X[1][2] *= REC;
                X[2][1] *= REC;
                X[2][2] *= REC;
                SCALE.value *= REC;
              }
            }

            // Scale if necessary
            if (SCALE.value != ONE) {
              dscal(KI, SCALE.value, WORK(1 + (IV - 1) * N), 1);
              dscal(KI, SCALE.value, WORK(1 + IV * N), 1);
            }
            WORK[J - 1 + (IV - 1) * N] = X[1][1];
            WORK[J + (IV - 1) * N] = X[2][1];
            WORK[J - 1 + IV * N] = X[1][2];
            WORK[J + IV * N] = X[2][2];

            // Update the right-hand side
            daxpy(J - 2, -X[1][1], T(1, J - 1).asArray(), 1,
                WORK(1 + (IV - 1) * N), 1);
            daxpy(J - 2, -X[2][1], T(1, J).asArray(), 1, WORK(1 + (IV - 1) * N),
                1);
            daxpy(
                J - 2, -X[1][2], T(1, J - 1).asArray(), 1, WORK(1 + IV * N), 1);
            daxpy(J - 2, -X[2][2], T(1, J).asArray(), 1, WORK(1 + IV * N), 1);
          }
        }

        // Copy the vector x or Q*x to VR and normalize.
        if (!OVER) {
          // no back-transform: copy x to VR and normalize.
          dcopy(KI, WORK(1 + (IV - 1) * N), 1, VR(1, IS - 1).asArray(), 1);
          dcopy(KI, WORK(1 + IV * N), 1, VR(1, IS).asArray(), 1);

          EMAX = ZERO;
          for (K = 1; K <= KI; K++) {
            EMAX = max(EMAX, VR[K][IS - 1].abs() + VR[K][IS].abs());
          }
          REMAX = ONE / EMAX;
          dscal(KI, REMAX, VR(1, IS - 1).asArray(), 1);
          dscal(KI, REMAX, VR(1, IS).asArray(), 1);

          for (K = KI + 1; K <= N; K++) {
            VR[K][IS - 1] = ZERO;
            VR[K][IS] = ZERO;
          }
        } else if (NB == 1) {
          // version 1: back-transform each vector with GEMV, Q*x.
          if (KI > 2) {
            dgemv('N', N, KI - 2, ONE, VR, LDVR, WORK(1 + (IV - 1) * N), 1,
                WORK[KI - 1 + (IV - 1) * N], VR(1, KI - 1).asArray(), 1);
            dgemv('N', N, KI - 2, ONE, VR, LDVR, WORK(1 + IV * N), 1,
                WORK[KI + IV * N], VR(1, KI).asArray(), 1);
          } else {
            dscal(N, WORK[KI - 1 + (IV - 1) * N], VR(1, KI - 1).asArray(), 1);
            dscal(N, WORK[KI + IV * N], VR(1, KI).asArray(), 1);
          }

          EMAX = ZERO;
          for (K = 1; K <= N; K++) {
            EMAX = max(EMAX, VR[K][KI - 1].abs() + VR[K][KI].abs());
          }
          REMAX = ONE / EMAX;
          dscal(N, REMAX, VR(1, KI - 1).asArray(), 1);
          dscal(N, REMAX, VR(1, KI).asArray(), 1);
        } else {
          // version 2: back-transform block of vectors with GEMM
          // zero out below vector
          for (K = KI + 1; K <= N; K++) {
            WORK[K + (IV - 1) * N] = ZERO;
            WORK[K + IV * N] = ZERO;
          }
          ISCOMPLEX[IV - 1] = -IP;
          ISCOMPLEX[IV] = IP;
          IV--;
          // back-transform and normalization is done below
        }
      }

      if (NB > 1) {
        // Blocked version of back-transform
        // For complex case, KI2 includes both vectors (KI-1 and KI)
        if (IP == 0) {
          KI2 = KI;
        } else {
          KI2 = KI - 1;
        }

        // Columns IV:NB of work are valid vectors.
        // When the number of vectors stored reaches NB-1 or NB,
        // or if this was last vector, do the GEMM
        if ((IV <= 2) || (KI2 == 1)) {
          dgemm(
              'N',
              'N',
              N,
              NB - IV + 1,
              KI2 + NB - IV,
              ONE,
              VR,
              LDVR,
              WORK(1 + IV * N).asMatrix(N),
              N,
              ZERO,
              WORK(1 + (NB + IV) * N).asMatrix(N),
              N);
          // normalize vectors
          for (K = IV; K <= NB; K++) {
            if (ISCOMPLEX[K] == 0) {
              // real eigenvector
              II = idamax(N, WORK(1 + (NB + K) * N), 1);
              REMAX = ONE / WORK[II + (NB + K) * N].abs();
            } else if (ISCOMPLEX[K] == 1) {
              // first eigenvector of conjugate pair
              EMAX = ZERO;
              for (II = 1; II <= N; II++) {
                EMAX = max(
                    EMAX,
                    WORK[II + (NB + K) * N].abs() +
                        WORK[II + (NB + K + 1) * N].abs());
              }
              REMAX = ONE / EMAX;
              // else if ISCOMPLEX[K] == -1
              // second eigenvector of conjugate pair
              // reuse same REMAX as previous K
            }
            dscal(N, REMAX, WORK(1 + (NB + K) * N), 1);
          }
          dlacpy('F', N, NB - IV + 1, WORK(1 + (NB + IV) * N).asMatrix(N), N,
              VR(1, KI2), LDVR);
          IV = NB;
        } else {
          IV--;
        }
      } // ! blocked back-transform;

      IS--;
      if (IP != 0) IS--;
    }
  }

  if (LEFTV) {
    // Compute left eigenvectors.

    // IV is index of column in current block.
    // For complex left vector, uses IV for real part and IV+1 for complex part.
    // Non-blocked version always uses IV=1;
    // blocked     version starts with IV=1, goes up to NB-1 or NB.
    // (Note the "0-th" column is used for 1-norms computed above.)
    IV = 1;
    IP = 0;
    IS = 1;
    for (KI = 1; KI <= N; KI++) {
      if (IP == 1) {
        // previous iteration (ki-1) was first of conjugate pair,
        // so this ki is second of conjugate pair; skip to end of loop
        IP = -1;
        continue;
      } else if (KI == N) {
        // last column, so this ki must be real eigenvalue
        IP = 0;
      } else if (T[KI + 1][KI] == ZERO) {
        // zero on sub-diagonal, so this ki is real eigenvalue
        IP = 0;
      } else {
        // non-zero on sub-diagonal, so this ki is first of conjugate pair
        IP = 1;
      }

      if (SOMEV) {
        if (!SELECT[KI]) continue;
      }

      // Compute the KI-th eigenvalue (WR,WI).
      WR = T[KI][KI];
      WI = ZERO;
      if (IP != 0) {
        WI = sqrt(T[KI][KI + 1].abs()) * sqrt(T[KI + 1][KI].abs());
      }
      SMIN = max(ULP * (WR.abs() + WI.abs()), SMLNUM);

      if (IP == 0) {
        // Real left eigenvector
        WORK[KI + IV * N] = ONE;

        // Form right-hand side.
        for (K = KI + 1; K <= N; K++) {
          WORK[K + IV * N] = -T[KI][K];
        }

        // Solve transposed quasi-triangular system:
        // [ T[KI+1:N][KI+1:N] - WR ]**T * X = SCALE*WORK
        VMAX = ONE;
        VCRIT = BIGNUM;

        JNXT = KI + 1;
        for (J = KI + 1; J <= N; J++) {
          if (J < JNXT) continue;
          J1 = J;
          J2 = J;
          JNXT = J + 1;
          if (J < N) {
            if (T[J + 1][J] != ZERO) {
              J2 = J + 1;
              JNXT = J + 2;
            }
          }

          if (J1 == J2) {
            // 1-by-1 diagonal block

            // Scale if necessary to avoid overflow when forming
            // the right-hand side.
            if (WORK[J] > VCRIT) {
              REC = ONE / VMAX;
              dscal(N - KI + 1, REC, WORK(KI + IV * N), 1);
              VMAX = ONE;
              VCRIT = BIGNUM;
            }

            WORK[J + IV * N] -= ddot(J - KI - 1, T(KI + 1, J).asArray(), 1,
                WORK(KI + 1 + IV * N), 1);

            // Solve [ T[J][J] - WR ]**T * X = WORK
            dlaln2(
                false,
                1,
                1,
                SMIN,
                ONE,
                T(J, J),
                LDT,
                ONE,
                ONE,
                WORK(J + IV * N).asMatrix(N),
                N,
                WR,
                ZERO,
                X,
                2,
                SCALE,
                XNORM,
                IERR);

            // Scale if necessary
            if (SCALE.value != ONE) {
              dscal(N - KI + 1, SCALE.value, WORK(KI + IV * N), 1);
            }
            WORK[J + IV * N] = X[1][1];
            VMAX = max(WORK[J + IV * N].abs(), VMAX);
            VCRIT = BIGNUM / VMAX;
          } else {
            // 2-by-2 diagonal block

            // Scale if necessary to avoid overflow when forming
            // the right-hand side.
            BETA = max(WORK[J], WORK[J + 1]);
            if (BETA > VCRIT) {
              REC = ONE / VMAX;
              dscal(N - KI + 1, REC, WORK(KI + IV * N), 1);
              VMAX = ONE;
              VCRIT = BIGNUM;
            }

            WORK[J + IV * N] -= ddot(J - KI - 1, T(KI + 1, J).asArray(), 1,
                WORK(KI + 1 + IV * N), 1);

            WORK[J + 1 + IV * N] -= ddot(J - KI - 1, T(KI + 1, J + 1).asArray(),
                1, WORK(KI + 1 + IV * N), 1);

            // Solve
            // [ T[J][J]-WR   T[J][J+1]      ]**T * X = SCALE*( WORK1 )
            // [ T[J+1][J]    T[J+1][J+1]-WR ]                ( WORK2 )
            dlaln2(
                true,
                2,
                1,
                SMIN,
                ONE,
                T(J, J),
                LDT,
                ONE,
                ONE,
                WORK(J + IV * N).asMatrix(N),
                N,
                WR,
                ZERO,
                X,
                2,
                SCALE,
                XNORM,
                IERR);

            // Scale if necessary
            if (SCALE.value != ONE) {
              dscal(N - KI + 1, SCALE.value, WORK(KI + IV * N), 1);
            }
            WORK[J + IV * N] = X[1][1];
            WORK[J + 1 + IV * N] = X[2][1];

            VMAX = max(
                WORK[J + IV * N].abs(), max(WORK[J + 1 + IV * N].abs(), VMAX));
            VCRIT = BIGNUM / VMAX;
          }
        }

        // Copy the vector x or Q*x to VL and normalize.
        if (!OVER) {
          // no back-transform: copy x to VL and normalize.
          dcopy(N - KI + 1, WORK(KI + IV * N), 1, VL(KI, IS).asArray(), 1);

          II = idamax(N - KI + 1, VL(KI, IS).asArray(), 1) + KI - 1;
          REMAX = ONE / VL[II][IS].abs();
          dscal(N - KI + 1, REMAX, VL(KI, IS).asArray(), 1);

          for (K = 1; K <= KI - 1; K++) {
            VL[K][IS] = ZERO;
          }
        } else if (NB == 1) {
          // version 1: back-transform each vector with GEMV, Q*x.
          if (KI < N) {
            dgemv(
                'N',
                N,
                N - KI,
                ONE,
                VL(1, KI + 1),
                LDVL,
                WORK(KI + 1 + IV * N),
                1,
                WORK[KI + IV * N],
                VL(1, KI).asArray(),
                1);
          }

          II = idamax(N, VL(1, KI).asArray(), 1);
          REMAX = ONE / VL[II][KI].abs();
          dscal(N, REMAX, VL(1, KI).asArray(), 1);
        } else {
          // version 2: back-transform block of vectors with GEMM
          // zero out above vector
          // could go from KI-NV+1 to KI-1
          for (K = 1; K <= KI - 1; K++) {
            WORK[K + IV * N] = ZERO;
          }
          ISCOMPLEX[IV] = IP;
          // back-transform and normalization is done below
        }
      } else {
        // Complex left eigenvector.

        // Initial solve:
        // [ ( T[KI][KI]    T[KI][KI+1]  )**T - (WR - I* WI) ]*X = 0.
        // [ ( T[KI+1][KI] T[KI+1][KI+1] )                   ]
        if (T[KI][KI + 1].abs() >= T[KI + 1][KI].abs()) {
          WORK[KI + IV * N] = WI / T[KI][KI + 1];
          WORK[KI + 1 + (IV + 1) * N] = ONE;
        } else {
          WORK[KI + IV * N] = ONE;
          WORK[KI + 1 + (IV + 1) * N] = -WI / T[KI + 1][KI];
        }
        WORK[KI + 1 + IV * N] = ZERO;
        WORK[KI + (IV + 1) * N] = ZERO;

        // Form right-hand side.
        for (K = KI + 2; K <= N; K++) {
          WORK[K + IV * N] = -WORK[KI + IV * N] * T[KI][K];
          WORK[K + (IV + 1) * N] = -WORK[KI + 1 + (IV + 1) * N] * T[KI + 1][K];
        }

        // Solve transposed quasi-triangular system:
        // [ T[KI+2:N][KI+2:N]**T - (WR-i*WI) ]*X = WORK1+i*WORK2
        VMAX = ONE;
        VCRIT = BIGNUM;

        JNXT = KI + 2;
        for (J = KI + 2; J <= N; J++) {
          if (J < JNXT) continue;
          J1 = J;
          J2 = J;
          JNXT = J + 1;
          if (J < N) {
            if (T[J + 1][J] != ZERO) {
              J2 = J + 1;
              JNXT = J + 2;
            }
          }

          if (J1 == J2) {
            // 1-by-1 diagonal block

            // Scale if necessary to avoid overflow when
            // forming the right-hand side elements.
            if (WORK[J] > VCRIT) {
              REC = ONE / VMAX;
              dscal(N - KI + 1, REC, WORK(KI + IV * N), 1);
              dscal(N - KI + 1, REC, WORK(KI + (IV + 1) * N), 1);
              VMAX = ONE;
              VCRIT = BIGNUM;
            }

            WORK[J + IV * N] -= ddot(J - KI - 2, T(KI + 2, J).asArray(), 1,
                WORK(KI + 2 + IV * N), 1);
            WORK[J + (IV + 1) * N] -= ddot(J - KI - 2, T(KI + 2, J).asArray(),
                1, WORK(KI + 2 + (IV + 1) * N), 1);

            // Solve [ T[J][J]-(WR-i*WI) ]*(X11+i*X12)= WK+I*WK2
            dlaln2(
                false,
                1,
                2,
                SMIN,
                ONE,
                T(J, J),
                LDT,
                ONE,
                ONE,
                WORK(J + IV * N).asMatrix(N),
                N,
                WR,
                -WI,
                X,
                2,
                SCALE,
                XNORM,
                IERR);

            // Scale if necessary
            if (SCALE.value != ONE) {
              dscal(N - KI + 1, SCALE.value, WORK(KI + IV * N), 1);
              dscal(N - KI + 1, SCALE.value, WORK(KI + (IV + 1) * N), 1);
            }
            WORK[J + IV * N] = X[1][1];
            WORK[J + (IV + 1) * N] = X[1][2];
            VMAX = max(WORK[J + IV * N].abs(),
                max(WORK[J + (IV + 1) * N].abs(), VMAX));
            VCRIT = BIGNUM / VMAX;
          } else {
            // 2-by-2 diagonal block

            // Scale if necessary to avoid overflow when forming
            // the right-hand side elements.
            BETA = max(WORK[J], WORK[J + 1]);
            if (BETA > VCRIT) {
              REC = ONE / VMAX;
              dscal(N - KI + 1, REC, WORK(KI + IV * N), 1);
              dscal(N - KI + 1, REC, WORK(KI + (IV + 1) * N), 1);
              VMAX = ONE;
              VCRIT = BIGNUM;
            }

            WORK[J + IV * N] -= ddot(J - KI - 2, T(KI + 2, J).asArray(), 1,
                WORK(KI + 2 + IV * N), 1);

            WORK[J + (IV + 1) * N] -= ddot(J - KI - 2, T(KI + 2, J).asArray(),
                1, WORK(KI + 2 + (IV + 1) * N), 1);

            WORK[J + 1 + IV * N] -= ddot(J - KI - 2, T(KI + 2, J + 1).asArray(),
                1, WORK(KI + 2 + IV * N), 1);

            WORK[J + 1 + (IV + 1) * N] -= ddot(J - KI - 2,
                T(KI + 2, J + 1).asArray(), 1, WORK(KI + 2 + (IV + 1) * N), 1);

            // Solve 2-by-2 complex linear equation
            // [ (T[j][j]   T[j][j+1]  )**T - (wr-i*wi)*I ]*X = SCALE*B
            // [ (T[j+1][j] T[j+1][j+1])                  ]
            dlaln2(
                true,
                2,
                2,
                SMIN,
                ONE,
                T(J, J),
                LDT,
                ONE,
                ONE,
                WORK(J + IV * N).asMatrix(N),
                N,
                WR,
                -WI,
                X,
                2,
                SCALE,
                XNORM,
                IERR);

            // Scale if necessary
            if (SCALE.value != ONE) {
              dscal(N - KI + 1, SCALE.value, WORK(KI + IV * N), 1);
              dscal(N - KI + 1, SCALE.value, WORK(KI + (IV + 1) * N), 1);
            }
            WORK[J + IV * N] = X[1][1];
            WORK[J + (IV + 1) * N] = X[1][2];
            WORK[J + 1 + IV * N] = X[2][1];
            WORK[J + 1 + (IV + 1) * N] = X[2][2];
            VMAX = max(
                X[1][1].abs(),
                max(
                  max(X[1][2].abs(), X[2][1].abs()),
                  max(X[2][2].abs(), VMAX),
                ));
            VCRIT = BIGNUM / VMAX;
          }
        }

        // Copy the vector x or Q*x to VL and normalize.
        if (!OVER) {
          // no back-transform: copy x to VL and normalize.
          dcopy(N - KI + 1, WORK(KI + IV * N), 1, VL(KI, IS).asArray(), 1);
          dcopy(N - KI + 1, WORK(KI + (IV + 1) * N), 1,
              VL(KI, IS + 1).asArray(), 1);

          EMAX = ZERO;
          for (K = KI; K <= N; K++) {
            EMAX = max(EMAX, VL[K][IS].abs() + VL[K][IS + 1].abs());
          }
          REMAX = ONE / EMAX;
          dscal(N - KI + 1, REMAX, VL(KI, IS).asArray(), 1);
          dscal(N - KI + 1, REMAX, VL(KI, IS + 1).asArray(), 1);

          for (K = 1; K <= KI - 1; K++) {
            VL[K][IS] = ZERO;
            VL[K][IS + 1] = ZERO;
          }
        } else if (NB == 1) {
          // version 1: back-transform each vector with GEMV, Q*x.
          if (KI < N - 1) {
            dgemv(
                'N',
                N,
                N - KI - 1,
                ONE,
                VL(1, KI + 2),
                LDVL,
                WORK(KI + 2 + IV * N),
                1,
                WORK[KI + IV * N],
                VL(1, KI).asArray(),
                1);
            dgemv(
                'N',
                N,
                N - KI - 1,
                ONE,
                VL(1, KI + 2),
                LDVL,
                WORK(KI + 2 + (IV + 1) * N),
                1,
                WORK[KI + 1 + (IV + 1) * N],
                VL(1, KI + 1).asArray(),
                1);
          } else {
            dscal(N, WORK[KI + IV * N], VL(1, KI).asArray(), 1);
            dscal(N, WORK[KI + 1 + (IV + 1) * N], VL(1, KI + 1).asArray(), 1);
          }

          EMAX = ZERO;
          for (K = 1; K <= N; K++) {
            EMAX = max(EMAX, VL[K][KI].abs() + VL[K][KI + 1].abs());
          }
          REMAX = ONE / EMAX;
          dscal(N, REMAX, VL(1, KI).asArray(), 1);
          dscal(N, REMAX, VL(1, KI + 1).asArray(), 1);
        } else {
          // version 2: back-transform block of vectors with GEMM
          // zero out above vector
          // could go from KI-NV+1 to KI-1
          for (K = 1; K <= KI - 1; K++) {
            WORK[K + IV * N] = ZERO;
            WORK[K + (IV + 1) * N] = ZERO;
          }
          ISCOMPLEX[IV] = IP;
          ISCOMPLEX[IV + 1] = -IP;
          IV++;
          // back-transform and normalization is done below
        }
      }

      if (NB > 1) {
        // Blocked version of back-transform
        // For complex case, KI2 includes both vectors (KI and KI+1)
        if (IP == 0) {
          KI2 = KI;
        } else {
          KI2 = KI + 1;
        }

        // Columns 1:IV of work are valid vectors.
        // When the number of vectors stored reaches NB-1 or NB,
        // or if this was last vector, do the GEMM
        if ((IV >= NB - 1) || (KI2 == N)) {
          dgemm(
              'N',
              'N',
              N,
              IV,
              N - KI2 + IV,
              ONE,
              VL(1, KI2 - IV + 1),
              LDVL,
              WORK(KI2 - IV + 1 + 1 * N).asMatrix(N),
              N,
              ZERO,
              WORK(1 + (NB + 1) * N).asMatrix(N),
              N);
          // normalize vectors
          for (K = 1; K <= IV; K++) {
            if (ISCOMPLEX[K] == 0) {
              // real eigenvector
              II = idamax(N, WORK(1 + (NB + K) * N), 1);
              REMAX = ONE / WORK[II + (NB + K) * N].abs();
            } else if (ISCOMPLEX[K] == 1) {
              // first eigenvector of conjugate pair
              EMAX = ZERO;
              for (II = 1; II <= N; II++) {
                EMAX = max(
                    EMAX,
                    WORK[II + (NB + K) * N].abs() +
                        WORK[II + (NB + K + 1) * N].abs());
              }
              REMAX = ONE / EMAX;
              // else if ISCOMPLEX[K] == -1
              // second eigenvector of conjugate pair
              // reuse same REMAX as previous K
            }
            dscal(N, REMAX, WORK(1 + (NB + K) * N), 1);
          }
          dlacpy('F', N, IV, WORK(1 + (NB + 1) * N).asMatrix(N), N,
              VL(1, KI2 - IV + 1), LDVL);
          IV = 1;
        } else {
          IV++;
        }
      } // ! blocked back-transform;

      IS++;
      if (IP != 0) IS++;
    }
  }
}