analyzeTextSegments static method

List<TextScriptSegment> analyzeTextSegments(
  1. String text
)

Analyzes text and returns its contiguous segments, each flagged as CJK or non-CJK.

Implementation

/// Splits [text] into contiguous runs that share the same script class
/// (CJK vs. non-CJK, as decided by `isCJKCharacter`).
///
/// Each returned [TextScriptSegment] carries `start`/`end` offsets expressed
/// in UTF-16 code units (`end` is exclusive), the corresponding substring,
/// and the `isCJK` flag of the run.
///
/// Whitespace (space, tab, LF, CR) and the ASCII punctuation ranges
/// 0x21-0x2F and 0x3A-0x40 are treated as neutral: they never open or close
/// a segment and simply stay attached to the run in which they occur
/// (leading neutrals belong to the first segment, neutrals between two runs
/// belong to the earlier one).
///
/// Returns an empty list for empty input. Text consisting only of neutral
/// characters yields a single segment with `isCJK == false`.
static List<TextScriptSegment> analyzeTextSegments(String text) {
  if (text.isEmpty) return [];

  List<TextScriptSegment> segments = [];
  int start = 0;
  bool? currentIsCJK;

  int i = 0;
  while (i < text.length) {
    int codePoint = text.codeUnitAt(i);
    // Number of UTF-16 code units occupied by the current character.
    int width = 1;

    // Combine a valid high/low surrogate pair into one code point. An
    // unpaired surrogate is left as-is and handled as a single unit.
    if (codePoint >= 0xD800 && codePoint <= 0xDBFF && i + 1 < text.length) {
      int low = text.codeUnitAt(i + 1);
      if (low >= 0xDC00 && low <= 0xDFFF) {
        codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
        width = 2;
      }
    }

    // Neutral characters (whitespace, some ASCII punctuation) do not
    // influence segmentation.
    bool isNeutral = codePoint == 0x20 ||
        codePoint == 0x09 ||
        codePoint == 0x0A ||
        codePoint == 0x0D ||
        (codePoint >= 0x21 && codePoint <= 0x2F) || // ASCII punctuation
        (codePoint >= 0x3A && codePoint <= 0x40); // More ASCII punctuation

    if (!isNeutral) {
      bool isCJK = isCJKCharacter(codePoint);

      if (currentIsCJK == null) {
        // First non-neutral character fixes the script of the first run.
        currentIsCJK = isCJK;
      } else if (currentIsCJK != isCJK) {
        // Script change detected: close the run just before this character.
        segments.add(TextScriptSegment(
          start: start,
          end: i,
          text: text.substring(start, i),
          isCJK: currentIsCJK,
        ));
        start = i;
        currentIsCJK = isCJK;
      }
    }

    // Step over the whole character. Advancing by one unit after a
    // surrogate pair would re-examine the low surrogate on its own and
    // could place a segment boundary in the middle of the pair.
    i += width;
  }

  // Add the last segment
  if (start < text.length) {
    segments.add(TextScriptSegment(
      start: start,
      end: text.length,
      text: text.substring(start),
      isCJK: currentIsCJK ?? false,
    ));
  }

  return segments;
}