analyzeTextSegments static method
Analyzes the text and returns its consecutive segments, each tagged with its script type (CJK or non-CJK).
Implementation
/// Splits [text] into consecutive segments whose characters share the same
/// script class (CJK or non-CJK), as decided by `isCJKCharacter`.
///
/// Whitespace (space, tab, LF, CR) and the ASCII punctuation ranges
/// 0x21-0x2F and 0x3A-0x40 are treated as neutral: they never start a new
/// segment and stay attached to the segment that is currently open.
///
/// Each returned segment carries `start`/`end` offsets in UTF-16 code units
/// (`end` exclusive) and the matching substring of [text]. Returns an empty
/// list for empty input. If [text] contains only neutral characters, a single
/// segment covering the whole text is returned with `isCJK` set to false.
static List<TextScriptSegment> analyzeTextSegments(String text) {
  if (text.isEmpty) return [];

  List<TextScriptSegment> segments = [];
  int start = 0;
  bool? currentIsCJK;

  for (int i = 0; i < text.length; i++) {
    // Index of the first code unit of the current character. `i` may be
    // advanced below when the character is a surrogate pair, so segment
    // boundaries must use this value rather than `i`.
    final int charStart = i;
    int codePoint = text.codeUnitAt(i);

    // Combine a high/low surrogate pair into a single code point.
    if (codePoint >= 0xD800 && codePoint <= 0xDBFF && i + 1 < text.length) {
      int low = text.codeUnitAt(i + 1);
      if (low >= 0xDC00 && low <= 0xDFFF) {
        codePoint = 0x10000 + ((codePoint - 0xD800) << 10) + (low - 0xDC00);
        // Consume the low surrogate as part of this character. Without this
        // the next iteration would classify the lone low surrogate on its
        // own and could place a segment boundary in the middle of the pair.
        i++;
      }
    }

    // Skip neutral characters (space, punctuation)
    if (codePoint == 0x20 || codePoint == 0x09 || codePoint == 0x0A || codePoint == 0x0D ||
        (codePoint >= 0x21 && codePoint <= 0x2F) || // ASCII punctuation
        (codePoint >= 0x3A && codePoint <= 0x40)) { // More ASCII punctuation
      continue;
    }

    bool isCJK = isCJKCharacter(codePoint);
    if (currentIsCJK == null) {
      // First non-neutral character fixes the script of the opening segment.
      currentIsCJK = isCJK;
    } else if (currentIsCJK != isCJK) {
      // Script change detected: close the open segment just before this
      // character and start a new one at it.
      segments.add(TextScriptSegment(
        start: start,
        end: charStart,
        text: text.substring(start, charStart),
        isCJK: currentIsCJK,
      ));
      start = charStart;
      currentIsCJK = isCJK;
    }
  }

  // Add the last segment (always present for non-empty input).
  if (start < text.length) {
    segments.add(TextScriptSegment(
      start: start,
      end: text.length,
      text: text.substring(start),
      // No non-neutral character was seen: default to non-CJK.
      isCJK: currentIsCJK ?? false,
    ));
  }
  return segments;
}