toPlainText static method
Convert HTML to clean plain text
Implementation
/// Converts an HTML string to plain text.
///
/// Processing order (the order matters, because each step consumes the tags
/// it matches):
///
/// 1. `<script>` and `<style>` elements are removed together with their
///    content.
/// 2. Closing `</p>` and `</h1>`..`</h6>` tags become a blank line (`\n\n`).
/// 3. The remaining block-level closers (`</div>`, `</li>`, `</br>`) and
///    `<br>` / `<br/>` tags become a single newline.
/// 4. Opening `<li>` tags become a bullet prefix (`• `).
/// 5. Every tag that is still present is stripped.
///
/// The intermediate result is handed to `_cleanText` (defined elsewhere in
/// this file) and its return value is returned unchanged.
static String toPlainText(String html) {
  // Drop script and style elements completely, including their bodies.
  // `dotAll` lets `.` span newlines inside the element content.
  final String withoutScripts = html.replaceAll(
      RegExp(r'<(script|style)[^>]*>.*?</\1>',
          caseSensitive: false, dotAll: true),
      '');

  final String textOnly = withoutScripts
      // Paragraph and heading closers must be handled BEFORE the generic
      // block-element rule; otherwise that rule would consume them first and
      // they would only ever produce a single newline.
      .replaceAll(RegExp(r'</(p|h[1-6])>', caseSensitive: false), '\n\n')
      // Single line break for the remaining block-level closing tags.
      .replaceAll(RegExp(r'</(div|li|br)>', caseSensitive: false), '\n')
      .replaceAll(RegExp(r'<br\s*/?>', caseSensitive: false), '\n')
      // List items. The word boundary keeps tags such as `<link ...>` from
      // being mistaken for `<li ...>` and turned into a bullet.
      .replaceAll(RegExp(r'<li\b[^>]*>', caseSensitive: false), '• ')
      // Remove all remaining HTML tags.
      .replaceAll(RegExp(r'<[^>]*>'), '');

  return _cleanText(textOnly);
}