extractMainContent static method
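Extracts the main article content from an HTML string, returning null when no candidate yields more than 100 characters of cleaned text.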
Implementation
/// Extracts the main article content from [html].
///
/// Tries a fixed list of regular-expression strategies in priority order:
/// `<article>`, then `<div>` elements whose `class` attribute contains
/// `content`, `article` or `post`, then `<main>`. Within each strategy every
/// match is examined in document order; the captured inner markup is passed
/// through `_cleanText`, and the first result longer than 100 characters is
/// returned.
///
/// Matching is case-insensitive and spans line breaks. Because the patterns
/// are non-greedy, a capture ends at the first closing tag encountered, so an
/// element that nests another element of the same tag name is captured only
/// up to the inner closing tag.
///
/// Returns `null` when no candidate yields more than 100 characters of
/// cleaned text.
static String? extractMainContent(String html) {
  // Cleaned text must be strictly longer than this to count as real content
  // rather than a teaser, caption or navigation fragment.
  const minContentLength = 100;

  // Ordered from most to least specific; earlier patterns win.
  const strategies = [
    // Common article selectors
    r'<article[^>]*>(.*?)</article>',
    r'<div[^>]*class=[\"\x27][^\"\x27]*content[^\"\x27]*[\"\x27][^>]*>(.*?)</div>',
    r'<div[^>]*class=[\"\x27][^\"\x27]*article[^\"\x27]*[\"\x27][^>]*>(.*?)</div>',
    r'<div[^>]*class=[\"\x27][^\"\x27]*post[^\"\x27]*[\"\x27][^>]*>(.*?)</div>',
    r'<main[^>]*>(.*?)</main>',
  ];

  for (final pattern in strategies) {
    final regex = RegExp(pattern, caseSensitive: false, dotAll: true);
    // Inspect every match, not only the first: a page may open with a short
    // element matching this pattern (e.g. a teaser) before the real content.
    for (final match in regex.allMatches(html)) {
      // NOTE(review): `_cleanText` is defined elsewhere in this file;
      // presumably it strips markup and normalizes whitespace — confirm.
      final content = _cleanText(match.group(1) ?? '');
      if (content.length > minContentLength) return content;
    }
  }
  return null;
}