extractMainContent static method

String? extractMainContent(String html)

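Extracts the main article content from an HTML string. Returns null when no matched region yields more than 100 characters of cleaned text.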

Implementation

/// Extracts the main article content from [html] using regex heuristics.
///
/// Tries a fixed, prioritized list of patterns (`<article>`, then `<div>`
/// elements whose `class` attribute contains `content`, `article` or `post`,
/// then `<main>`). For each pattern, every match in document order is passed
/// through `_cleanText`, and the first result longer than 100 characters is
/// returned.
///
/// Returns `null` when no candidate produces enough text.
///
/// Limitations: the patterns are non-greedy, so a match ends at the first
/// closing tag of the same name. Content inside nested `<div>`s may therefore
/// be truncated. This is a heuristic, not an HTML parser.
static String? extractMainContent(String html) {
  // A candidate must clean down to more than this many characters to count.
  const minContentLength = 100;

  // Ordered from most to least specific; earlier patterns win.
  const patterns = [
    // Common article selectors
    r'<article[^>]*>(.*?)</article>',
    r'<div[^>]*class=[\"\x27][^\"\x27]*content[^\"\x27]*[\"\x27][^>]*>(.*?)</div>',
    r'<div[^>]*class=[\"\x27][^\"\x27]*article[^\"\x27]*[\"\x27][^>]*>(.*?)</div>',
    r'<div[^>]*class=[\"\x27][^\"\x27]*post[^\"\x27]*[\"\x27][^>]*>(.*?)</div>',
    r'<main[^>]*>(.*?)</main>',
  ];

  for (final pattern in patterns) {
    final regex = RegExp(pattern, caseSensitive: false, dotAll: true);
    // Check every match, not only the first: the first element matching a
    // pattern is often a short header or teaser, and stopping there would
    // skip a later element of the same kind that holds the real content.
    for (final match in regex.allMatches(html)) {
      final content = _cleanText(match.group(1) ?? '');
      if (content.length > minContentLength) return content;
    }
  }
  return null;
}