removeClutter static method

String removeClutter(
  1. String html
)

Removes clutter elements (scripts, styles, navigation, and ad-like blocks) from an HTML string and returns the cleaned markup.

Implementation

/// Strips non-content markup from [html] and returns the cleaned string.
///
/// Three passes are applied in order:
///  1. `<script>` and `<style>` elements, including their contents.
///  2. Structural chrome: `<nav>`, `<header>`, `<footer>`, `<aside>`, `<menu>`.
///  3. Any element whose `class` or `id` value contains a clutter keyword
///     (ads, banners, popups, sharing widgets, comments, sidebars, ...).
///
/// This is a regex heuristic, not an HTML parser: an element is removed from
/// its opening tag up to the first closing tag with the same name, so an
/// element that nests another element of the same name is only partially
/// removed.
static String removeClutter(String html) {
  String cleaned = html;

  // Remove script and style elements together with their contents.
  // `(?=[\s/>])` terminates the tag name so look-alikes such as
  // `<stylesheet>` are not treated as `<style>`.
  cleaned = cleaned.replaceAll(
      RegExp(r'<(script|style)(?=[\s/>])[^>]*>.*?</\1\s*>',
          caseSensitive: false, dotAll: true),
      '');

  // Remove navigation / page-chrome elements.
  cleaned = cleaned.replaceAll(
      RegExp(r'<(nav|header|footer|aside|menu)(?=[\s/>])[^>]*>.*?</\1\s*>',
          caseSensitive: false, dotAll: true),
      '');

  // Regex fragments looked for inside class/id attribute values.
  final adPatterns = [
    // "ad" is too short to match as a bare substring: it would also hit
    // "header", "loading", "thread", "download", ... Require it to stand
    // alone as a token (optionally plural), e.g. "ad", "ads", "ad-slot",
    // "top_ad".
    r'(?<![a-zA-Z0-9])ads?(?![a-zA-Z0-9])',
    // Compound name with no separator, which the token rule above would miss.
    r'adsbygoogle',
    r'advertisement',
    r'banner',
    r'popup',
    r'modal',
    r'social',
    r'share',
    r'comment',
    r'sidebar',
    r'widget',
    r'navigation',
    r'nav',
    r'menu',
    r'breadcrumb'
  ];

  // Elements that never have a closing tag; they are removed on their own so
  // that no following content is consumed while searching for one.
  const voidTags =
      r'area|base|br|col|embed|hr|img|input|link|meta|source|track|wbr';

  for (final pattern in adPatterns) {
    // A class/id attribute whose quoted value contains the keyword. The
    // leading `\s` keeps `data-id=` and similar attributes from matching.
    final attribute = r'\s(?:class|id)\s*=\s*[\"\x27][^\"\x27]*' +
        pattern +
        r'[^\"\x27]*[\"\x27]';

    // Void elements: drop just the tag.
    cleaned = cleaned.replaceAll(
        RegExp(r'<(?:' + voidTags + r')(?=\s)[^>]*?' + attribute + r'[^>]*>',
            caseSensitive: false, dotAll: true),
        '');

    // Paired elements: capture the tag name and require the same name in the
    // closing tag, so the match does not stop at an inner element's close tag.
    cleaned = cleaned.replaceAll(
        RegExp(
            r'<([a-zA-Z][^\s/>]*)(?=\s)[^>]*?' +
                attribute +
                r'[^>]*>.*?</\1\s*>',
            caseSensitive: false,
            dotAll: true),
        '');
  }

  return cleaned;
}