removeClutter static method
Removes clutter elements (ads, navigation, and similar non-content markup) from an HTML string.
Implementation
/// Strips non-content markup ("clutter") from [html] using regular
/// expressions and returns the cleaned string.
///
/// Three passes run in order, each on the output of the previous one:
///
/// 1. `<script>` and `<style>` elements, including their contents.
/// 2. `<nav>`, `<header>`, `<footer>`, `<aside>` and `<menu>` elements.
/// 3. Elements whose `class` or `id` attribute value matches one of the
///    clutter keywords (ads, banners, popups, share widgets, ...).
///
/// All matching is case-insensitive. Input that matches nothing is returned
/// unchanged.
///
/// Limitations (this is pattern matching, not an HTML parser):
/// * Passes 1 and 2 stop at the first closing tag with the same name, so
///   nested same-name elements are only partially removed.
/// * Pass 3 stops at the first closing tag of *any* kind that follows the
///   matching opening tag, so an element with child elements is only removed
///   up to the end of its first closed descendant.
static String removeClutter(String html) {
  String cleaned = html;

  // Remove script and style tags together with their bodies.
  cleaned = cleaned.replaceAll(
      RegExp(r'<(script|style)[^>]*>.*?</\1>',
          caseSensitive: false, dotAll: true),
      '');

  // Remove structural navigation/chrome elements.
  cleaned = cleaned.replaceAll(
      RegExp(r'<(nav|header|footer|aside|menu)[^>]*>.*?</\1>',
          caseSensitive: false, dotAll: true),
      '');

  // Regex fragments tested against class/id attribute values.
  final adPatterns = [
    // "ad" / "ads" must not be glued to other letters. A plain substring
    // match would also hit "heading", "lead", "shadow", "loading", "thread",
    // "padding", ... and delete real content. Digits and separators are
    // still allowed next to it ("ad-slot", "top_ads", "ad300x250").
    r'(?<![a-zA-Z])ads?(?![a-zA-Z])',
    // Prefix form so "advert", "advertising" and "advertisement" all match.
    r'advert',
    // Common ad container class that the letter-boundary rule above would
    // otherwise miss.
    r'adsbygoogle',
    r'banner',
    r'popup',
    r'modal',
    r'social',
    r'share',
    r'comment',
    r'sidebar',
    r'widget',
    r'navigation',
    r'nav',
    r'menu',
    r'breadcrumb'
  ];

  // One pass per keyword: drop every element whose class/id value contains
  // the keyword, up to the first closing tag that follows.
  for (final pattern in adPatterns) {
    cleaned = cleaned.replaceAll(
        RegExp(
            r'<[^>]*(?:class|id)=[\"\x27][^\"\x27]*' +
                pattern +
                r'[^\"\x27]*[\"\x27][^>]*>.*?</[^>]*>',
            caseSensitive: false,
            dotAll: true),
        '');
  }

  return cleaned;
}