extractTitle static method
Extracts the page title using multiple fallback strategies.
Implementation
/// Extracts the page title from [html] using a series of fallback patterns.
///
/// Patterns are tried in priority order: the Open Graph `og:title` meta tag
/// (with `property` and `content` in either order), the `<title>` tag, the
/// first `<h1>`, then the first element whose `class` attribute contains
/// `title`, then one whose `class` contains `headline`. Each candidate is
/// passed through [_cleanText]; the first candidate that is non-empty after
/// cleaning is returned.
///
/// Returns `null` when no pattern yields a non-empty title.
static String? extractTitle(String html) {
  // Every pattern exposes its candidate text as the named group `title`.
  //
  // In the Open Graph patterns each quoted attribute value is closed by a
  // backreference to its own opening quote, and the captured value stops at
  // the first occurrence of that same quote. This keeps an apostrophe inside
  // a double-quoted value (content="Don't panic") from ending the capture
  // early. `\x27` is a single quote, which cannot appear literally inside a
  // single-quoted raw string.
  const strategies = [
    // Open Graph title, `property` before `content`.
    r'<meta[^>]*property=(?<pq>[\"\x27])og:title\k<pq>[^>]*content=(?<cq>[\"\x27])(?<title>(?:(?!\k<cq>)[\s\S])*)\k<cq>',
    // Open Graph title, `content` before `property`.
    r'<meta[^>]*content=(?<cq>[\"\x27])(?<title>(?:(?!\k<cq>)[\s\S])*)\k<cq>[^>]*property=(?<pq>[\"\x27])og:title\k<pq>',
    // Title tag.
    r'<title[^>]*>(?<title>[^<]*)</title>',
    // H1 tags.
    r'<h1[^>]*>(?<title>[^<]*)</h1>',
    // Article title patterns: any element whose class mentions title/headline.
    r'<[^>]*class=[\"\x27][^\"\x27]*title[^\"\x27]*[\"\x27][^>]*>(?<title>[^<]*)<',
    r'<[^>]*class=[\"\x27][^\"\x27]*headline[^\"\x27]*[\"\x27][^>]*>(?<title>[^<]*)<',
  ];

  for (final pattern in strategies) {
    final match = RegExp(pattern, caseSensitive: false).firstMatch(html);
    if (match == null) continue;

    // Only the first match of each pattern is considered; if it cleans down
    // to an empty string, fall through to the next strategy.
    final title = _cleanText(match.namedGroup('title') ?? '');
    if (title.isNotEmpty) return title;
  }
  return null;
}