toMarkdown static method
Convert HTML to Markdown format
Implementation
/// Converts an HTML string to Markdown using a fixed sequence of
/// regular-expression substitutions.
///
/// Steps, in order: `<script>`/`<style>` elements are removed together with
/// their content; `<h1>`..`<h6>` become `#`-prefixed headings; `<strong>`/`<b>`
/// become `**text**`; `<em>`/`<i>` become `*text*`; `<code>` becomes
/// `` `text` ``; `<a href>` becomes `[text](href)`; `<img>` carrying both a
/// quoted `src` and a quoted `alt` becomes `![alt](src)`; every `<li>` becomes
/// a `- ` bullet (items of `<ol>` and `<ul>` alike); `<p>` content is followed
/// by a blank line; `<br>` becomes a newline; `<blockquote>` content is
/// prefixed with `> `. Any tag still present afterwards is stripped, and the
/// result is passed through `_cleanText` before being returned.
///
/// This is a regex pass, not an HTML parser: nested elements of the same tag
/// and attribute values containing `>` are not handled.
static String toMarkdown(String html) {
  String markdown = html;

  // Pattern for a paired element `<tag ...>content</tag>`, capturing the
  // content as group 1. The lookahead pins the end of the tag name, so a
  // short name cannot match a longer one (`b` vs `<br>`/`<blockquote>`,
  // `i` vs `<img>`, `p` vs `<pre>`, `em` vs `<embed>`, `li` vs `<link>`).
  RegExp element(String tag) => RegExp(
        '<$tag' r'(?=[\s/>])[^>]*>(.*?)</' '$tag>',
        caseSensitive: false,
        dotAll: true,
      );

  // Remove script and style elements including their content.
  markdown = markdown.replaceAll(
    RegExp(r'<(script|style)[^>]*>.*?</\1>',
        caseSensitive: false, dotAll: true),
    '',
  );

  // Convert headers: <hN> becomes N '#' characters followed by the text.
  for (var level = 1; level <= 6; level++) {
    final prefix = '#' * level;
    markdown = markdown.replaceAllMapped(
      element('h$level'),
      (match) => '$prefix ${match.group(1)}\n\n',
    );
  }

  // Convert text formatting. Map literals iterate in insertion order, so the
  // tags are processed in the order written here.
  const inlineMarkers = {
    'strong': '**',
    'b': '**',
    'em': '*',
    'i': '*',
    'code': '`',
  };
  for (final entry in inlineMarkers.entries) {
    final marker = entry.value;
    markdown = markdown.replaceAllMapped(
      element(entry.key),
      (match) => '$marker${match.group(1)}$marker',
    );
  }

  // Convert links. `href` must be preceded by whitespace so that attributes
  // such as `data-href` are not mistaken for it.
  markdown = markdown.replaceAllMapped(
    RegExp(
      r'<a\s(?:[^>]*?\s)?href=[\"\x27]([^\"\x27]*)[\"\x27][^>]*>(.*?)</a>',
      caseSensitive: false,
      dotAll: true,
    ),
    (match) => '[${match.group(2)}](${match.group(1)})',
  );

  // Convert images. `src` and `alt` are looked up independently so their
  // order inside the tag does not matter.
  final quotedSrc = RegExp(
    r'\ssrc=[\"\x27]([^\"\x27]*)[\"\x27]',
    caseSensitive: false,
  );
  final quotedAlt = RegExp(
    r'\salt=[\"\x27]([^\"\x27]*)[\"\x27]',
    caseSensitive: false,
  );
  markdown = markdown.replaceAllMapped(
    RegExp(r'<img\s[^>]*>', caseSensitive: false),
    (match) {
      final tag = match.group(0) ?? '';
      final src = quotedSrc.firstMatch(tag);
      final alt = quotedAlt.firstMatch(tag);
      if (src == null || alt == null) {
        // Without both attributes the tag is left untouched here and is
        // removed by the final tag-stripping step below.
        return tag;
      }
      return '![${alt.group(1)}](${src.group(1)})';
    },
  );

  // Convert lists: the list wrappers are dropped (closing tags leave a
  // newline) and every item becomes a bullet line.
  markdown = markdown
      .replaceAll(RegExp(r'<ul[^>]*>', caseSensitive: false), '')
      .replaceAll(RegExp(r'</ul>', caseSensitive: false), '\n')
      .replaceAll(RegExp(r'<ol[^>]*>', caseSensitive: false), '')
      .replaceAll(RegExp(r'</ol>', caseSensitive: false), '\n');
  markdown = markdown.replaceAllMapped(
    element('li'),
    (match) => '- ${match.group(1)}\n',
  );

  // Convert paragraphs and line breaks.
  markdown = markdown.replaceAllMapped(
    element('p'),
    (match) => '${match.group(1)}\n\n',
  );
  markdown =
      markdown.replaceAll(RegExp(r'<br\s*/?>', caseSensitive: false), '\n');

  // Convert blockquotes.
  markdown = markdown.replaceAllMapped(
    element('blockquote'),
    (match) => '> ${match.group(1)}\n\n',
  );

  // Remove remaining HTML tags.
  markdown = markdown.replaceAll(RegExp(r'<[^>]*>'), '');

  return _cleanText(markdown);
}