toMarkdown static method

String toMarkdown(String html)

Converts an HTML string to Markdown format.

Implementation

/// Converts an HTML fragment to Markdown using a fixed sequence of
/// regular-expression rewrites.
///
/// Steps, in order:
///  1. `<script>` / `<style>` elements are removed together with their content.
///  2. `<h1>`..`<h6>` become `#`..`######` headings.
///  3. `<strong>`/`<b>` become `**bold**`, `<em>`/`<i>` become `*italic*`,
///     `<code>` becomes `` `code` ``.
///  4. `<a href="url">text</a>` becomes `[text](url)`.
///  5. `<img>` becomes `![alt](src)`; `src` and `alt` may appear in any order
///     and a missing `alt` yields empty alt text. An `<img>` without a quoted
///     `src` is dropped.
///  6. `<ol>` items are numbered (`1.`, `2.`, ...); all other `<li>` items
///     become `- ` bullets.
///  7. `<p>` becomes a paragraph followed by a blank line, `<br>` a newline.
///  8. `<blockquote>` content is trimmed and every line is prefixed with `>`.
///  9. Any remaining tag is stripped and the text is passed to `_cleanText`.
///
/// This is a regex-based converter, not an HTML parser: elements nested inside
/// an element of the same kind (for example a list inside a list item) are not
/// matched correctly.
///
/// [html] is the markup to convert. Returns the Markdown text.
static String toMarkdown(String html) {
  // Pattern for a complete `<tag ...>inner</tag>` element; group 1 is `inner`.
  // The tag name must be followed by whitespace or `>`, so a short name can
  // never match a longer tag sharing its prefix (`b` vs `<br>`/`<body>`,
  // `i` vs `<img>`, `p` vs `<pre>`, `li` vs `<link>`, `em` vs `<embed>`).
  RegExp element(String tag) => RegExp(
        '<${tag}(?:\\s[^>]*)?>(.*?)</${tag}>',
        caseSensitive: false,
        dotAll: true,
      );

  // Pattern for a quoted attribute inside a single tag; the value is in
  // group 1 (double-quoted) or group 2 (single-quoted, `\x27` is `'`).
  RegExp attribute(String name) => RegExp(
        r'\s' + name + r'\s*=\s*(?:"([^"]*)"|\x27([^\x27]*)\x27)',
        caseSensitive: false,
      );

  // Value captured by an `attribute(...)` match.
  String value(Match m) => m.group(1) ?? m.group(2) ?? '';

  String markdown = html;

  // Remove script and style elements including their content.
  markdown = markdown.replaceAll(
      RegExp(r'<(script|style)(?:\s[^>]*)?>.*?</\1>',
          caseSensitive: false, dotAll: true),
      '');

  // Convert headers: <hN> becomes N '#' characters.
  for (var level = 1; level <= 6; level++) {
    final prefix = '#' * level;
    markdown = markdown.replaceAllMapped(
      element('h$level'),
      (match) => '$prefix ${match.group(1)}\n\n',
    );
  }

  // Convert text formatting.
  for (final tag in ['strong', 'b']) {
    markdown = markdown.replaceAllMapped(
      element(tag),
      (match) => '**${match.group(1)}**',
    );
  }
  for (final tag in ['em', 'i']) {
    markdown = markdown.replaceAllMapped(
      element(tag),
      (match) => '*${match.group(1)}*',
    );
  }
  markdown = markdown.replaceAllMapped(
    element('code'),
    (match) => '`${match.group(1)}`',
  );

  // Convert links. `<a` must be followed by whitespace so that tags such as
  // `<abbr>` or `<area href=...>` are not mistaken for anchors.
  markdown = markdown.replaceAllMapped(
    RegExp(
        r'<a\s(?:[^>]*?\s)?href=[\"\x27]([^\"\x27]*)[\"\x27][^>]*>(.*?)</a>',
        caseSensitive: false,
        dotAll: true),
    (match) => '[${match.group(2)}](${match.group(1)})',
  );

  // Convert images. Attributes are looked up independently so that their
  // order does not matter and `alt` is optional.
  markdown = markdown.replaceAllMapped(
    RegExp(r'<img(?:\s[^>]*)?/?>', caseSensitive: false),
    (match) {
      final tag = match.group(0) ?? '';
      final src = attribute('src').firstMatch(tag);
      if (src == null) {
        // Nothing to link to; drop the tag (it would be stripped below anyway).
        return '';
      }
      final alt = attribute('alt').firstMatch(tag);
      final altText = alt == null ? '' : value(alt);
      return '![$altText](${value(src)})';
    },
  );

  // Convert ordered lists first so their items keep their numbering.
  markdown = markdown.replaceAllMapped(element('ol'), (match) {
    var number = 0;
    final items = (match.group(1) ?? '').replaceAllMapped(
      element('li'),
      (item) => '${++number}. ${item.group(1)}\n',
    );
    return '$items\n';
  });

  // Drop the remaining list wrappers (all <ul>, plus any unbalanced <ol>)
  // and turn every remaining item into a bullet.
  markdown = markdown
      .replaceAll(RegExp(r'<[uo]l(?:\s[^>]*)?>', caseSensitive: false), '')
      .replaceAll(RegExp(r'</[uo]l>', caseSensitive: false), '\n');
  markdown = markdown.replaceAllMapped(
    element('li'),
    (match) => '- ${match.group(1)}\n',
  );

  // Convert paragraphs and line breaks.
  markdown = markdown.replaceAllMapped(
    element('p'),
    (match) => '${match.group(1)}\n\n',
  );
  markdown =
      markdown.replaceAll(RegExp(r'<br\s*/?>', caseSensitive: false), '\n');

  // Convert blockquotes. Paragraphs and <br> inside the quote have already
  // become newlines, so every line needs its own `>` marker.
  markdown = markdown.replaceAllMapped(element('blockquote'), (match) {
    final quoted = (match.group(1) ?? '')
        .trim()
        .split('\n')
        .map((line) => '> $line'.trimRight())
        .join('\n');
    return '$quoted\n\n';
  });

  // Remove remaining HTML tags.
  markdown = markdown.replaceAll(RegExp(r'<[^>]*>'), '');

  return _cleanText(markdown);
}