extractStructuredData method

List<Map<String, String>> extractStructuredData({
  required String html,
  required Map<String, String> selectors,
  Map<String, String?>? attributes,
})

Parses HTML content and extracts structured data using CSS selectors

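- `html` is the HTML content to parse.
- `selectors` is a map of field names to CSS selectors.
- `attributes` (optional) is a map of field names to the attribute to extract for that field; when a field has no attribute configured, the element's trimmed text is used instead.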

Implementation

/// Parses [html] and extracts a list of records using CSS selectors.
///
/// [selectors] maps each output field name to the CSS selector whose matches
/// supply that field. [attributes] optionally maps a field name to the name of
/// the element attribute to read for that field; when no attribute is given
/// (missing key or `null` value) the element's trimmed text is used.
///
/// Returns one map per row. Row `i` holds, for every field, the value taken
/// from the `i`-th element matched by that field's selector. The number of
/// rows equals the largest match count across all selectors; a field whose
/// selector matched fewer elements, or whose requested attribute is absent on
/// the element, is filled with an empty string.
///
/// Note: fields are paired purely by match index, not by a shared parent
/// element, so selectors with differing match counts yield rows whose fields
/// may come from unrelated parts of the document.
///
/// Throws a non-retryable [ScrapingException.parsing] wrapping any error
/// raised while parsing or querying.
List<Map<String, String>> extractStructuredData({
  required String html,
  required Map<String, String> selectors,
  Map<String, String?>? attributes,
}) {
  try {
    final document = html_parser.parse(html);

    // Run every selector exactly once and keep the matches. The document is
    // not modified below, so re-querying per row would only repeat the same
    // DOM traversal for identical results.
    final matchesByField = selectors.map(
      (field, selector) => MapEntry(field, document.querySelectorAll(selector)),
    );

    // The row count is driven by the selector with the most matches.
    var maxItems = 0;
    for (final elements in matchesByField.values) {
      if (elements.length > maxItems) {
        maxItems = elements.length;
      }
    }

    final result = <Map<String, String>>[];
    for (var i = 0; i < maxItems; i++) {
      final item = <String, String>{};

      matchesByField.forEach((field, elements) {
        if (i >= elements.length) {
          // This selector matched fewer elements than the longest one.
          item[field] = '';
          return;
        }

        final element = elements[i];
        final attribute = attributes?[field];

        item[field] = attribute != null
            ? (element.attributes[attribute] ?? '')
            : element.text.trim();
      });

      result.add(item);
    }

    return result;
  } catch (e) {
    throw ScrapingException.parsing(
      'Failed to extract structured data',
      originalException: e,
      isRetryable: false,
    );
  }
}