extractStructuredData method
Parses HTML content and extracts structured data using CSS selectors
`html` is the HTML content to parse.
`selectors` is a map of field names to CSS selectors.
`attributes` is an optional map of field names to the attribute to extract; a field with no entry (or a null entry) uses the element's trimmed text instead.
Implementation
/// Parses [html] and extracts a list of records using CSS [selectors].
///
/// Each entry in [selectors] maps a field name to a CSS selector. The i-th
/// returned map holds, for every field, the value taken from the i-th element
/// matching that field's selector. The number of returned maps equals the
/// largest match count across all selectors; a field whose selector matched
/// fewer elements is filled with an empty string for the remaining rows.
///
/// If [attributes] has a non-null entry for a field, the value of that
/// attribute is used (empty string when the element lacks it); otherwise the
/// element's trimmed text is used.
///
/// Any failure while parsing or extracting is rethrown as the non-retryable
/// exception built by `ScrapingException.parsing`, carrying the original
/// error as `originalException`.
List<Map<String, String>> extractStructuredData({
  required String html,
  required Map<String, String> selectors,
  Map<String, String?>? attributes,
}) {
  try {
    final document = html_parser.parse(html);

    // Run every selector exactly once. The matches are the same for every
    // row, so re-querying inside the row loop would only repeat the document
    // traversal. Map literals keep insertion order, so fields are emitted in
    // the same order as in [selectors].
    final matchesByField = {
      for (final entry in selectors.entries)
        entry.key: document.querySelectorAll(entry.value),
    };

    // The row count is the largest number of matches for any selector.
    var maxItems = 0;
    for (final elements in matchesByField.values) {
      if (elements.length > maxItems) {
        maxItems = elements.length;
      }
    }

    final result = <Map<String, String>>[];
    for (var i = 0; i < maxItems; i++) {
      final item = <String, String>{};
      for (final entry in matchesByField.entries) {
        final field = entry.key;
        final elements = entry.value;

        if (i >= elements.length) {
          // This selector matched fewer elements than the longest one.
          item[field] = '';
          continue;
        }

        final element = elements[i];
        final attribute = attributes?[field];
        item[field] = attribute != null
            ? element.attributes[attribute] ?? ''
            : element.text.trim();
      }
      result.add(item);
    }
    return result;
  } catch (e) {
    throw ScrapingException.parsing(
      'Failed to extract structured data',
      originalException: e,
      isRetryable: false,
    );
  }
}