extractFromDocument method - AdaptiveSelector class - adaptive_selector library

pivox package
documentation
features/web_scraping/selector/adaptive_selector.dart
AdaptiveSelector
extractFromDocument method

extractFromDocument method

List<String> extractFromDocument(Document document)

Extracts data from the given document using this selector, falling back to the alternative selectors when the primary selector matches no elements.

Implementation

/// Extracts one string per element matched in [document].
///
/// The document is queried with [primarySelector] first. When that matches
/// nothing and [alternativeSelectors] is not empty, each alternative is tried
/// in order and the first one matching at least one element is used. Returns
/// an empty list when no selector matches anything.
///
/// For every matched element the extracted value is, in order of precedence:
///
/// * the value of [attribute] when it is non-null (an empty string if the
///   element does not carry that attribute),
/// * the trimmed text content when [asText] is true,
/// * the element's outer HTML otherwise.
///
/// Empty attribute values and empty text are still included in the result;
/// they are only reported through [logger] as warnings.
List<String> extractFromDocument(Document document) {
  var elements = document.querySelectorAll(primarySelector);

  // Only consult the alternatives when the primary selector came up empty.
  if (elements.isEmpty && alternativeSelectors.isNotEmpty) {
    logger?.info(
      'Primary selector "$primarySelector" found no elements, trying alternatives',
    );

    for (final alternativeSelector in alternativeSelectors) {
      elements = document.querySelectorAll(alternativeSelector);
      if (elements.isEmpty) {
        continue;
      }

      // First alternative with a hit wins; later ones are never evaluated.
      logger?.info(
        'Alternative selector "$alternativeSelector" found ${elements.length} elements',
      );
      break;
    }
  }

  // Nothing matched, either from the primary selector or any alternative.
  if (elements.isEmpty) {
    logger?.warning('No elements found with any selectors');
    return [];
  }

  logger?.info('Found ${elements.length} elements with selector');

  // Collect the values in document order, one entry per matched element.
  final extracted = <String>[];
  for (final element in elements) {
    // Attribute extraction takes precedence over text / HTML extraction.
    if (attribute != null) {
      final value = element.attributes[attribute] ?? '';
      if (value.isEmpty) {
        logger?.warning(
          'Attribute "$attribute" not found or empty in element',
        );
      }
      extracted.add(value);
      continue;
    }

    // Neither an attribute nor text was requested: emit the raw markup.
    if (!asText) {
      extracted.add(element.outerHtml);
      continue;
    }

    final text = element.text.trim();
    if (text.isEmpty) {
      logger?.warning('Text content is empty in element');
    }
    extracted.add(text);
  }

  return extracted;
}