handleInfiniteScrollingSite method

Future<HeadlessBrowserResult> handleInfiniteScrollingSite(
  String url, {
  Map<String, String>? headers,
  Map<String, String>? selectors,
  Map<String, String>? attributes,
  int? timeoutMillis,
  int maxScrolls = 10,
  int scrollDelay = 1000,
  String? itemSelector,
})

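Handles a site that uses infinite scrolling: loads the page, scrolls to the bottom up to `maxScrolls` times so that more content is loaded, then returns the resulting HTML and any data extracted with `selectors`.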

Implementation

/// Loads [url], scrolls to the bottom repeatedly to trigger infinite loading,
/// and returns the resulting page HTML plus any extracted data.
///
/// The page is first loaded through `_service.scrapeUrl` with [headers] and
/// [timeoutMillis]. If that initial load is not successful, its result is
/// returned unchanged and no scrolling takes place.
///
/// The page is then scrolled to the bottom up to [maxScrolls] times, waiting
/// [scrollDelay] milliseconds after each scroll. When [itemSelector] is given,
/// the number of elements matching it is counted after every scroll and
/// scrolling stops early once three consecutive scrolls add no new matches.
/// Without [itemSelector], all [maxScrolls] scrolls are performed.
///
/// When [selectors] is non-empty, each entry maps a result key to a CSS
/// selector. For a key that also appears in [attributes], the named attribute
/// of every matching element is collected (null values dropped); otherwise the
/// trimmed text content is collected (empty strings dropped).
///
/// The returned result carries the final `document.documentElement.outerHTML`,
/// the extracted data (or `null`), and the `elapsedMillis` value copied from
/// the initial scrape result.
Future<HeadlessBrowserResult> handleInfiniteScrollingSite(
  String url, {
  Map<String, String>? headers,
  Map<String, String>? selectors,
  Map<String, String>? attributes,
  int? timeoutMillis,
  int maxScrolls = 10,
  int scrollDelay = 1000,
  String? itemSelector,
}) async {
  // Number of consecutive scrolls without new items before giving up.
  const maxUnchangedScrolls = 3;

  // Renders [value] as a single-quoted JavaScript string literal. Selectors,
  // keys and attribute names are caller-supplied; without escaping, a value
  // such as `a[href='x']` would terminate the literal early and break (or
  // hijack) the generated script.
  String jsLiteral(String value) {
    final escaped = value
        .replaceAll('\\', '\\\\')
        .replaceAll("'", "\\'")
        .replaceAll('\n', '\\n')
        .replaceAll('\r', '\\r')
        .replaceAll('\u2028', '\\u2028')
        .replaceAll('\u2029', '\\u2029');
    return "'$escaped'";
  }

  // Runs a counting script and falls back to 0 for any non-int result.
  Future<int> countItems(String script) async {
    final count = await _service.executeScript(script);
    return count is int ? count : 0;
  }

  _logger.info('Handling infinite scrolling site: $url');

  final result = await _service.scrapeUrl(
    url,
    headers: headers,
    timeoutMillis: timeoutMillis,
  );

  if (!result.success) {
    return result;
  }

  // Built once and reused for the initial count and every per-scroll count.
  final countScript = itemSelector == null
      ? null
      : 'document.querySelectorAll(${jsLiteral(itemSelector)}).length';

  // Get initial item count
  var initialItemCount = 0;
  if (countScript != null) {
    initialItemCount = await countItems(countScript);
  }

  // Scroll down to trigger infinite loading
  var currentItemCount = initialItemCount;
  var unchangedScrolls = 0;

  for (var i = 0; i < maxScrolls; i++) {
    await _service.executeScript(
      'window.scrollTo(0, document.body.scrollHeight);',
    );
    await Future.delayed(Duration(milliseconds: scrollDelay));

    // Without an item selector there is nothing to measure, so every one of
    // the maxScrolls scrolls is performed.
    if (countScript == null) {
      continue;
    }

    // Check if new items were loaded
    final newCount = await countItems(countScript);
    if (newCount > currentItemCount) {
      _logger.info('Loaded more items: $newCount (was $currentItemCount)');
      currentItemCount = newCount;
      unchangedScrolls = 0;
    } else {
      unchangedScrolls++;
      if (unchangedScrolls >= maxUnchangedScrolls) {
        _logger.info(
          'No new items after $maxUnchangedScrolls scrolls, stopping',
        );
        break;
      }
    }
  }

  // Extract data if selectors provided
  Map<String, dynamic>? extractedData;
  if (selectors != null && selectors.isNotEmpty) {
    final assignments = selectors.entries.map((entry) {
      final key = jsLiteral(entry.key);
      final selector = jsLiteral(entry.value);
      final attribute = attributes?[entry.key];

      // Attribute extraction drops missing attributes; text extraction drops
      // elements whose trimmed text is empty.
      final projection = attribute != null
          ? '.map(el => el.getAttribute(${jsLiteral(attribute)}))'
              '.filter(val => val !== null)'
          : '.map(el => el.textContent.trim())'
              '.filter(val => val !== "")';

      return '  result[$key] = '
          'Array.from(document.querySelectorAll($selector))$projection;';
    }).join('\n');

    final rawData = await _service.executeScript('''
(function() {
  const result = {};
$assignments
  return result;
})();
''');

    // NOTE(review): executeScript's return type is not visible here. The
    // result is copied into a correctly typed map rather than implicitly
    // downcast, so a differently parameterised map (e.g. Map<dynamic,
    // dynamic>) does not fail the cast at runtime. Non-map results leave
    // the data as null.
    if (rawData is Map) {
      extractedData = Map<String, dynamic>.from(rawData);
    }
  }

  // Get updated HTML
  final html = await _service.executeScript(
    'document.documentElement.outerHTML',
  );

  if (itemSelector != null) {
    _logger.info(
      'Total items loaded: $currentItemCount (initial: $initialItemCount)',
    );
  }

  return HeadlessBrowserResult.success(
    html: html,
    data: extractedData,
    elapsedMillis: result.elapsedMillis,
  );
}