handleInfiniteScrollingSite method
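Handles a site that uses infinite scrolling: loads the page, repeatedly scrolls to the bottom so that more content is loaded, and then returns the final HTML together with any data extracted by the given selectors.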
Implementation
/// Loads [url], scrolls to the bottom of the page up to [maxScrolls] times to
/// trigger infinite-scroll loading, and returns the resulting page.
///
/// * [headers] and [timeoutMillis] are forwarded to the initial scrape.
/// * [scrollDelay] is the pause, in milliseconds, after each scroll.
/// * [itemSelector] is an optional CSS selector for the repeated items. When
///   given, the matching elements are counted after every scroll and
///   scrolling stops early once three consecutive scrolls add no new items.
///   When omitted, the page is always scrolled [maxScrolls] times.
/// * [selectors] maps a result key to a CSS selector. For every key the
///   returned data holds a list with the trimmed, non-empty text content of
///   each match, or, when [attributes] has an entry for the same key, the
///   non-null values of that attribute.
///
/// Selector, attribute and key strings are escaped before being embedded in
/// the injected JavaScript, so they may contain quotes and backslashes and
/// are passed to `querySelectorAll` / `getAttribute` exactly as given.
///
/// If the initial scrape fails, its result is returned unchanged. Otherwise a
/// success result is returned whose `elapsedMillis` is the value reported by
/// the initial scrape (it does not include the time spent scrolling).
Future<HeadlessBrowserResult> handleInfiniteScrollingSite(
  String url, {
  Map<String, String>? headers,
  Map<String, String>? selectors,
  Map<String, String>? attributes,
  int? timeoutMillis,
  int maxScrolls = 10,
  int scrollDelay = 1000,
  String? itemSelector,
}) async {
  // Number of consecutive scrolls without new items before giving up.
  const maxUnchangedScrolls = 3;

  // Renders [value] as a single-quoted JavaScript string literal. Without
  // this, a quote or backslash in a caller-supplied selector would terminate
  // the literal early and corrupt (or inject code into) the script.
  String jsString(String value) {
    final escaped = value
        .replaceAll('\\', '\\\\')
        .replaceAll("'", "\\'")
        .replaceAll('"', '\\"')
        .replaceAll('\n', '\\n')
        .replaceAll('\r', '\\r')
        .replaceAll('\u2028', '\\u2028')
        .replaceAll('\u2029', '\\u2029');
    return "'$escaped'";
  }

  // Counts the elements currently matching [selector]; yields 0 when the
  // script result cannot be interpreted as a number.
  Future<int> countItems(String selector) async {
    final value = await _service.executeScript(
      'document.querySelectorAll(${jsString(selector)}).length',
    );
    if (value is int) return value;
    // NOTE(review): tolerates bridges that hand JS numbers back as doubles
    // or as numeric strings - confirm what executeScript actually returns.
    if (value is num && value.isFinite) return value.toInt();
    if (value is String) return int.tryParse(value.trim()) ?? 0;
    return 0;
  }

  _logger.info('Handling infinite scrolling site: $url');

  final result = await _service.scrapeUrl(
    url,
    headers: headers,
    timeoutMillis: timeoutMillis,
  );
  if (!result.success) {
    return result;
  }

  // Get initial item count.
  var initialItemCount = 0;
  if (itemSelector != null) {
    initialItemCount = await countItems(itemSelector);
  }

  // Scroll down to trigger infinite loading.
  var currentItemCount = initialItemCount;
  var unchangedScrolls = 0;
  for (var i = 0; i < maxScrolls; i++) {
    await _service.executeScript(
      'window.scrollTo(0, document.body.scrollHeight);',
    );
    await Future.delayed(Duration(milliseconds: scrollDelay));

    // Without an item selector there is nothing to measure progress with.
    if (itemSelector == null) continue;

    final newCount = await countItems(itemSelector);
    if (newCount > currentItemCount) {
      _logger.info('Loaded more items: $newCount (was $currentItemCount)');
      currentItemCount = newCount;
      unchangedScrolls = 0;
      continue;
    }

    unchangedScrolls++;
    if (unchangedScrolls >= maxUnchangedScrolls) {
      _logger.info(
        'No new items after $maxUnchangedScrolls scrolls, stopping',
      );
      break;
    }
  }

  // Extract data if selectors provided.
  Map<String, dynamic>? extractedData;
  if (selectors != null && selectors.isNotEmpty) {
    // One assignment statement per requested key.
    final statements = selectors.entries.map((entry) {
      final key = jsString(entry.key);
      final selector = jsString(entry.value);
      final attribute = attributes?[entry.key];
      if (attribute != null) {
        return 'result[$key] = '
            'Array.from(document.querySelectorAll($selector))\n'
            '.map(el => el.getAttribute(${jsString(attribute)}))\n'
            '.filter(val => val !== null);';
      }
      return 'result[$key] = '
          'Array.from(document.querySelectorAll($selector))\n'
          '.map(el => el.textContent.trim())\n'
          '.filter(val => val !== "");';
    }).join('\n');

    extractedData = await _service.executeScript('''
(function() {
const result = {};
$statements
return result;
})();
''');
  }

  // Get updated HTML (after all scrolling has finished).
  final html = await _service.executeScript(
    'document.documentElement.outerHTML',
  );

  if (itemSelector != null) {
    _logger.info(
      'Total items loaded: $currentItemCount (initial: $initialItemCount)',
    );
  }

  return HeadlessBrowserResult.success(
    html: html,
    data: extractedData,
    elapsedMillis: result.elapsedMillis,
  );
}