scrapeInParallel<T> method
Scrapes multiple URLs in parallel. One ScrapingTask is created and enqueued per URL on the given TaskScheduler; the method then awaits every task and returns the successfully extracted results. URLs whose tasks fail (after the task's retries are exhausted) are logged and omitted, so the returned list can be shorter than urls.
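For illustration, a minimal usage sketch follows. The scraper receiver, the no-argument TaskScheduler constructor call, and the title-extracting extractor are assumptions made for this example, not the package's documented API:

// Hypothetical usage sketch; `scraper` (an object exposing scrapeInParallel)
// and the extractor below are illustrative assumptions.
final scheduler = TaskScheduler();
final titles = await scraper.scrapeInParallel<String>(
  urls: [
    'https://example.com/a',
    'https://example.com/b',
  ],
  extractor: (html, url) async {
    // Naive <title> extraction, just to show the extractor contract:
    // it receives the fetched HTML plus the source URL and returns a T.
    final match =
        RegExp(r'<title>(.*?)</title>', dotAll: true).firstMatch(html);
    return match?.group(1)?.trim() ?? url;
  },
  scheduler: scheduler,
);
print('Scraped ${titles.length} of 2 pages');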
Implementation
Future<List<T>> scrapeInParallel<T>({
  required List<String> urls,
  required Future<T> Function(String html, String url) extractor,
  required TaskScheduler scheduler,
  Map<String, String>? headers,
  int? timeout,
  int? retries,
  TaskPriority priority = TaskPriority.normal,
  int maxRetries = 3,
}) async {
  final tasks = <ScrapingTask<T>>[];
  final results = <T>[];

  // Create and enqueue a task for each URL.
  for (int i = 0; i < urls.length; i++) {
    final url = urls[i];
    final domain = _extractDomain(url);

    final task = ScrapingTask<T>(
      id: Uuid().v4(),
      domain: domain,
      url: url,
      execute: () async {
        // Fetch the HTML for this URL.
        final html = await fetchHtml(
          url: url,
          headers: headers,
          timeout: timeout,
          retries: retries,
        );

        // Extract the typed result from the fetched HTML.
        return await extractor(html, url);
      },
      priority: priority,
      maxRetries: maxRetries,
      logger: Logger('ScrapingTask-$i'),
    );

    tasks.add(task);
    scheduler.enqueue(task);
  }

  // Wait for all tasks to complete. Failures are logged and skipped,
  // so the result list may be shorter than the input URL list.
  for (final task in tasks) {
    try {
      final result = await task.future;
      results.add(result);
    } catch (e) {
      logger.error('Error scraping ${task.url}: $e');
    }
  }

  return results;
}
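_extractDomain is a private helper that is not shown in this section; it presumably maps a URL to the host that the scheduler uses for per-domain queuing and rate limiting. A minimal sketch under that assumption:

// Assumption-based sketch of the _extractDomain helper referenced above;
// the package's real implementation is not shown in this section.
String _extractDomain(String url) {
  final uri = Uri.tryParse(url);
  // Fall back to the raw string when the URL cannot be parsed.
  if (uri == null || uri.host.isEmpty) return url;
  return uri.host;
}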
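The ScrapingTask class is also not shown here. From the constructor call and the task.future and task.url accesses above, its shape can be inferred roughly as follows; this is an assumption-based sketch, not the package's actual class:

import 'dart:async';

// Assumption-based sketch of ScrapingTask, inferred from its use above.
// TaskPriority and Logger are the package's own types (imports omitted).
class ScrapingTask<T> {
  ScrapingTask({
    required this.id,
    required this.domain,
    required this.url,
    required this.execute,
    this.priority = TaskPriority.normal,
    this.maxRetries = 3,
    required this.logger,
  });

  final String id;
  final String domain;
  final String url;
  final Future<T> Function() execute;
  final TaskPriority priority;
  final int maxRetries;
  final Logger logger;

  // Completed by the scheduler once execute() succeeds or retries run out;
  // scrapeInParallel awaits this to collect the task's result.
  final Completer<T> _completer = Completer<T>();
  Future<T> get future => _completer.future;
}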