execute<T> method
Executes a function with rate limiting.
`url` is the URL whose domain the rate limit is applied to.
`fn` is the function to execute.
`userAgent` is the user agent used to look up the robots.txt crawl delay.
`priority` is the priority of the request (higher values = higher priority).
Implementation
/// Queues [fn] for rate-limited execution against the domain of [url].
///
/// The domain is derived from [url]. If that domain is currently flagged as
/// rate limited, this call first waits until the recorded retry time has
/// passed. When a robots.txt handler is configured and [userAgent] is
/// non-null, the crawl delay for that agent is looked up; lookup failures are
/// logged and otherwise ignored (best effort).
///
/// Requests for the same domain are kept ordered by [priority] (higher values
/// first) and, for equal priorities, by creation time (earlier first).
/// Queue processing is only kicked off from here when this request is the
/// sole entry in the domain's queue; the robots.txt delay found by this call
/// is passed along in that case.
///
/// Returns the future of the [Completer] attached to the queued request.
Future<T> execute<T>({
  required String url,
  required Future<T> Function() fn,
  String? userAgent,
  int priority = 0,
}) async {
  final domain = _extractDomain(url);

  // Honour an active rate limit for this domain before doing anything else.
  if (_isRateLimited(domain)) {
    final retryAfter = _rateLimitStatus[domain]!.retryAfter;
    _logger.warning(
      'Domain $domain is rate limited. Retry after ${retryAfter.toIso8601String()}',
    );

    // Only sleep when the retry time is still in the future.
    final now = DateTime.now();
    if (retryAfter.isAfter(now)) {
      final waitTime = retryAfter.difference(now).inMilliseconds;
      _logger.info('Waiting ${waitTime}ms for rate limit to expire');
      await Future.delayed(Duration(milliseconds: waitTime));
    }
  }

  // Look up the robots.txt crawl delay, if a handler and user agent exist.
  int? robotsDelay;
  if (_robotsTxtHandler != null && userAgent != null) {
    try {
      final rules = await _robotsTxtHandler.getRules(domain);
      robotsDelay = rules?.getCrawlDelay(userAgent);
      if (robotsDelay != null) {
        _logger.info(
          'Using robots.txt crawl delay of ${robotsDelay}ms for $domain',
        );
      }
    } catch (e) {
      // Deliberately best effort: a robots.txt failure must not block the
      // request itself.
      _logger.warning('Error getting robots.txt crawl delay: $e');
    }
  }

  // The completer is what the caller ultimately awaits.
  // NOTE(review): presumably completed by _processQueue — confirm there.
  final completer = Completer<T>();
  final queuedRequest = _QueuedRequest<T>(
    fn: fn,
    completer: completer,
    priority: priority,
    createdAt: DateTime.now(),
  );

  // Fetch (or lazily create) the queue for this domain once and reuse it,
  // instead of repeating the map lookup with a null assertion.
  final queue = _requestQueues.putIfAbsent(
    domain,
    () => Queue<_QueuedRequest>(),
  );
  queue.add(queuedRequest);

  // Re-order the queue: higher priority first, then earlier creation time.
  final sortedList = queue.toList()
    ..sort((a, b) {
      // Use compareTo rather than `b.priority - a.priority`: subtraction can
      // wrap around for extreme values and silently invert the ordering.
      final byPriority = b.priority.compareTo(a.priority);
      if (byPriority != 0) return byPriority;
      return a.createdAt.compareTo(b.createdAt);
    });
  queue
    ..clear()
    ..addAll(sortedList);

  // Start processing only when this request is the sole entry in the queue.
  // NOTE(review): assumes an already-running processor drains the queue
  // otherwise — confirm against _processQueue.
  if (queue.length == 1) {
    _processQueue(domain, robotsDelay);
  }

  return completer.future;
}