execute<T> method

Future<T> execute<T>({
  required String url,
  required Future<T> fn(),
  String? userAgent,
  int priority = 0,
})

Executes a function with rate limiting.

- `url` is the URL to rate limit.
- `fn` is the function to execute.
- `userAgent` is the user agent to use for the robots.txt crawl delay.
- `priority` is the priority of the request (higher values = higher priority).

Implementation

/// Executes [fn] under the rate limiting that applies to the domain of [url].
///
/// The call proceeds in three steps:
///
/// 1. If the domain is currently rate limited, waits until the recorded
///    retry-after time has passed.
/// 2. If a robots.txt handler is configured and [userAgent] is non-null,
///    looks up the crawl delay for that user agent. A failed lookup is
///    logged and otherwise ignored.
/// 3. Adds the request to the domain's queue, which is kept ordered by
///    [priority] (higher values first) and then by creation time (earlier
///    first).
///
/// Queue processing is only started from here when the new request is the
/// sole entry in the domain's queue; the robots.txt delay looked up by this
/// call is passed along only in that case.
///
/// Returns the future of the completer attached to the queued request.
/// NOTE(review): the completer is presumably completed by `_processQueue`
/// with the result (or error) of [fn] — confirm against that method.
Future<T> execute<T>({
  required String url,
  required Future<T> Function() fn,
  String? userAgent,
  int priority = 0,
}) async {
  final domain = _extractDomain(url);

  // Honour an active rate limit for this domain before doing anything else.
  if (_isRateLimited(domain)) {
    final retryAfter = _rateLimitStatus[domain]!.retryAfter;
    _logger.warning(
      'Domain $domain is rate limited. Retry after ${retryAfter.toIso8601String()}',
    );

    // The limit may have lapsed between the check and now; only sleep if
    // the retry-after instant is still in the future.
    final now = DateTime.now();
    if (retryAfter.isAfter(now)) {
      final waitTime = retryAfter.difference(now).inMilliseconds;
      _logger.info('Waiting ${waitTime}ms for rate limit to expire');
      await Future<void>.delayed(Duration(milliseconds: waitTime));
    }
  }

  // Look up the robots.txt crawl delay. The handler is copied to a local so
  // that null promotion does not depend on private-field promotion.
  int? robotsDelay;
  final robotsTxtHandler = _robotsTxtHandler;
  if (robotsTxtHandler != null && userAgent != null) {
    try {
      final rules = await robotsTxtHandler.getRules(domain);
      robotsDelay = rules?.getCrawlDelay(userAgent);

      if (robotsDelay != null) {
        _logger.info(
          'Using robots.txt crawl delay of ${robotsDelay}ms for $domain',
        );
      }
    } catch (e) {
      // Best effort: a robots.txt failure must not block the request.
      _logger.warning('Error getting robots.txt crawl delay: $e');
    }
  }

  // The completer's future is what the caller awaits.
  final completer = Completer<T>();

  final queuedRequest = _QueuedRequest<T>(
    fn: fn,
    completer: completer,
    priority: priority,
    createdAt: DateTime.now(),
  );

  // Fetch (or lazily create) the domain's queue once and reuse it, instead
  // of repeating null-asserted map lookups.
  final queue = _requestQueues.putIfAbsent(
    domain,
    () => Queue<_QueuedRequest>(),
  );
  queue.add(queuedRequest);

  // Re-order the queue: higher priority first, then earlier creation time.
  final sortedList = queue.toList()
    ..sort((a, b) {
      // compareTo instead of subtraction: `b.priority - a.priority` can
      // overflow for extreme values and flip the sign of the result.
      final priorityOrder = b.priority.compareTo(a.priority);
      if (priorityOrder != 0) return priorityOrder;

      return a.createdAt.compareTo(b.createdAt);
    });

  queue
    ..clear()
    ..addAll(sortedList);

  // Start processing only when this request is the sole entry in the queue.
  if (queue.length == 1) {
    _processQueue(domain, robotsDelay);
  }

  return completer.future;
}