getStrategyForUrl method

ScrapingStrategy getStrategyForUrl(
  1. String url
)

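Gets the scraping strategy to use for the given URL. The default strategy is used as the starting point and is then adjusted according to whether the site is flagged as problematic and to any reputation data (including known error patterns) recorded for it.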

Implementation

/// Builds the [ScrapingStrategy] to use when scraping [url].
///
/// Starts from `_defaultStrategy` and layers adjustments on top of it:
///
/// 1. If the reputation tracker flags the site as problematic, retries and
///    timeout are doubled, the initial backoff is halved, and
///    `useRandomUserAgent`, `rotateProxiesOnRetry` and `validateProxies` are
///    switched on.
/// 2. If a reputation record exists, headers, timeout and retries are
///    replaced with the tracker's suggested values, and the strategy is then
///    tuned for the `timeout`, `connection closed` / `connection reset` and
///    `ssl` / `certificate` error patterns.
/// 3. If the resulting strategy asks for a random user agent, the
///    `User-Agent` header is written last, so it overrides any earlier value.
///
/// Returns the adjusted strategy.
ScrapingStrategy getStrategyForUrl(String url) {
  final siteIsProblematic = _reputationTracker.isProblematicSite(url);
  final siteReputation = _reputationTracker.getReputation(url);

  // Every adjustment below is applied on top of the default strategy.
  ScrapingStrategy plan = _defaultStrategy;

  // Step 1: harden the plan for sites known to cause trouble.
  if (siteIsProblematic) {
    plan = plan.copyWith(
      retries: plan.retries * 2,
      timeout: plan.timeout * 2,
      // Retry sooner: half of the configured initial backoff.
      initialBackoff: (plan.initialBackoff * 0.5).toInt(),
      useRandomUserAgent: true,
      rotateProxiesOnRetry: true,
      validateProxies: true,
    );
  }

  // Step 2: refine using whatever the tracker has recorded for this site.
  if (siteReputation != null) {
    // The tracker receives the current values and returns its suggestions.
    plan = plan.copyWith(
      headers: _reputationTracker.getOptimalHeaders(url, plan.headers),
      timeout: _reputationTracker.getOptimalTimeout(url, plan.timeout),
      retries: _reputationTracker.getOptimalRetries(url, plan.retries),
    );

    final timesOut = siteReputation.hasErrorPattern('timeout');
    if (timesOut) {
      // Allow more time per request and back off more aggressively.
      plan = plan.copyWith(
        timeout: plan.timeout * 2,
        backoffMultiplier: 2.0,
      );
    }

    final dropsConnections =
        siteReputation.hasErrorPattern('connection closed') ||
            siteReputation.hasErrorPattern('connection reset');
    if (dropsConnections) {
      // Cut the initial backoff further and switch proxy between attempts.
      plan = plan.copyWith(
        initialBackoff: (plan.initialBackoff * 0.3).toInt(),
        rotateProxiesOnRetry: true,
      );
    }

    final hasSslErrors = siteReputation.hasErrorPattern('ssl') ||
        siteReputation.hasErrorPattern('certificate');
    if (hasSslErrors) {
      // Copy the headers before adding the Sec-Fetch-* entries so the map
      // held by the previous strategy is left untouched.
      plan = plan.copyWith(
        headers: Map<String, String>.from(plan.headers)
          ..addAll(const {
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-User': '?1',
          }),
      );
    }
  }

  // Step 3: pick the concrete user agent once all header changes are done.
  if (plan.useRandomUserAgent) {
    plan = plan.copyWith(
      headers: Map<String, String>.from(plan.headers)
        ..['User-Agent'] = _getRandomUserAgent(),
    );
  }

  return plan;
}