getStrategyForUrl method
Gets the optimal scraping strategy for the given URL, starting from the default strategy and adjusting it using the site's tracked reputation.
Implementation
/// Builds the [ScrapingStrategy] to use when scraping [url].
///
/// Starts from [_defaultStrategy] and layers adjustments on top of it:
///
/// 1. If the reputation tracker flags the site as problematic, retries and
///    timeout are doubled, the initial backoff is halved, and random user
///    agents, proxy rotation on retry and proxy validation are switched on.
/// 2. If the tracker has reputation data for [url], headers, timeout and
///    retries are replaced with the values the tracker returns, and known
///    error patterns (timeouts, closed/reset connections, SSL/certificate
///    errors) trigger further tweaks.
/// 3. If the resulting strategy uses a random user agent, a `User-Agent`
///    header obtained from [_getRandomUserAgent] is written into its headers.
///
/// Returns the adjusted strategy; every step goes through `copyWith`.
ScrapingStrategy getStrategyForUrl(String url) {
  final problematic = _reputationTracker.isProblematicSite(url);
  final siteReputation = _reputationTracker.getReputation(url);

  // Baseline that every adjustment below derives from.
  ScrapingStrategy tuned = _defaultStrategy;

  // Problematic sites: try harder, wait longer per request, but retry sooner.
  if (problematic) {
    tuned = tuned.copyWith(
      retries: tuned.retries * 2,
      timeout: tuned.timeout * 2,
      // Half of the current initial backoff, truncated by toInt().
      initialBackoff: (tuned.initialBackoff * 0.5).toInt(),
      useRandomUserAgent: true,
      rotateProxiesOnRetry: true,
      validateProxies: true,
    );
  }

  // Reputation-driven refinements only apply when the tracker knows the site.
  if (siteReputation != null) {
    // Each tracker call receives the current value as its second argument.
    tuned = tuned.copyWith(
      headers: _reputationTracker.getOptimalHeaders(url, tuned.headers),
      timeout: _reputationTracker.getOptimalTimeout(url, tuned.timeout),
      retries: _reputationTracker.getOptimalRetries(url, tuned.retries),
    );

    // Repeated timeouts: double the timeout again and set the backoff
    // multiplier to 2.0.
    final timesOut = siteReputation.hasErrorPattern('timeout');
    if (timesOut) {
      tuned = tuned.copyWith(
        timeout: tuned.timeout * 2,
        backoffMultiplier: 2.0,
      );
    }

    // Dropped connections: cut the initial backoff to 30% of its current
    // value (truncated) and rotate proxies between attempts.
    final dropsConnections =
        siteReputation.hasErrorPattern('connection closed') ||
            siteReputation.hasErrorPattern('connection reset');
    if (dropsConnections) {
      tuned = tuned.copyWith(
        initialBackoff: (tuned.initialBackoff * 0.3).toInt(),
        rotateProxiesOnRetry: true,
      );
    }

    // SSL / certificate errors: add the Sec-Fetch-* headers to a copy of the
    // current headers.
    final hasTlsTrouble = siteReputation.hasErrorPattern('ssl') ||
        siteReputation.hasErrorPattern('certificate');
    if (hasTlsTrouble) {
      final withFetchHeaders = Map<String, String>.from(tuned.headers)
        ..['Sec-Fetch-Dest'] = 'document'
        ..['Sec-Fetch-Mode'] = 'navigate'
        ..['Sec-Fetch-Site'] = 'none'
        ..['Sec-Fetch-User'] = '?1';
      tuned = tuned.copyWith(headers: withFetchHeaders);
    }
  }

  // Set the User-Agent last so it overrides any value already in the headers.
  if (tuned.useRandomUserAgent) {
    tuned = tuned.copyWith(
      headers: Map<String, String>.from(tuned.headers)
        ..['User-Agent'] = _getRandomUserAgent(),
    );
  }

  return tuned;
}