fetchJson method
Fetches JSON content from the given URL and decodes it into a map
url
is the URL to fetch
headers
are additional headers to send with the request
timeout
is the timeout for the request in milliseconds
retries
is the number of retry attempts
ignoreRobotsTxt
is whether to ignore robots.txt rules (default: false)
Implementation
/// Fetches [url] and decodes the response body as a JSON object.
///
/// The request is sent with the default `User-Agent` and an
/// `Accept: application/json` header. Entries from `_defaultHeaders`, and
/// then from [headers], replace earlier entries that use the same key
/// (keys are compared exactly as written).
///
/// [timeout] is the request timeout in milliseconds and [retries] is the
/// number of retry attempts; when omitted they fall back to
/// `_defaultTimeout` and `_maxRetries`. Both are forwarded unchanged to
/// `_fetchWithRetry`.
///
/// When robots.txt checking is enabled (`_respectRobotsTxt`) and
/// [ignoreRobotsTxt] is false, the URL is checked against robots.txt
/// before any request is made.
///
/// Throws a [ScrapingException] (non-retryable) when robots.txt disallows
/// the URL, when the body is not valid JSON, or when the top-level JSON
/// value is not an object.
Future<Map<String, dynamic>> fetchJson({
  required String url,
  Map<String, String>? headers,
  int? timeout,
  int? retries,
  bool ignoreRobotsTxt = false,
}) async {
  // Later spreads win on key collision, so caller-supplied headers take
  // precedence over the instance defaults, which in turn take precedence
  // over the two built-in entries.
  final effectiveHeaders = {
    'User-Agent': _defaultUserAgent,
    'Accept': 'application/json',
    ..._defaultHeaders,
    ...?headers,
  };
  final effectiveTimeout = timeout ?? _defaultTimeout;
  final effectiveRetries = retries ?? _maxRetries;

  // Check robots.txt if enabled and not explicitly ignored.
  if (_respectRobotsTxt && !ignoreRobotsTxt) {
    // Use the agent that will actually be sent, so the robots.txt rules are
    // evaluated for an overridden User-Agent as well.
    final userAgent = effectiveHeaders['User-Agent'] ?? _defaultUserAgent;
    final isAllowed = await _robotsTxtHandler.isAllowed(url, userAgent);
    if (!isAllowed) {
      _logger.warning('URL not allowed by robots.txt: $url');
      throw ScrapingException.robotsTxt(
        'URL not allowed by robots.txt',
        url: url,
        isRetryable: false,
      );
    }
  }

  final response = await _fetchWithRetry(
    url: url,
    headers: effectiveHeaders,
    timeout: effectiveTimeout,
    retries: effectiveRetries,
  );

  try {
    final decoded = json.decode(response);
    if (decoded is Map<String, dynamic>) {
      return decoded;
    }
    // Valid JSON whose top-level value is an array, scalar or null. Report
    // it as a format problem instead of relying on a failed `as` cast, so
    // the wrapped cause describes what was actually wrong.
    throw FormatException(
      'Expected a top-level JSON object but got ${decoded.runtimeType}',
    );
  } on FormatException catch (e) {
    // Only format problems are translated; programming errors propagate
    // untouched instead of being disguised as parse failures.
    throw ScrapingException.parsing(
      'Failed to parse JSON response',
      originalException: e,
      url: url,
      isRetryable: false,
    );
  }
}