fetchJson method - WebScraper class - web_scraper library

fetchJson method

Future<Map<String, dynamic>> fetchJson({

required String url,
Map<String, String>? headers,
int? timeout,
int? retries,
bool ignoreRobotsTxt = false,

})

Fetches JSON content from the given URL.

url is the URL to fetch. headers are additional headers to send with the request. timeout is the timeout for the request, in milliseconds. retries is the number of retry attempts. ignoreRobotsTxt is whether to ignore robots.txt rules (default: false).

Implementation

/// Fetches the resource at [url] and decodes its body as a JSON object.
///
/// The request is sent with a `User-Agent` and an `Accept: application/json`
/// header. Entries from `_defaultHeaders`, and then from [headers], override
/// those two defaults when they use the same key.
///
/// [timeout] is the request timeout in milliseconds and [retries] is the
/// number of retry attempts; when omitted they fall back to
/// `_defaultTimeout` and `_maxRetries` respectively.
///
/// When robots.txt checking is enabled (`_respectRobotsTxt`) and
/// [ignoreRobotsTxt] is `false`, the URL is checked against robots.txt for
/// the effective `User-Agent` before any request is made.
///
/// Throws a [ScrapingException] created with `ScrapingException.robotsTxt`
/// when the URL is disallowed, and one created with
/// `ScrapingException.parsing` when the body is not valid JSON or its
/// top-level value is not a JSON object. Both are marked non-retryable.
Future<Map<String, dynamic>> fetchJson({
  required String url,
  Map<String, String>? headers,
  int? timeout,
  int? retries,
  bool ignoreRobotsTxt = false,
}) async {
  // Later entries win: instance-wide defaults override the two built-in
  // headers, and per-call headers override everything.
  final effectiveHeaders = {
    'User-Agent': _defaultUserAgent,
    'Accept': 'application/json',
    ..._defaultHeaders,
    ...?headers,
  };

  final effectiveTimeout = timeout ?? _defaultTimeout;
  final effectiveRetries = retries ?? _maxRetries;

  // Check robots.txt if enabled and not explicitly ignored.
  if (_respectRobotsTxt && !ignoreRobotsTxt) {
    final userAgent = effectiveHeaders['User-Agent'] ?? _defaultUserAgent;
    final isAllowed = await _robotsTxtHandler.isAllowed(url, userAgent);

    if (!isAllowed) {
      _logger.warning('URL not allowed by robots.txt: $url');
      throw ScrapingException.robotsTxt(
        'URL not allowed by robots.txt',
        url: url,
        isRetryable: false,
      );
    }
  }

  final response = await _fetchWithRetry(
    url: url,
    headers: effectiveHeaders,
    timeout: effectiveTimeout,
    retries: effectiveRetries,
  );

  try {
    final decoded = json.decode(response);
    if (decoded is Map<String, dynamic>) {
      return decoded;
    }
    // Valid JSON, but not an object (e.g. an array, scalar or null). Report
    // it as a format problem with a descriptive cause instead of letting a
    // blind cast fail with an opaque TypeError.
    throw FormatException(
      'Expected a JSON object at the top level but got '
      '${decoded.runtimeType}',
    );
  } on FormatException catch (e) {
    throw ScrapingException.parsing(
      'Failed to parse JSON response',
      originalException: e,
      url: url,
      isRetryable: false,
    );
  }
}