fetchHtmlStream method

Future<Stream<List<int>>> fetchHtmlStream({
  1. required String url,
  2. Map<String, String>? headers,
  3. int? timeout,
  4. int? retries,
  5. int priority = 0,
})

Fetches HTML content as a stream from the given URL.

- `url` is the URL to fetch.
- `headers` are additional headers to send with the request.
- `timeout` is the timeout for the request in milliseconds.
- `retries` is the number of retry attempts.
- `priority` is the priority of the request (higher values = higher priority).

Implementation

/// Fetches the response body of [url] as a byte stream.
///
/// The GET request is scheduled through `_taskQueue` with the given
/// [priority] and is sent with a `User-Agent` obtained from
/// `_userAgentRotator`. Entries in [headers] are applied after the generated
/// `User-Agent`, so a caller-supplied value takes precedence.
///
/// [timeout] is in milliseconds and falls back to `_defaultTimeout`. It bounds
/// the wait for the `send` call to complete; reading the returned stream is
/// not covered by it.
///
/// NOTE(review): [retries] is accepted but not referenced anywhere in this
/// method body. Confirm whether it should be forwarded to the task queue.
///
/// Returns the response stream when the status code is in the 2xx range.
/// Otherwise the task throws a `ScrapingException`:
///
/// * 429: `rateLimit`, retryable.
/// * 403: `permission`, not retryable.
/// * 401: `authentication`, not retryable.
/// * 500 and above: `http` ("Server error"), retryable.
/// * anything else: `http` ("HTTP error: <code>"), not retryable.
///
/// How errors raised inside the task (including a timeout) surface to the
/// caller depends on `_taskQueue.addTask` — presumably they propagate through
/// the returned future; verify against the queue implementation.
Future<Stream<List<int>>> fetchHtmlStream({
  required String url,
  Map<String, String>? headers,
  int? timeout,
  int? retries,
  int priority = 0,
}) async {
  // Caller headers are spread last so they can override the rotated agent.
  final effectiveHeaders = {
    'User-Agent': _userAgentRotator.getRandomUserAgent(),
    ...?headers,
  };

  return _taskQueue.addTask<Stream<List<int>>>(
    task: () async {
      final request = http.Request('GET', Uri.parse(url))
        ..headers.addAll(effectiveHeaders);

      final response = await _httpClient
          .send(request)
          .timeout(Duration(milliseconds: timeout ?? _defaultTimeout));

      final statusCode = response.statusCode;
      if (statusCode >= 200 && statusCode < 300) {
        return response.stream;
      }

      // The body of a failed response is never handed to the caller, so
      // release it here instead of leaving the stream unconsumed.
      try {
        await response.stream.listen(null).cancel();
      } catch (_) {
        // Best-effort cleanup: a failure while discarding the body must not
        // mask the HTTP error reported below.
      }

      // Map the status code to the matching exception.
      if (statusCode == 429) {
        throw ScrapingException.rateLimit(
          'Rate limit exceeded',
          url: url,
          statusCode: statusCode,
          isRetryable: true,
        );
      }
      if (statusCode == 403) {
        throw ScrapingException.permission(
          'Access forbidden',
          url: url,
          statusCode: statusCode,
          isRetryable: false,
        );
      }
      if (statusCode == 401) {
        throw ScrapingException.authentication(
          'Authentication required',
          url: url,
          statusCode: statusCode,
          isRetryable: false,
        );
      }
      if (statusCode >= 500) {
        throw ScrapingException.http(
          'Server error',
          url: url,
          statusCode: statusCode,
          isRetryable: true,
        );
      }

      // 429 and 5xx were handled above, so nothing reaching this point is
      // retryable.
      throw ScrapingException.http(
        'HTTP error: $statusCode',
        url: url,
        statusCode: statusCode,
        isRetryable: false,
      );
    },
    priority: priority,
    taskName: 'FetchHTMLStream-$url',
  );
}