fetchHtmlStream method
Fetches the response body of an HTTP GET request to the given URL as a byte stream.
url
is the URL to fetch
headers
are additional headers to send with the request
timeout
is the timeout for the request in milliseconds; when omitted, the client's default timeout is used
retries
is the number of retry attempts
priority
is the priority of the request (higher values = higher priority)
Implementation
/// Sends an HTTP GET request to [url] and returns the response body as a
/// stream of byte chunks.
///
/// The request is submitted to `_taskQueue` with the given [priority] and a
/// task name derived from [url]; the returned future completes with whatever
/// the queue yields for that task.
///
/// Parameters:
/// - [url]: the address to fetch. It is parsed with [Uri.parse] inside the
///   queued task, so a malformed URL surfaces as a task failure.
/// - [headers]: extra request headers. A `'User-Agent'` entry here overrides
///   the one generated by `_userAgentRotator`.
/// - [timeout]: maximum time, in milliseconds, to wait for the response to
///   `send`; falls back to `_defaultTimeout` when null. It does not bound
///   how long the returned body stream may take to deliver data.
/// - [retries]: accepted for interface compatibility.
///   NOTE(review): this value is not referenced anywhere in this method —
///   confirm whether it should be forwarded to the task queue.
/// - [priority]: scheduling priority passed through to the task queue.
///
/// Returns the response body stream when the status code is in the 2xx
/// range. The caller is responsible for consuming or cancelling that stream.
///
/// Throws a [ScrapingException] for any other status code:
/// - 429: `ScrapingException.rateLimit` (retryable)
/// - 403: `ScrapingException.permission` (not retryable)
/// - 401: `ScrapingException.authentication` (not retryable)
/// - 5xx: `ScrapingException.http` (retryable)
/// - anything else: `ScrapingException.http` (not retryable)
Future<Stream<List<int>>> fetchHtmlStream({
  required String url,
  Map<String, String>? headers,
  int? timeout,
  int? retries,
  int priority = 0,
}) async {
  // Caller-supplied entries are spread last so they win over the generated
  // User-Agent when the same key is present.
  final effectiveHeaders = {
    'User-Agent': _userAgentRotator.getRandomUserAgent(),
    ...?headers,
  };

  return _taskQueue.addTask<Stream<List<int>>>(
    task: () async {
      final request = http.Request('GET', Uri.parse(url))
        ..headers.addAll(effectiveHeaders);
      final response = await _httpClient
          .send(request)
          .timeout(Duration(milliseconds: timeout ?? _defaultTimeout));

      final statusCode = response.statusCode;
      if (statusCode >= 200 && statusCode < 300) {
        return response.stream;
      }

      // The error body is never handed to the caller, so release it here.
      // Cancelling (rather than draining) avoids reading an arbitrarily
      // large error payload just to throw it away.
      try {
        await response.stream.listen(null).cancel();
      } catch (_) {
        // Best-effort cleanup: a failure while discarding the body must not
        // mask the HTTP status error reported below.
      }

      // Map the status code to the most specific exception available.
      if (statusCode == 429) {
        throw ScrapingException.rateLimit(
          'Rate limit exceeded',
          url: url,
          statusCode: statusCode,
          isRetryable: true,
        );
      }
      if (statusCode == 403) {
        throw ScrapingException.permission(
          'Access forbidden',
          url: url,
          statusCode: statusCode,
          isRetryable: false,
        );
      }
      if (statusCode == 401) {
        throw ScrapingException.authentication(
          'Authentication required',
          url: url,
          statusCode: statusCode,
          isRetryable: false,
        );
      }
      if (statusCode >= 500) {
        throw ScrapingException.http(
          'Server error',
          url: url,
          statusCode: statusCode,
          isRetryable: true,
        );
      }

      // Remaining codes (1xx, 3xx, other 4xx). 429 and 5xx were handled
      // above, so nothing that reaches this point is retryable.
      throw ScrapingException.http(
        'HTTP error: $statusCode',
        url: url,
        statusCode: statusCode,
        isRetryable: false,
      );
    },
    priority: priority,
    taskName: 'FetchHTMLStream-$url',
  );
}