fetchHtml method
Future<String> fetchHtml({
  required String url,
  required Map<String, String> headers,
  required int timeout,
  required ScrapingLogger logger,
})
override
Fetches HTML content from the given URL
Implementation
/// Fetches the HTML content at [url] using vegamovies-specific fallbacks.
///
/// For every user agent returned by
/// `_userAgentManager.getUserAgentSequenceForProblematicSite`, these
/// strategies are tried in order until one produces a 2xx response:
///
/// 1. A direct `dart:io` [HttpClient] request.
/// 2. A `package:http` GET request.
/// 3. A `package:http` GET request against each known mirror domain,
///    keeping the original path and query string.
///
/// A missing scheme on [url] is defaulted to `https://`. [headers] are merged
/// over the built-in browser-like headers, so caller-supplied values win on
/// conflict. [timeout] is in milliseconds and bounds each individual request.
/// Progress and failures are reported through [logger].
///
/// Throws an [Exception] when every strategy fails for every user agent.
@override
Future<String> fetchHtml({
  required String url,
  required Map<String, String> headers,
  required int timeout,
  required ScrapingLogger logger,
}) async {
  logger.info('Using specialized handler for vegamovies');

  // Get a sequence of user agents to try for this site.
  final userAgents = _userAgentManager.getUserAgentSequenceForProblematicSite(
    url,
  );
  logger.info('Prepared ${userAgents.length} user agents to try');

  // Ensure URL has proper scheme.
  if (!url.startsWith('http://') && !url.startsWith('https://')) {
    url = 'https://$url';
  }

  // Loop-invariant values, built once instead of once per user agent.
  final requestTimeout = Duration(milliseconds: timeout);
  const domains = [
    'vegamovies.tv',
    'vegamovies.td',
    'vegamovies.nl',
    'vegamovies.lol',
  ];

  for (final userAgent in userAgents) {
    logger.info('Trying with user agent: ${_truncateUserAgent(userAgent)}');

    // Enhanced headers specifically for vegamovies. Caller headers are spread
    // last so they override the defaults.
    //
    // NOTE(review): dart:io's HttpClient only auto-decompresses gzip. If a
    // server honours the advertised 'deflate' or 'br', the UTF-8 decode of the
    // body may fail. Left unchanged here because the value may matter for
    // looking like a browser - confirm.
    final enhancedHeaders = {
      'User-Agent': userAgent,
      'Accept':
          'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language': 'en-US,en;q=0.5',
      'Accept-Encoding': 'gzip, deflate, br',
      'Connection': 'keep-alive',
      'Upgrade-Insecure-Requests': '1',
      'Cache-Control': 'max-age=0',
      'Sec-Fetch-Dest': 'document',
      'Sec-Fetch-Mode': 'navigate',
      'Sec-Fetch-Site': 'none',
      'Sec-Fetch-User': '?1',
      'Pragma': 'no-cache',
      ...headers,
    };

    try {
      // First try: HttpClient approach.
      logger.info('Attempting direct HttpClient approach');
      try {
        final result = await _fetchViaHttpClient(
          url: url,
          headers: enhancedHeaders,
          timeout: timeout,
          logger: logger,
        );
        logger.success('Successfully fetched with HttpClient');
        return result;
      } catch (e) {
        logger.error('HttpClient approach failed: $e');
      }

      // Second try: http package.
      logger.info('Attempting with http package');
      try {
        final response = await http
            .get(Uri.parse(url), headers: enhancedHeaders)
            .timeout(requestTimeout);
        if (response.statusCode >= 200 && response.statusCode < 300) {
          logger.success('Successfully fetched with http package');
          return response.body;
        }
        logger.error('HTTP error: ${response.statusCode}');
      } catch (e) {
        logger.error('http package approach failed: $e');
      }

      // Third try: the same resource on each mirror domain.
      logger.info('Attempting with different domain extensions');
      for (final domain in domains) {
        if (url.contains(domain)) continue;
        try {
          // Re-target the full path and query at the mirror. Taking only the
          // last '/'-separated piece of the URL loses the resource for
          // trailing-slash URLs and yields the original host name as the path
          // for URLs without one.
          final original = Uri.parse(url);
          final alternativeUri = Uri(
            scheme: 'https',
            host: domain,
            path: original.path,
            query: original.hasQuery ? original.query : null,
          );
          logger.info('Trying alternative domain: $alternativeUri');
          final response = await http
              .get(alternativeUri, headers: enhancedHeaders)
              .timeout(requestTimeout);
          if (response.statusCode >= 200 && response.statusCode < 300) {
            logger.success(
              'Successfully fetched with alternative domain: $domain',
            );
            return response.body;
          }
          logger.error(
            'HTTP error with alternative domain: ${response.statusCode}',
          );
        } catch (e) {
          logger.error(
            'Alternative domain approach failed for $domain: $e',
          );
        }
      }
    } catch (e) {
      // Safety net: the inner try/catch blocks are expected to handle every
      // failure, so this should not normally be reached.
      logger.error(
        'Unexpected error with user agent ${_truncateUserAgent(userAgent)}: $e',
      );
    }
    // All approaches with this user agent failed; move on to the next one.
  }

  // All user agents and approaches failed.
  throw Exception(
    'All approaches failed for vegamovies after trying ${userAgents.length} user agents',
  );
}

/// Performs one GET of [url] with `dart:io`'s [HttpClient] and returns the
/// body decoded as UTF-8.
///
/// [timeout] is in milliseconds: half of it bounds connection setup, and the
/// full value bounds waiting for the response head and, separately, reading
/// the body.
///
/// Throws an [HttpException] when the status code is not 2xx, so that an error
/// page is never mistaken for a successful fetch. Other I/O, timeout and
/// decoding errors propagate to the caller.
Future<String> _fetchViaHttpClient({
  required String url,
  required Map<String, String> headers,
  required int timeout,
  required ScrapingLogger logger,
}) async {
  final requestTimeout = Duration(milliseconds: timeout);

  // NOTE(review): badCertificateCallback accepts every certificate, which
  // disables TLS validation for this request. Kept as in the original
  // implementation - confirm this is intentional.
  final httpClient = HttpClient()
    ..connectionTimeout = Duration(milliseconds: timeout ~/ 2)
    ..idleTimeout = requestTimeout
    ..badCertificateCallback = (cert, host, port) => true;

  try {
    final uri = Uri.parse(url);
    final request = await httpClient.getUrl(uri);
    for (final header in headers.entries) {
      request.headers.set(header.key, header.value);
    }

    logger.request('Sending request to $url');
    final response = await request.close().timeout(requestTimeout);
    logger.response('Received response: ${response.statusCode}');

    if (response.statusCode < 200 || response.statusCode >= 300) {
      throw HttpException('HTTP error: ${response.statusCode}', uri: uri);
    }

    return await response
        .transform(utf8.decoder)
        .join()
        .timeout(requestTimeout);
  } finally {
    // Force-close so an undrained or timed-out response does not keep the
    // underlying connection open.
    httpClient.close(force: true);
  }
}