fetchHtml method

@override
Future<String> fetchHtml({
  required String url,
  required Map<String, String> headers,
  required int timeout,
  required ScrapingLogger logger,
})
override

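Fetches the HTML content of the given URL, retrying with a sequence of user agents, two different HTTP clients, and alternative vegamovies domains until one attempt succeeds. Throws an Exception if every attempt fails.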

Implementation

/// Fetches the HTML document at [url] using vegamovies-specific fallbacks.
///
/// For every user agent supplied by `_userAgentManager`, the following
/// strategies are attempted in order until one yields a 2xx response:
///
/// 1. a direct `dart:io` [HttpClient] request,
/// 2. a `package:http` GET request,
/// 3. a `package:http` GET against each known mirror domain, keeping the
///    path and query of the original URL.
///
/// If [url] has no `http://` or `https://` prefix, `https://` is prepended.
/// [headers] are merged over the built-in browser-like headers, so values
/// supplied by the caller win. [timeout] is in milliseconds and bounds each
/// individual request. Progress and failures are reported through [logger].
///
/// Throws an [Exception] when every user agent and strategy has failed.
@override
Future<String> fetchHtml({
  required String url,
  required Map<String, String> headers,
  required int timeout,
  required ScrapingLogger logger,
}) async {
  logger.info('Using specialized handler for vegamovies');

  // Get a sequence of user agents to try for this site.
  final userAgents = _userAgentManager.getUserAgentSequenceForProblematicSite(
    url,
  );
  logger.info('Prepared ${userAgents.length} user agents to try');

  // Ensure the URL has a proper scheme (without reassigning the parameter).
  final hasScheme = url.startsWith('http://') || url.startsWith('https://');
  final targetUrl = hasScheme ? url : 'https://$url';

  final requestTimeout = Duration(milliseconds: timeout);

  // Known mirrors to fall back to when the requested domain is unreachable.
  const mirrorDomains = [
    'vegamovies.tv',
    'vegamovies.td',
    'vegamovies.nl',
    'vegamovies.lol',
  ];

  bool isSuccess(int statusCode) => statusCode >= 200 && statusCode < 300;

  for (final userAgent in userAgents) {
    logger.info('Trying with user agent: ${_truncateUserAgent(userAgent)}');

    // Browser-like headers; caller-supplied headers override these defaults.
    final enhancedHeaders = {
      'User-Agent': userAgent,
      'Accept':
          'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language': 'en-US,en;q=0.5',
      // Only advertise gzip: dart:io transparently decompresses gzip bodies
      // but not deflate or brotli, which would reach the UTF-8 decoder as
      // raw compressed bytes.
      'Accept-Encoding': 'gzip',
      'Connection': 'keep-alive',
      'Upgrade-Insecure-Requests': '1',
      'Cache-Control': 'max-age=0',
      'Sec-Fetch-Dest': 'document',
      'Sec-Fetch-Mode': 'navigate',
      'Sec-Fetch-Site': 'none',
      'Sec-Fetch-User': '?1',
      'Pragma': 'no-cache',
      ...headers,
    };

    // First try: direct HttpClient approach.
    logger.info('Attempting direct HttpClient approach');
    final httpClient = HttpClient();
    try {
      httpClient
        ..connectionTimeout = Duration(milliseconds: timeout ~/ 2)
        ..idleTimeout = requestTimeout
        // NOTE(security): this accepts ANY TLS certificate, so the fetched
        // HTML can be tampered with in transit. Kept as a deliberate
        // best-effort for misconfigured mirrors; treat the result as
        // untrusted input.
        ..badCertificateCallback = (cert, host, port) => true;

      final request = await httpClient.getUrl(Uri.parse(targetUrl));
      for (final header in enhancedHeaders.entries) {
        request.headers.set(header.key, header.value);
      }

      logger.request('Sending request to $targetUrl');
      // connectionTimeout only covers connecting, so bound the response
      // and the body download explicitly to avoid hanging on a stalled
      // server.
      final response = await request.close().timeout(requestTimeout);
      logger.response('Received response: ${response.statusCode}');

      if (isSuccess(response.statusCode)) {
        final body = await response
            .transform(utf8.decoder)
            .join()
            .timeout(requestTimeout);
        logger.success('Successfully fetched with HttpClient');
        return body;
      }
      // An error page is not a successful fetch; fall through to the next
      // strategy instead of returning its body.
      logger.error('HTTP error: ${response.statusCode}');
    } catch (e) {
      logger.error('HttpClient approach failed: $e');
    } finally {
      // Force-close so an undrained or timed-out response cannot keep the
      // connection (and the client) alive.
      httpClient.close(force: true);
    }

    // Second try: http package.
    logger.info('Attempting with http package');
    try {
      final response = await http
          .get(Uri.parse(targetUrl), headers: enhancedHeaders)
          .timeout(requestTimeout);

      if (isSuccess(response.statusCode)) {
        logger.success('Successfully fetched with http package');
        return response.body;
      }
      logger.error('HTTP error: ${response.statusCode}');
    } catch (e) {
      logger.error('http package approach failed: $e');
    }

    // Third try: same resource on each alternative domain.
    logger.info('Attempting with different domain extensions');
    for (final domain in mirrorDomains) {
      if (targetUrl.contains(domain)) continue;

      try {
        // Swap only the host so the full path and query are preserved;
        // taking just the last path segment would request a different (or
        // empty) resource on the mirror.
        final source = Uri.parse(targetUrl);
        final alternativeUrl = Uri(
          scheme: 'https',
          host: domain,
          path: source.path,
          query: source.hasQuery ? source.query : null,
        );

        logger.info('Trying alternative domain: $alternativeUrl');
        final response = await http
            .get(alternativeUrl, headers: enhancedHeaders)
            .timeout(requestTimeout);

        if (isSuccess(response.statusCode)) {
          logger.success(
            'Successfully fetched with alternative domain: $domain',
          );
          return response.body;
        }
        logger.error(
          'HTTP error with alternative domain: ${response.statusCode}',
        );
      } catch (e) {
        logger.error('Alternative domain approach failed for $domain: $e');
      }
    }

    // All approaches failed with this user agent; move on to the next one.
  }

  // Every user agent and approach failed.
  throw Exception(
    'All approaches failed for vegamovies after trying ${userAgents.length} user agents',
  );
}