handleLazyLoading method

Future<LazyLoadResult> handleLazyLoading({
  required String url,
  required LazyLoadConfig config,
  Map<String, String>? headers,
})

Handles lazy loading for a URL

`url` is the URL to handle. `config` is the lazy loading configuration. `headers` are additional headers to send with the request.

Implementation

/// Fetches [url] and, if lazy loading is detected, tries to reveal the
/// lazily-loaded content with a headless browser.
///
/// The flow is:
///
/// 1. If `config.handleLazyLoading` is false, the HTML is fetched and
///    returned as-is via [LazyLoadResult.original].
/// 2. Otherwise the HTML is fetched and inspected by the lazy-load detector.
///    If nothing is detected, or the detected lazy loading needs neither
///    JavaScript, scrolling nor interaction, the original HTML is returned.
/// 3. If `config.useHeadlessBrowser` is false, the original HTML is returned
///    in a result flagged as lazy-loaded (with zero counters) and a warning
///    is logged.
/// 4. Otherwise a headless browser is launched, the page is scrolled up to
///    `config.maxScrollDepth` times and/or the detected trigger elements are
///    clicked, and the resulting page source is returned together with the
///    scroll count, click count and elapsed time.
///
/// [headers] are forwarded to the HTML fetch and to the headless browser; a
/// `User-Agent` entry, if present, is also used as the browser user agent.
///
/// Throws a [ScrapingException] (created with `ScrapingException.lazyLoading`
/// and marked retryable) for any failure that happens after the "disabled"
/// check. When lazy loading handling is disabled, errors from the fetch
/// propagate unchanged.
Future<LazyLoadResult> handleLazyLoading({
  required String url,
  required LazyLoadConfig config,
  Map<String, String>? headers,
}) async {
  if (!config.handleLazyLoading) {
    // Lazy loading handling is disabled: plain fetch, no detection.
    final html = await _fetchHtml(url, headers);
    return LazyLoadResult.original(html);
  }

  try {
    // Fetch the HTML normally first so it can be inspected.
    final html = await _fetchHtml(url, headers);

    final detectionResult = _lazyLoadDetector.detectLazyLoading(html);

    if (!detectionResult.hasLazyLoading) {
      logger?.info('No lazy loading detected for $url');
      return LazyLoadResult.original(html);
    }

    logger?.info(
      'Detected ${detectionResult.type} lazy loading for $url. '
      'Requires JavaScript: ${detectionResult.requiresJavaScript}, '
      'Requires scrolling: ${detectionResult.requiresScrolling}, '
      'Requires interaction: ${detectionResult.requiresInteraction}',
    );

    // Lazy loading that needs no JavaScript, scrolling or interaction is
    // already fully present in the fetched HTML.
    final needsBrowser = detectionResult.requiresJavaScript ||
        detectionResult.requiresScrolling ||
        detectionResult.requiresInteraction;
    if (!needsBrowser) {
      return LazyLoadResult.original(html);
    }

    if (!config.useHeadlessBrowser) {
      // A browser would be needed but is disabled: hand back what we have.
      logger?.warning(
        'Lazy loading detected but headless browser is disabled. '
        'Some content may not be available.',
      );
      return LazyLoadResult(
        html: html,
        lazyLoadingDetected: true,
        lazyLoadType: detectionResult.type,
        scrollCount: 0,
        clickCount: 0,
        totalTimeMs: 0,
      );
    }

    // Stopwatch is monotonic, unlike differences of DateTime.now().
    final stopwatch = Stopwatch()..start();
    var scrollCount = 0;
    var clickCount = 0;

    final browserConfig = HeadlessBrowserConfig(
      url: url,
      headers: headers,
      waitForDomContentLoaded: config.waitForDomContentLoaded,
      waitForNetworkIdle: config.waitForNetworkIdle,
      timeout: config.maxWaitTimeMs,
      userAgent: headers?['User-Agent'],
    );

    logger?.info('Launching headless browser for $url');
    final browser = await _headlessBrowser.launch(browserConfig);

    try {
      if (detectionResult.requiresScrolling) {
        logger?.info('Scrolling to reveal lazy-loaded content');
        for (var i = 0; i < config.maxScrollDepth; i++) {
          // scrollBy takes pixel values. The step size is a fraction of the
          // viewport height, so convert it in JavaScript; a CSS unit such as
          // `50vh` is not valid JavaScript and would fail to parse.
          await browser.executeScript(
            'window.scrollBy(0, window.innerHeight * ${config.scrollStepSize});',
          );
          scrollCount++;

          // Give newly revealed content time to load.
          await Future.delayed(
            Duration(milliseconds: config.scrollDelayMs),
          );

          // Stop early once the bottom of the page is reached. A null or
          // non-bool script result is treated as "not at bottom" instead of
          // failing the whole operation; the loop stays bounded by
          // maxScrollDepth.
          final atBottom = await browser.executeScript(
            'return (window.innerHeight + window.scrollY) >= document.body.scrollHeight;',
          );
          if (atBottom == true) {
            logger?.info('Reached the bottom of the page');
            break;
          }
        }
      }

      if (detectionResult.requiresInteraction &&
          config.clickLoadMoreButtons &&
          detectionResult.triggerElements.isNotEmpty) {
        logger?.info('Clicking on load more buttons');
        for (final element in detectionResult.triggerElements) {
          // Build a selector from the most specific attribute available:
          // id first, then the class list, then the visible text.
          final id = element.id;
          final classes = element.classes.join(' ');
          final text = element.text.trim();

          String? selector;
          if (id.isNotEmpty) {
            selector = '#$id';
          } else if (classes.isNotEmpty) {
            selector = '.${classes.replaceAll(' ', '.')}';
          } else if (text.isNotEmpty) {
            // NOTE(review): `:contains(...)` is a jQuery extension, not
            // standard CSS. Confirm that browser.click supports it;
            // otherwise this fallback always ends up in the catch below.
            selector = 'button:contains("$text"), a:contains("$text")';
          }

          if (selector == null) continue;

          try {
            await browser.click(selector);
            clickCount++;

            // Give the content triggered by the click time to load.
            await Future.delayed(
              Duration(milliseconds: config.scrollDelayMs),
            );
          } catch (e) {
            // Best effort: one failed click must not abort the others.
            logger?.warning(
              'Failed to click element with selector "$selector": $e',
            );
          }
        }
      }

      final finalHtml = await browser.getPageSource();
      final totalTimeMs = stopwatch.elapsedMilliseconds;

      logger?.info(
        'Lazy loading handled in ${totalTimeMs}ms. '
        'Scroll count: $scrollCount, Click count: $clickCount',
      );

      return LazyLoadResult(
        html: finalHtml,
        lazyLoadingDetected: true,
        lazyLoadType: detectionResult.type,
        scrollCount: scrollCount,
        clickCount: clickCount,
        totalTimeMs: totalTimeMs,
      );
    } finally {
      // Always release the browser, even when scrolling or clicking failed.
      await browser.close();
    }
  } catch (e) {
    logger?.error('Error handling lazy loading: $e');
    throw ScrapingException.lazyLoading(
      'Error handling lazy loading',
      originalException: e,
      isRetryable: true,
    );
  }
}