extractContentWithPagination method

Future<List<TextExtractionResult>> extractContentWithPagination({
  required String url,
  required PaginationConfig paginationConfig,
  LazyLoadConfig? lazyLoadConfig,
  TextExtractionOptions textExtractionOptions = const TextExtractionOptions(),
  Map<String, String>? headers,
  int? timeout,
  int? retries,
})

Extracts the main content from multiple pages by following pagination.

Parameters:

- `url` is the starting URL.
- `paginationConfig` is the pagination configuration.
- `lazyLoadConfig` is the lazy loading configuration (optional).
- `textExtractionOptions` are the text extraction options (optional).
- `headers` are additional headers to send with the request.
- `timeout` is the timeout for the request in milliseconds.
- `retries` is the number of retry attempts.

Implementation

/// Extracts text content from each page visited while paginating from [url].
///
/// Pagination is delegated to `paginationHandler.scrapeWithPagination`, which
/// is given a per-page callback that turns a page's HTML into a
/// [TextExtractionResult].
///
/// * [url] is the starting URL.
/// * [paginationConfig] is the pagination configuration handed to the
///   pagination handler.
/// * [lazyLoadConfig] is optional. When it is non-null and its
///   `handleLazyLoading` flag is true, each page is passed through
///   `lazyLoadHandler.handleLazyLoading` and the HTML from that result is used
///   for extraction instead of the HTML supplied by the pagination handler.
/// * [textExtractionOptions] are the options passed to the text extractor.
/// * [headers] are additional request headers; they are forwarded to both the
///   pagination handler and the lazy-load handler.
/// * [timeout] is the request timeout in milliseconds and [retries] is the
///   number of retry attempts; both are forwarded to the pagination handler
///   only.
///
/// Returns the `results` list of the pagination handler's result.
Future<List<TextExtractionResult>> extractContentWithPagination({
  required String url,
  required PaginationConfig paginationConfig,
  LazyLoadConfig? lazyLoadConfig,
  TextExtractionOptions textExtractionOptions = const TextExtractionOptions(),
  Map<String, String>? headers,
  int? timeout,
  int? retries,
}) async {
  // Per-page callback invoked by the pagination handler.
  Future<TextExtractionResult> extractPage(
    String html,
    String pageUrl,
  ) async {
    final lazyConfig = lazyLoadConfig;

    // No lazy-load handling requested: extract from the HTML as delivered.
    if (lazyConfig == null || !lazyConfig.handleLazyLoading) {
      return textExtractor.extractText(html, options: textExtractionOptions);
    }

    // Otherwise run the page through the lazy-load handler and extract from
    // the HTML it produces.
    final loadedPage = await lazyLoadHandler.handleLazyLoading(
      url: pageUrl,
      config: lazyConfig,
      headers: headers,
    );
    final String loadedHtml = loadedPage.html;
    return textExtractor.extractText(
      loadedHtml,
      options: textExtractionOptions,
    );
  }

  final paginated = await paginationHandler.scrapeWithPagination(
    url: url,
    config: paginationConfig,
    extractor: extractPage,
    headers: headers,
    timeout: timeout,
    retries: retries,
  );

  return paginated.results;
}