extractContentWithPagination method
Future<List<TextExtractionResult>> extractContentWithPagination({
  required String url,
  required PaginationConfig paginationConfig,
  LazyLoadConfig? lazyLoadConfig,
  TextExtractionOptions textExtractionOptions = const TextExtractionOptions(),
  Map<String, String>? headers,
  int? timeout,
  int? retries,
})
Extracts the main content from multiple pages with pagination.
- `url` is the starting URL.
- `paginationConfig` is the pagination configuration.
- `lazyLoadConfig` is the lazy loading configuration (optional).
- `textExtractionOptions` are the text extraction options (optional).
- `headers` are additional headers to send with the request.
- `timeout` is the timeout for the request in milliseconds.
- `retries` is the number of retry attempts.
Implementation
/// Extracts text content from each page reached by paginating from [url].
///
/// Pagination is delegated to `paginationHandler.scrapeWithPagination`, which
/// receives [paginationConfig], [headers], [timeout] and [retries] and invokes
/// a per-page callback with that page's HTML and URL.
///
/// For each page, when [lazyLoadConfig] is non-null and its
/// `handleLazyLoading` flag is set, the HTML supplied by the pagination
/// handler is replaced with the HTML returned by
/// `lazyLoadHandler.handleLazyLoading` for that page URL. Only [headers] is
/// forwarded to the lazy-load handler; [timeout] and [retries] are not.
///
/// The (possibly replaced) HTML is then passed to `textExtractor.extractText`
/// together with [textExtractionOptions].
///
/// Returns the `results` of the pagination run.
Future<List<TextExtractionResult>> extractContentWithPagination({
  required String url,
  required PaginationConfig paginationConfig,
  LazyLoadConfig? lazyLoadConfig,
  TextExtractionOptions textExtractionOptions = const TextExtractionOptions(),
  Map<String, String>? headers,
  int? timeout,
  int? retries,
}) async {
  // Per-page callback handed to the pagination handler.
  Future<TextExtractionResult> extractPage(String html, String pageUrl) async {
    // Work on a local copy rather than reassigning the parameter.
    var pageHtml = html;

    if (lazyLoadConfig != null && lazyLoadConfig.handleLazyLoading) {
      // Swap in the HTML produced by the lazy-load handler for this page.
      final loaded = await lazyLoadHandler.handleLazyLoading(
        url: pageUrl,
        config: lazyLoadConfig,
        headers: headers,
      );
      pageHtml = loaded.html;
    }

    return textExtractor.extractText(pageHtml, options: textExtractionOptions);
  }

  // Walk the paginated pages, running the callback on each one.
  final paginated = await paginationHandler.scrapeWithPagination(
    url: url,
    config: paginationConfig,
    extractor: extractPage,
    headers: headers,
    timeout: timeout,
    retries: retries,
  );

  return paginated.results;
}