WebScraper constructor

WebScraper({
  required ProxyManager proxyManager,
  ProxyHttpClient? httpClient,
  String? defaultUserAgent,
  Map<String, String>? defaultHeaders,
  int defaultTimeout = 30000,
  int maxRetries = 3,
  AdaptiveScrapingStrategy? adaptiveStrategy,
  SiteReputationTracker? reputationTracker,
  ScrapingLogger? logger,
  RobotsTxtHandler? robotsTxtHandler,
  StreamingHtmlParser? streamingParser,
  ContentValidator? contentValidator,
  StructuredDataValidator? structuredDataValidator,
  SelectorValidator? selectorValidator,
  RateLimiter? rateLimiter,
  RequestQueue? requestQueue,
  StructuredDataExtractor? structuredDataExtractor,
  ContentDetector? contentDetector,
  TextExtractor? textExtractor,
  HeadlessBrowser? headlessBrowser,
  LazyLoadDetector? lazyLoadDetector,
  LazyLoadHandler? lazyLoadHandler,
  PaginationHandler? paginationHandler,
  bool respectRobotsTxt = true,
})

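Creates a new WebScraper with the given parameters. Only `proxyManager` is required; each optional collaborator that is omitted is replaced with a default instance.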

Implementation

/// Creates a new [WebScraper] with the given parameters.
///
/// Only [proxyManager] is required. Each optional collaborator that is left
/// `null` is replaced with a default instance built here.
///
/// Defaults that are used in more than one place are resolved exactly once
/// and then shared:
///
/// * [rateLimiter] is shared by the scraper and the default [RequestQueue].
/// * [headlessBrowser] is shared by the scraper and the default
///   [LazyLoadHandler].
/// * [reputationTracker] is shared by the scraper and the default
///   [AdaptiveScrapingStrategy].
/// * [logger] is shared by the scraper, the default [RobotsTxtHandler] and
///   the default [StreamingHtmlParser].
///
/// [defaultTimeout] and [maxRetries] are stored unchanged; no range
/// validation is performed here.
///
/// NOTE(review): [lazyLoadDetector] is accepted for interface compatibility
/// but is neither stored nor forwarded by this constructor - confirm whether
/// it should be wired into a field or into the default [LazyLoadHandler].
WebScraper({
  required ProxyManager proxyManager,
  ProxyHttpClient? httpClient,
  String? defaultUserAgent,
  Map<String, String>? defaultHeaders,
  int defaultTimeout = 30000,
  int maxRetries = 3,
  AdaptiveScrapingStrategy? adaptiveStrategy,
  SiteReputationTracker? reputationTracker,
  ScrapingLogger? logger,
  RobotsTxtHandler? robotsTxtHandler,
  StreamingHtmlParser? streamingParser,
  ContentValidator? contentValidator,
  StructuredDataValidator? structuredDataValidator,
  SelectorValidator? selectorValidator,
  RateLimiter? rateLimiter,
  RequestQueue? requestQueue,
  StructuredDataExtractor? structuredDataExtractor,
  ContentDetector? contentDetector,
  TextExtractor? textExtractor,
  HeadlessBrowser? headlessBrowser,
  LazyLoadDetector? lazyLoadDetector,
  LazyLoadHandler? lazyLoadHandler,
  PaginationHandler? paginationHandler,
  bool respectRobotsTxt = true,
}) : this._withResolvedDependencies(
       proxyManager: proxyManager,
       httpClient: httpClient,
       defaultUserAgent: defaultUserAgent,
       defaultHeaders: defaultHeaders,
       defaultTimeout: defaultTimeout,
       maxRetries: maxRetries,
       adaptiveStrategy: adaptiveStrategy,
       // The four collaborators below are needed by more than one
       // initializer, so their defaults are created once here instead of
       // once per use.
       reputationTracker: reputationTracker ?? SiteReputationTracker(),
       logger: logger ?? ScrapingLogger(),
       rateLimiter: rateLimiter ?? RateLimiter(logger: Logger('WebScraper')),
       headlessBrowser: headlessBrowser ?? HeadlessBrowser(),
       robotsTxtHandler: robotsTxtHandler,
       streamingParser: streamingParser,
       contentValidator: contentValidator,
       structuredDataValidator: structuredDataValidator,
       selectorValidator: selectorValidator,
       requestQueue: requestQueue,
       structuredDataExtractor: structuredDataExtractor,
       contentDetector: contentDetector,
       textExtractor: textExtractor,
       lazyLoadHandler: lazyLoadHandler,
       paginationHandler: paginationHandler,
       respectRobotsTxt: respectRobotsTxt,
     );

/// Initializes every field from already-resolved shared dependencies.
///
/// [reputationTracker], [logger], [rateLimiter] and [headlessBrowser] are
/// non-null here so the same instance can be stored on the scraper and
/// handed to the default collaborators that depend on it. The remaining
/// nullable arguments fall back to a default instance when `null`.
WebScraper._withResolvedDependencies({
  required this.proxyManager,
  required ProxyHttpClient? httpClient,
  required String? defaultUserAgent,
  required Map<String, String>? defaultHeaders,
  required int defaultTimeout,
  required int maxRetries,
  required AdaptiveScrapingStrategy? adaptiveStrategy,
  required SiteReputationTracker reputationTracker,
  required ScrapingLogger logger,
  required RateLimiter rateLimiter,
  required HeadlessBrowser headlessBrowser,
  required RobotsTxtHandler? robotsTxtHandler,
  required StreamingHtmlParser? streamingParser,
  required ContentValidator? contentValidator,
  required StructuredDataValidator? structuredDataValidator,
  required SelectorValidator? selectorValidator,
  required RequestQueue? requestQueue,
  required StructuredDataExtractor? structuredDataExtractor,
  required ContentDetector? contentDetector,
  required TextExtractor? textExtractor,
  required LazyLoadHandler? lazyLoadHandler,
  required PaginationHandler? paginationHandler,
  required bool respectRobotsTxt,
}) : _httpClient =
         httpClient ??
         ProxyHttpClient(
           proxyManager: proxyManager,
           useValidatedProxies: true,
           rotateProxies: true,
         ),
     _defaultUserAgent =
         defaultUserAgent ??
         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
     _defaultHeaders = defaultHeaders ?? {},
     _defaultTimeout = defaultTimeout,
     _maxRetries = maxRetries,
     _reputationTracker = reputationTracker,
     _logger = logger,
     // The default strategy gets the same tracker the scraper stores.
     _adaptiveStrategy =
         adaptiveStrategy ??
         AdaptiveScrapingStrategy(reputationTracker: reputationTracker),
     _robotsTxtHandler =
         robotsTxtHandler ??
         RobotsTxtHandler(
           proxyManager: proxyManager,
           logger: logger,
           // Deliberately the caller's raw value (possibly null), not the
           // browser fallback above, so the handler keeps whatever
           // user-agent default it applies on its own.
           defaultUserAgent: defaultUserAgent,
           respectRobotsTxt: respectRobotsTxt,
         ),
     _respectRobotsTxt = respectRobotsTxt,
     _streamingParser =
         streamingParser ?? StreamingHtmlParser(logger: logger),
     _contentValidator =
         contentValidator ?? ContentValidator(logger: Logger('WebScraper')),
     _structuredDataValidator =
         structuredDataValidator ??
         StructuredDataValidator(logger: Logger('WebScraper')),
     _selectorValidator =
         selectorValidator ?? SelectorValidator(logger: Logger('WebScraper')),
     _rateLimiter = rateLimiter,
     // The default queue throttles with the scraper's own rate limiter.
     _requestQueue =
         requestQueue ??
         RequestQueue(
           rateLimiter: rateLimiter,
           logger: Logger('WebScraper'),
         ),
     _structuredDataExtractor =
         structuredDataExtractor ??
         StructuredDataExtractor(logger: Logger('WebScraper')),
     _contentDetector =
         contentDetector ?? ContentDetector(logger: Logger('WebScraper')),
     _textExtractor =
         textExtractor ?? TextExtractor(logger: Logger('WebScraper')),
     _headlessBrowser = headlessBrowser,
     // The default lazy-load handler drives the scraper's own browser.
     _lazyLoadHandler =
         lazyLoadHandler ??
         LazyLoadHandler(
           headlessBrowser: headlessBrowser,
           logger: Logger('WebScraper'),
         ) {
  // The default pagination handler needs a reference to this scraper, and
  // `this` is not available in the initializer list, so it is assigned in
  // the constructor body.
  _paginationHandler =
      paginationHandler ??
      PaginationHandler(webScraper: this, logger: Logger('WebScraper'));
}