WebScraper constructor
WebScraper({
  required ProxyManager proxyManager,
  ProxyHttpClient? httpClient,
  String? defaultUserAgent,
  Map<String, String>? defaultHeaders,
  int defaultTimeout = 30000,
  int maxRetries = 3,
  AdaptiveScrapingStrategy? adaptiveStrategy,
  SiteReputationTracker? reputationTracker,
  ScrapingLogger? logger,
  RobotsTxtHandler? robotsTxtHandler,
  StreamingHtmlParser? streamingParser,
  ContentValidator? contentValidator,
  StructuredDataValidator? structuredDataValidator,
  SelectorValidator? selectorValidator,
  RateLimiter? rateLimiter,
  RequestQueue? requestQueue,
  StructuredDataExtractor? structuredDataExtractor,
  ContentDetector? contentDetector,
  TextExtractor? textExtractor,
  HeadlessBrowser? headlessBrowser,
  LazyLoadDetector? lazyLoadDetector,
  LazyLoadHandler? lazyLoadHandler,
  PaginationHandler? paginationHandler,
  bool respectRobotsTxt = true,
})
Creates a new WebScraper with the given parameters.
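A minimal sketch of constructing a WebScraper, assuming a ProxyManager instance is already available in the surrounding code; every optional dependency falls back to the defaults shown in the implementation below.

// Hypothetical setup: `proxyManager` is an existing, configured ProxyManager.
final scraper = WebScraper(
  proxyManager: proxyManager,
  defaultTimeout: 60000, // override the 30000 ms default timeout
  maxRetries: 5,         // retry failed requests up to five times
);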
Implementation
WebScraper({
required this.proxyManager,
ProxyHttpClient? httpClient,
String? defaultUserAgent,
Map<String, String>? defaultHeaders,
int defaultTimeout = 30000,
int maxRetries = 3,
AdaptiveScrapingStrategy? adaptiveStrategy,
SiteReputationTracker? reputationTracker,
ScrapingLogger? logger,
RobotsTxtHandler? robotsTxtHandler,
StreamingHtmlParser? streamingParser,
ContentValidator? contentValidator,
StructuredDataValidator? structuredDataValidator,
SelectorValidator? selectorValidator,
RateLimiter? rateLimiter,
RequestQueue? requestQueue,
StructuredDataExtractor? structuredDataExtractor,
ContentDetector? contentDetector,
TextExtractor? textExtractor,
HeadlessBrowser? headlessBrowser,
LazyLoadDetector? lazyLoadDetector,
LazyLoadHandler? lazyLoadHandler,
PaginationHandler? paginationHandler,
bool respectRobotsTxt = true,
}) : _httpClient =
httpClient ??
ProxyHttpClient(
proxyManager: proxyManager,
useValidatedProxies: true,
rotateProxies: true,
),
_defaultUserAgent =
defaultUserAgent ??
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
_defaultHeaders = defaultHeaders ?? {},
_defaultTimeout = defaultTimeout,
_maxRetries = maxRetries,
_reputationTracker = reputationTracker ?? SiteReputationTracker(),
_logger = logger ?? ScrapingLogger(),
_adaptiveStrategy =
adaptiveStrategy ??
AdaptiveScrapingStrategy(reputationTracker: reputationTracker),
_robotsTxtHandler =
robotsTxtHandler ??
RobotsTxtHandler(
proxyManager: proxyManager,
logger: logger,
defaultUserAgent: defaultUserAgent,
respectRobotsTxt: respectRobotsTxt,
),
_respectRobotsTxt = respectRobotsTxt,
_streamingParser =
streamingParser ?? StreamingHtmlParser(logger: logger),
_contentValidator =
contentValidator ?? ContentValidator(logger: Logger('WebScraper')),
_structuredDataValidator =
structuredDataValidator ??
StructuredDataValidator(logger: Logger('WebScraper')),
_selectorValidator =
selectorValidator ?? SelectorValidator(logger: Logger('WebScraper')),
_rateLimiter = rateLimiter ?? RateLimiter(logger: Logger('WebScraper')),
_requestQueue =
requestQueue ??
RequestQueue(
rateLimiter:
rateLimiter ?? RateLimiter(logger: Logger('WebScraper')),
logger: Logger('WebScraper'),
),
_structuredDataExtractor =
structuredDataExtractor ??
StructuredDataExtractor(logger: Logger('WebScraper')),
_contentDetector =
contentDetector ?? ContentDetector(logger: Logger('WebScraper')),
_textExtractor =
textExtractor ?? TextExtractor(logger: Logger('WebScraper')),
_headlessBrowser = headlessBrowser ?? HeadlessBrowser(),
_lazyLoadHandler =
lazyLoadHandler ??
LazyLoadHandler(
headlessBrowser: headlessBrowser ?? HeadlessBrowser(),
logger: Logger('WebScraper'),
) {
// Initialize pagination handler after construction
_paginationHandler =
paginationHandler ??
PaginationHandler(webScraper: this, logger: Logger('WebScraper'));
}
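As a sketch of overriding the built-in defaults, the collaborators can also be injected explicitly. The example below reuses only constructors that appear in the initializer list above (ProxyHttpClient and ScrapingLogger); the specific option values are illustrative assumptions, not recommended settings.

// Hypothetical setup: `proxyManager` is an existing, configured ProxyManager.
final scraper = WebScraper(
  proxyManager: proxyManager,
  httpClient: ProxyHttpClient(
    proxyManager: proxyManager,
    useValidatedProxies: false, // e.g. allow unvalidated proxies
    rotateProxies: true,
  ),
  logger: ScrapingLogger(),   // share one logger with the default RobotsTxtHandler
  respectRobotsTxt: false,    // e.g. when scraping internal test targets
);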