ConcurrentWebScraper constructor

ConcurrentWebScraper({
  required ProxyManager proxyManager,
  int maxConcurrentTasks = 5,
  ProxyHttpClient? httpClient,
  String? defaultUserAgent,
  Map<String, String>? defaultHeaders,
  int defaultTimeout = 30000,
  int maxRetries = 3,
  ScrapingLogger? logger,
  RobotsTxtHandler? robotsTxtHandler,
  StreamingHtmlParser? streamingParser,
  bool respectRobotsTxt = true,
})

Creates a new ConcurrentWebScraper with the given parameters.

proxyManager is the proxy manager used to obtain proxies.
maxConcurrentTasks is the maximum number of tasks that may run concurrently.
httpClient is the HTTP client to use for requests.
defaultUserAgent is the default user agent to send with requests.
defaultHeaders are the default headers to send with requests.
defaultTimeout is the default timeout for requests, in milliseconds.
maxRetries is the maximum number of retry attempts per request.
logger is the logger for scraping operations.
robotsTxtHandler is the handler for robots.txt rules.
streamingParser is the streaming HTML parser.
respectRobotsTxt determines whether robots.txt rules are respected.
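As a rough usage sketch, the constructor might be called as below. Only proxyManager is required; the other values shown are illustrative, and how the ProxyManager instance is obtained is assumed to be handled elsewhere in your application.

// A minimal sketch; obtaining the ProxyManager is assumed to happen elsewhere.
final scraper = ConcurrentWebScraper(
  proxyManager: proxyManager, // assumed: configured by your application
  maxConcurrentTasks: 10,     // allow up to 10 tasks to run at once
  defaultTimeout: 15000,      // 15-second request timeout
  maxRetries: 2,              // retry failed requests up to twice
);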

Implementation

ConcurrentWebScraper({
  required ProxyManager proxyManager,
  int maxConcurrentTasks = 5,
  ProxyHttpClient? httpClient,
  String? defaultUserAgent,
  Map<String, String>? defaultHeaders,
  int defaultTimeout = 30000,
  int maxRetries = 3,
  ScrapingLogger? logger,
  RobotsTxtHandler? robotsTxtHandler,
  StreamingHtmlParser? streamingParser,
  bool respectRobotsTxt = true,
}) : _webScraper = WebScraper(
       proxyManager: proxyManager,
       httpClient: httpClient,
       defaultUserAgent: defaultUserAgent,
       defaultHeaders: defaultHeaders,
       defaultTimeout: defaultTimeout,
       maxRetries: maxRetries,
       logger: logger,
       robotsTxtHandler: robotsTxtHandler,
       streamingParser: streamingParser,
       respectRobotsTxt: respectRobotsTxt,
     ),
     _taskQueue = ScrapingTaskQueue(
       maxConcurrentTasks: maxConcurrentTasks,
       logger: logger,
     ),
     _logger = logger ?? ScrapingLogger();
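As the initializer list shows, the constructor delegates per-request behavior (client, headers, timeouts, retries, robots.txt handling) to an internal WebScraper, while a ScrapingTaskQueue bounded by maxConcurrentTasks governs concurrency. The same logger instance is shared by both, and a fresh ScrapingLogger is created when none is supplied.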