AdvancedWebScraper constructor
AdvancedWebScraper({
  required ProxyManager proxyManager,
  ProxyHttpClient? httpClient,
  EnhancedRateLimiter? rateLimiter,
  UserAgentRotator? userAgentRotator,
  CookieManager? cookieManager,
  RobotsTxtHandler? robotsTxtHandler,
  StreamingHtmlParser? streamingParser,
  MemoryEfficientParser? memoryEfficientParser,
  ScrapingTaskQueue? taskQueue,
  ScrapingLogger? logger,
  int defaultTimeout = 30000,
  int maxRetries = 3,
  bool handleCookies = true,
  bool followRedirects = true,
  bool respectRobotsTxt = true,
  int maxConcurrentTasks = 5,
})
Creates a new AdvancedWebScraper with the given parameters. Any collaborator that is not supplied (HTTP client, rate limiter, parsers, task queue, logger, and so on) is constructed with a default in the initializer list shown under Implementation.
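A minimal usage sketch (the proxyManager instance below is assumed to be created elsewhere; its construction is outside this page):

// Sketch: only the required proxyManager is supplied; the constructor
// builds its own default HTTP client, rate limiter, parsers, and task queue.
final scraper = AdvancedWebScraper(
  proxyManager: proxyManager,   // assumed pre-built ProxyManager
  defaultTimeout: 15000,        // override the 30000 ms default
  maxConcurrentTasks: 3,        // cap the number of parallel scraping tasks
);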
Implementation
AdvancedWebScraper({
  required ProxyManager proxyManager,
  ProxyHttpClient? httpClient,
  EnhancedRateLimiter? rateLimiter,
  UserAgentRotator? userAgentRotator,
  CookieManager? cookieManager,
  RobotsTxtHandler? robotsTxtHandler,
  StreamingHtmlParser? streamingParser,
  MemoryEfficientParser? memoryEfficientParser,
  ScrapingTaskQueue? taskQueue,
  ScrapingLogger? logger,
  int defaultTimeout = 30000,
  int maxRetries = 3,
  bool handleCookies = true,
  bool followRedirects = true,
  bool respectRobotsTxt = true,
  int maxConcurrentTasks = 5,
}) : _proxyManager = proxyManager,
     _httpClient =
         httpClient ??
         ProxyHttpClient(
           proxyManager: proxyManager,
           useValidatedProxies: true,
           rotateProxies: true,
         ),
     _rateLimiter =
         rateLimiter ??
         EnhancedRateLimiter(
           robotsTxtHandler: robotsTxtHandler,
           maxRetries: maxRetries,
         ),
     _userAgentRotator = userAgentRotator ?? UserAgentRotator(),
     _cookieManager = cookieManager ?? CookieManager(null),
     _defaultTimeout = defaultTimeout,
     _maxRetries = maxRetries,
     _handleCookies = handleCookies,
     _followRedirects = followRedirects,
     _robotsTxtHandler = robotsTxtHandler,
     _logger = logger ?? ScrapingLogger(),
     _streamingParser =
         streamingParser ?? StreamingHtmlParser(logger: logger),
     _memoryEfficientParser =
         memoryEfficientParser ?? MemoryEfficientParser(logger: logger),
     _taskQueue =
         taskQueue ??
         ScrapingTaskQueue(
           maxConcurrentTasks: maxConcurrentTasks,
           logger: logger,
         );
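As the initializer list shows, a ScrapingLogger supplied here is also forwarded to the default streaming parser, memory-efficient parser, and task queue. A short sketch of sharing one logger this way (again assuming a pre-built proxyManager):

// Sketch: one logger instance is reused by the scraper's default collaborators.
final logger = ScrapingLogger();
final scraper = AdvancedWebScraper(
  proxyManager: proxyManager, // assumed pre-built elsewhere
  logger: logger,             // forwarded to the default parsers and task queue
);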