RobotsTxtHandler constructor
RobotsTxtHandler({
  Client? httpClient,
  ProxyManager? proxyManager,
  ScrapingLogger? logger,
  String? defaultUserAgent,
  bool respectRobotsTxt = true,
  int cacheExpirationMs = 3600000,
})
Creates a new RobotsTxtHandler with the given parameters.
[httpClient] is the HTTP client to use for fetching robots.txt files.
[proxyManager] is used to build a proxy-backed HTTP client when no [httpClient] is given.
[logger] is the logger to use for logging operations.
[defaultUserAgent] is the default user agent to use for fetching robots.txt files.
[respectRobotsTxt] determines whether to respect robots.txt rules.
[cacheExpirationMs] is the cache expiration time in milliseconds.
Implementation
/// Creates a [RobotsTxtHandler].
///
/// Every argument is optional:
/// - [httpClient]: the HTTP client to keep. When it is null, a
///   [ProxyHttpClient] (with `useValidatedProxies` and `rotateProxies`
///   enabled) is built from [proxyManager] if one is supplied; otherwise a
///   plain [http.Client] is created.
/// - [proxyManager]: only consulted when [httpClient] is null.
/// - [logger]: falls back to a new [ScrapingLogger] when null.
/// - [defaultUserAgent]: falls back to the built-in PivoxBot user-agent
///   string when null.
/// - [respectRobotsTxt]: stored unchanged; defaults to `true`.
/// - [cacheExpirationMs]: stored unchanged; defaults to 3600000 ms (one hour).
RobotsTxtHandler({
  http.Client? httpClient,
  ProxyManager? proxyManager,
  ScrapingLogger? logger,
  String? defaultUserAgent,
  bool respectRobotsTxt = true,
  int cacheExpirationMs = 3600000, // 60 min * 60 s * 1000 ms
})  : _httpClient = httpClient ??
          // No explicit client: go through proxies only if a manager exists.
          (proxyManager == null
              ? http.Client()
              : ProxyHttpClient(
                  proxyManager: proxyManager,
                  rotateProxies: true,
                  useValidatedProxies: true,
                )),
      _logger = logger ?? ScrapingLogger(),
      _defaultUserAgent = defaultUserAgent ??
          'Mozilla/5.0 (compatible; PivoxBot/1.0; +https://github.com/Liv-Coder/Pivox-)',
      _respectRobotsTxt = respectRobotsTxt,
      _cacheExpirationMs = cacheExpirationMs;