isAllowed method
Checks whether a URL is allowed to be crawled, according to the target site's robots.txt rules. Returns true when crawling is permitted, or when robots.txt enforcement is disabled.
url: the URL to check.
userAgent: the user agent to check the rules against. Optional; when omitted, the crawler's default user agent is used.
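A short usage sketch follows. The Crawler class name and its construction are illustrative assumptions; only the isAllowed call itself comes from this API.

Future<void> main() async {
  final crawler = Crawler(); // hypothetical class exposing isAllowed

  // Check with the crawler's default user agent.
  if (await crawler.isAllowed('https://example.com/some/page')) {
    // Safe to fetch the page.
  }

  // Check against an explicit user agent.
  final allowed = await crawler.isAllowed(
    'https://example.com/search?q=dart',
    'MyBot/1.0',
  );
  print(allowed);
}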
Implementation
Future<bool> isAllowed(String url, [String? userAgent]) async {
  // If robots.txt enforcement is disabled, allow everything.
  if (!_respectRobotsTxt) {
    return true;
  }

  // Fall back to the crawler's default user agent when none is given.
  final effectiveUserAgent = userAgent ?? _defaultUserAgent;
  final domain = _extractDomain(url);
  final path = _extractPath(url);

  // Get the rules for this domain.
  final rules = await getRules(domain);
  if (rules == null) {
    // If we don't have rules for this domain, assume allowed.
    return true;
  }

  return rules.isAllowed(path, effectiveUserAgent);
}
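The _extractDomain and _extractPath helpers are not shown in this section. Below is a minimal sketch of what they might do, built on Dart's Uri parser; these bodies are assumptions for illustration, not the library's actual implementation.

String _extractDomain(String url) {
  // Assumed behavior: scheme plus host, e.g. 'https://example.com'.
  final uri = Uri.parse(url);
  return '${uri.scheme}://${uri.host}';
}

String _extractPath(String url) {
  // Assumed behavior: path plus query string.
  final uri = Uri.parse(url);
  return uri.hasQuery ? '${uri.path}?${uri.query}' : uri.path;
}

Keeping the query string in the matched path is consistent with how robots.txt wildcard rules (e.g. Disallow: /*?) are commonly interpreted.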