RobotsTxtRules.parse constructor
RobotsTxtRules.parse(String content)
Parses the given robots.txt content into a RobotsTxtRules instance, reading User-agent, Disallow, Allow, and Crawl-delay directives. Directive names are matched case-insensitively, comments (everything after #) and blank lines are skipped, and directives that appear before the first User-agent line are ignored.
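For example, a minimal usage sketch (the robots.txt content below is made up for illustration):

final rules = RobotsTxtRules.parse('''
User-agent: *
Disallow: /private/   # comments are stripped
Crawl-delay: 10

User-agent: fastbot
Allow: /public/
''');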
Implementation
factory RobotsTxtRules.parse(String content) {
  final rules = RobotsTxtRules.empty();
  final lines = content.split('\n');

  // Directives apply to the most recently seen User-agent; directives
  // encountered before any User-agent line are ignored.
  String? currentUserAgent;

  for (var line in lines) {
    // Strip comments (everything after '#').
    final commentIndex = line.indexOf('#');
    if (commentIndex >= 0) {
      line = line.substring(0, commentIndex);
    }

    // Trim whitespace and skip blank lines.
    line = line.trim();
    if (line.isEmpty) continue;

    // Directive names are matched case-insensitively.
    if (line.toLowerCase().startsWith('user-agent:')) {
      final userAgent = _extractValue(line, 'user-agent:');
      if (userAgent.isNotEmpty) {
        currentUserAgent = userAgent.toLowerCase();
      }
    } else if (line.toLowerCase().startsWith('disallow:')) {
      if (currentUserAgent != null) {
        final path = _extractValue(line, 'disallow:');
        if (path.isNotEmpty) {
          rules._disallowedPaths.putIfAbsent(currentUserAgent, () => []);
          rules._disallowedPaths[currentUserAgent]!.add(path);
        }
      }
    } else if (line.toLowerCase().startsWith('allow:')) {
      if (currentUserAgent != null) {
        final path = _extractValue(line, 'allow:');
        if (path.isNotEmpty) {
          rules._allowedPaths.putIfAbsent(currentUserAgent, () => []);
          rules._allowedPaths[currentUserAgent]!.add(path);
        }
      }
    } else if (line.toLowerCase().startsWith('crawl-delay:')) {
      if (currentUserAgent != null) {
        final delayStr = _extractValue(line, 'crawl-delay:');
        if (delayStr.isNotEmpty) {
          try {
            final delay = int.parse(delayStr);
            rules._crawlDelays[currentUserAgent] = delay;
          } catch (_) {
            // Ignore invalid (non-integer) crawl delays.
          }
        }
      }
    }
  }

  return rules;
}
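The _extractValue helper is not shown on this page. Judging from its call sites, it presumably strips the directive prefix and trims the remaining value; a hypothetical sketch:

// Hypothetical sketch, inferred from the call sites above; the real
// helper may differ.
static String _extractValue(String line, String prefix) =>
    line.substring(prefix.length).trim();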