RobotsTxtRules.parse constructor

RobotsTxtRules.parse(
  String content
)

Parses robots.txt content into a RobotsTxtRules instance, collecting the allowed paths, disallowed paths, and crawl delays declared for each user agent.
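For example, a minimal usage sketch (the robots.txt content below is illustrative, not part of the API):

final rules = RobotsTxtRules.parse('''
User-agent: *
Disallow: /private/
Allow: /private/welcome.html
Crawl-delay: 5
''');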

Implementation

factory RobotsTxtRules.parse(String content) {
  final rules = RobotsTxtRules.empty();
  final lines = content.split('\n');
  String? currentUserAgent;

  for (var line in lines) {
    // Remove comments
    final commentIndex = line.indexOf('#');
    if (commentIndex >= 0) {
      line = line.substring(0, commentIndex);
    }

    // Trim whitespace
    line = line.trim();
    if (line.isEmpty) continue;

    // Directive names are matched case-insensitively.
    if (line.toLowerCase().startsWith('user-agent:')) {
      final userAgent = _extractValue(line, 'user-agent:');
      if (userAgent.isNotEmpty) {
        // Subsequent rules apply to this agent until the next User-agent line.
        currentUserAgent = userAgent.toLowerCase();
      }
    } else if (line.toLowerCase().startsWith('disallow:')) {
      // Rules that appear before any User-agent line are ignored.
      if (currentUserAgent != null) {
        final path = _extractValue(line, 'disallow:');
        // An empty Disallow value means "allow everything", so skip it.
        if (path.isNotEmpty) {
          rules._disallowedPaths.putIfAbsent(currentUserAgent, () => []).add(path);
        }
      }
    } else if (line.toLowerCase().startsWith('allow:')) {
      if (currentUserAgent != null) {
        final path = _extractValue(line, 'allow:');
        if (path.isNotEmpty) {
          rules._allowedPaths.putIfAbsent(currentUserAgent, () => []).add(path);
        }
      }
    } else if (line.toLowerCase().startsWith('crawl-delay:')) {
      if (currentUserAgent != null) {
        final delayStr = _extractValue(line, 'crawl-delay:');
        // int.tryParse returns null for empty or malformed values,
        // so invalid crawl delays are silently ignored.
        final delay = int.tryParse(delayStr);
        if (delay != null) {
          rules._crawlDelays[currentUserAgent] = delay;
        }
      }
    }
  }

  return rules;
}
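The private helper _extractValue is not shown on this page. Based on how it is called above, it presumably strips the directive prefix and trims the remaining value; a minimal sketch, assuming that behavior (not the package's actual code):

// Assumed helper: returns the text after the directive prefix, trimmed.
// Taking prefix.length works even though the match above is
// case-insensitive, since the length is the same for any casing.
static String _extractValue(String line, String prefix) {
  return line.substring(prefix.length).trim();
}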