findScraperConfig function
Finds the appropriate scraper configuration for a given URL.
This function searches through the scraper configuration map to find a configuration that matches the URL's host and path patterns.
The matching process:
- Checks whether the URL's host equals a configuration key or is a subdomain of it (compared case-insensitively)
- For matching hosts, checks each scraper configuration's path patterns
- Matches path patterns using exact string matching or regex patterns
- Returns the first matching configuration found
Parameters:
- scraperConfigMap: Map of domain names to lists of scraper configurations
- url: The URL to find a configuration for
Returns:
- Matching ScraperConfig if found, null otherwise
Example:
final config = findScraperConfig(
scraperConfigMap: {
'example.com': [ScraperConfig(pathPatterns: ['/products'], ...)]
},
url: Uri.parse('https://example.com/products/123'),
);
Implementation
/// Looks up the scraper configuration that applies to [url].
///
/// Each key of `scraperConfigMap.configs` is treated as a domain name. A key
/// is a candidate when, compared case-insensitively, the URL's host is equal
/// to it or ends with `.` followed by it (i.e. a subdomain). Keys whose list
/// is missing or empty are skipped.
///
/// For a candidate key:
/// - when `scraperConfigMap.useNth` is set, only the entry at that index is
///   passed to `_checkPathPatterns` (index 0 is used if the value is out of
///   range);
/// - otherwise every entry is passed to `_checkPathPatterns` in list order.
///
/// Returns the first non-null result of `_checkPathPatterns`, or `null` when
/// no key produces one.
ScraperConfig? findScraperConfig({
  required ScraperConfigMap scraperConfigMap,
  required Uri url,
}) {
  final urlHost = url.host.toLowerCase();

  for (final domain in scraperConfigMap.configs.keys) {
    final domainLower = domain.toLowerCase();

    // Accept the domain itself or any of its subdomains; skip everything else.
    if (urlHost != domainLower && !urlHost.endsWith('.$domainLower')) {
      continue;
    }

    final candidates = scraperConfigMap.configs[domain];
    if (candidates == null || candidates.isEmpty) continue;

    final preferred = scraperConfigMap.useNth;
    if (preferred == null) {
      // No preferred index: probe the candidates in order.
      for (final candidate in candidates) {
        final match = _checkPathPatterns(candidate, url);
        if (match != null) return match;
      }
    } else {
      // A preferred index is set: probe only that entry, falling back to the
      // first entry when the index lies outside the list.
      final inRange = preferred >= 0 && preferred < candidates.length;
      final index = inRange ? preferred : 0;
      final match = _checkPathPatterns(candidates[index], url);
      if (match != null) return match;
    }
  }

  return null;
}