cleanScraperConfigUrl function
Cleans and normalizes a URL based on scraper configuration settings.
This function applies URL cleaning rules defined in the UrlCleaner configuration:
- Removes blacklisted query parameters
- Keeps only whitelisted query parameters (if specified)
- Adds additional parameters as defined in the configuration
- Normalizes the URL structure
Parameters:
url
: The original URL to clean
cleaner
: URL cleaning configuration (optional)
Returns:
- Cleaned and normalized Uri object
Example:
final cleanedUrl = cleanScraperConfigUrl(
Uri.parse('https://example.com/page?utm_source=google&id=123'),
UrlCleaner(blacklistParams: ['utm_source']),
);
// Result: https://example.com/page?id=123
Implementation
/// Cleans and normalizes [url] according to the rules in [cleaner].
///
/// Rules are applied in this order:
/// 1. Query parameters named in `cleaner.blacklistParams` are dropped.
/// 2. If `cleaner.whitelistParams` is set, only the parameters it names are
///    kept; if it is not set, this step is skipped.
/// 3. Entries from `cleaner.appendParams` are added, overwriting any
///    same-named parameter that survived the previous steps.
///
/// When [cleaner] is `null`, every query parameter is removed.
///
/// The result is always built with `Uri.https` from the authority and path of
/// [url], so the scheme is `https` and no fragment is carried over. When no
/// query parameters remain, the returned URI has no query component at all
/// (no dangling `?`).
Uri cleanScraperConfigUrl(Uri url, UrlCleaner? cleaner) {
  // Without a configuration, return the URL with its path only.
  if (cleaner == null) {
    return Uri.https(url.authority, url.path);
  }

  // Work on a mutable copy of the original query parameters.
  final params = Map<String, String>.of(url.queryParameters);

  // Blacklist: drop every parameter that is explicitly listed.
  final blacklist = cleaner.blacklistParams;
  if (blacklist != null) {
    params.removeWhere((key, _) => blacklist.contains(key));
  }

  // Whitelist: when one is configured, keep only the listed parameters.
  // A missing whitelist means "no restriction", so nothing is removed here.
  final whitelist = cleaner.whitelistParams;
  if (whitelist != null) {
    params.removeWhere((key, _) => !whitelist.contains(key));
  }

  // Appended parameters win over any surviving parameter with the same name.
  final extra = cleaner.appendParams;
  if (extra != null) {
    extra.forEach((key, value) {
      params[key] = value;
    });
  }

  // An empty map would make Uri.https emit a trailing "?"; pass null instead
  // so the result matches the no-cleaner case above.
  return Uri.https(url.authority, url.path, params.isEmpty ? null : params);
}