extractData method
Extracts data from a URL as a queued task that runs with the given priority
url
is the URL to fetch
selector
is the CSS selector to use
attribute
is the attribute to extract (optional)
asText
whether to extract the text content (default: true)
priority
is the priority of the task (higher values = higher priority)
headers
are additional headers to send with the request
timeout
is the timeout for the request in milliseconds
retries
is the number of retry attempts
ignoreRobotsTxt
whether to ignore robots.txt rules (default: false)
Implementation
/// Queues a scraping job that downloads [url] and pulls values out of it.
///
/// The job is submitted to the task queue under the name `ExtractData-<url>`
/// with the given [priority]. When it runs, it first fetches the page HTML,
/// forwarding [headers], [timeout], [retries] and [ignoreRobotsTxt] to the
/// scraper, and then runs the extraction step on that HTML, forwarding
/// [selector], [attribute] and [asText].
///
/// Returns the future produced by the task queue for this job.
Future<List<String>> extractData({
  required String url,
  required String selector,
  String? attribute,
  bool asText = true,
  int priority = 0,
  Map<String, String>? headers,
  int? timeout,
  int? retries,
  bool ignoreRobotsTxt = false,
}) {
  // Both scraping steps packaged as one unit of work for the queue.
  Future<List<String>> fetchThenExtract() async {
    // Step 1: download the page markup.
    final pageSource = await _webScraper.fetchHtml(
      url: url,
      headers: headers,
      timeout: timeout,
      retries: retries,
      ignoreRobotsTxt: ignoreRobotsTxt,
    );

    // Step 2: run the selector against the downloaded markup.
    return _webScraper.extractData(
      html: pageSource,
      selector: selector,
      attribute: attribute,
      asText: asText,
    );
  }

  return _taskQueue.addTask<List<String>>(
    taskName: 'ExtractData-$url',
    priority: priority,
    task: fetchThenExtract,
  );
}