pathextract function
Extracts all likely local file and directory paths from the provided text.
Excludes:
- Pure filenames (without separators, e.g., "file.txt")
- Windows UNC paths (e.g., "\\Server\Share\folder")
- URI schemes (e.g., "http://", "ftp://", "file://")
`text`: The input string to search for paths.
Returns a List<String> of found local paths.
// Sample input. This is a raw string, so backslashes are literal and the
// trailing `// ...` notes on each line are part of the text being scanned,
// not Dart comments.
final text = r'''
This example text contains various paths:
/home/user/documents/report.pdf // File with separator, KEEP
/home/user/documents/ // Directory with separator, KEEP
C:\Users\Public\Downloads\image.jpg // File with separator, KEEP
C:\Users\Public\Downloads\ // Directory with separator, KEEP
../assets/icon.png // File with separator, KEEP
../assets/ // Directory with separator, KEEP
file.txt // Pure file, NO SEPARATOR, EXCLUDE
\\Server\Share\folder\data.xlsx // UNC Path, EXCLUDE
ftp://some.server/path/file.tar.gz // URI, EXCLUDE
https://example.com/downloads/software.zip // URI, EXCLUDE
''';
// Prints the list returned by pathextract for the sample above.
print(pathextract(text));
/*
[
'/home/user/documents/report.pdf', '/home/user/documents/',
'C:\Users\Public\Downloads\image.jpg', 'C:\Users\Public\Downloads\',
'../assets/icon.png','../assets/',
]
*/
Implementation
/// Extracts every token of [text] that looks like a local file or directory
/// path.
///
/// [text] is split on whitespace and on the punctuation characters
/// `( ) [ ] { } < > " ' , ; ! ?`. A token is kept when it contains a path
/// separator (`/` or `\`) and starts with one of:
///
/// * a relative prefix: `./`, `../`, `.\` or `..\`;
/// * the home shortcut `~/`;
/// * a Unix root `/`;
/// * a drive-letter root such as `C:\` or `D:/`.
///
/// Both Unix-style and Windows-style paths are recognised regardless of the
/// platform the code runs on, so the same input yields the same result
/// everywhere.
///
/// Tokens are skipped when they contain `://`, start with `//`, `mailto:`,
/// `tel:`, `urn:` or `data:` (compared case-insensitively), start with the UNC
/// prefix `\\`, contain no separator at all, or are just `/`.
///
/// Returns the unique matches in order of first appearance; the list is empty
/// when nothing matches.
List<String> pathextract(String text) {
  // A set literal keeps insertion order while dropping duplicates.
  final foundPaths = <String>{};

  // Token delimiters: runs of whitespace or of common punctuation.
  final wordBreak = RegExp(r'''\s+|[()\[\]{}<>"\',;!?]+''');
  // Drive-letter root followed by either separator: `C:\...` or `D:/...`.
  final driveRoot = RegExp(r'^[A-Za-z]:[\\/]');

  // Prefixes that mark a token as something other than a local path.
  const nonLocalPrefixes = ['//', 'mailto:', 'tel:', 'urn:', 'data:'];
  // Prefixes of relative and home-directory paths, in both separator styles.
  const relativePrefixes = ['./', '../', '.\\', '..\\', '~/'];

  for (final word in text.split(wordBreak)) {
    // Splitting yields empty strings at the edges and between delimiters.
    if (word.isEmpty) continue;

    // 1. URIs and scheme-like prefixes, matched case-insensitively.
    final lcWord = word.toLowerCase();
    if (lcWord.contains('://')) continue;
    if (nonLocalPrefixes.any((prefix) => lcWord.startsWith(prefix))) continue;

    // 2. Windows UNC paths (\\Server\Share) are not local.
    if (word.startsWith(r'\\')) continue;

    // 3. A bare file name without any separator is not treated as a path.
    if (!word.contains('/') && !word.contains('\\')) continue;

    // 4. A lone root separator carries no useful path information.
    if (word == '/') continue;

    // 5. Accept only tokens that start like a relative, home, or absolute path.
    final isLikelyLocalPath =
        relativePrefixes.any((prefix) => word.startsWith(prefix)) ||
            word.startsWith('/') ||
            driveRoot.hasMatch(word);
    if (isLikelyLocalPath) foundPaths.add(word);
  }

  return foundPaths.toList();
}