pathextract function

List<String> pathextract(String text)

Extracts all likely local file and directory paths from the provided text.

Excludes:

  • Pure filenames (without separators, e.g., "file.txt")
  • Windows UNC paths (e.g., "\\Server\Share\folder")
  • URI schemes (e.g., "http://", "ftp://", "file://")

`text`: the input string to search for paths. Returns a `List<String>` of the local paths found.

// Sample input: one candidate path per line. The text after each candidate
// (starting at `//`) is part of the raw string, not a Dart comment; it records
// whether that candidate is expected to be kept or excluded.
final text = r'''
This example text contains various paths:
/home/user/documents/report.pdf        // File with separator, KEEP
/home/user/documents/                  // Directory with separator, KEEP
C:\Users\Public\Downloads\image.jpg    // File with separator, KEEP
C:\Users\Public\Downloads\             // Directory with separator, KEEP
../assets/icon.png                     // File with separator, KEEP
../assets/                             // Directory with separator, KEEP
file.txt                               // Pure file, NO SEPARATOR, EXCLUDE
\\Server\Share\folder\data.xlsx        // UNC Path, EXCLUDE
ftp://some.server/path/file.tar.gz     // URI, EXCLUDE
https://example.com/downloads/software.zip // URI, EXCLUDE
''';
// Prints the list of extracted paths.
print(pathextract(text));
// Paths marked KEEP above (backslashes shown literally, not as Dart escapes):
/*
[
'/home/user/documents/report.pdf', '/home/user/documents/',
'C:\Users\Public\Downloads\image.jpg', 'C:\Users\Public\Downloads\',
'../assets/icon.png','../assets/',
]
*/

Implementation

/// Extracts likely local file and directory paths from [text].
///
/// The input is split into tokens on whitespace and on punctuation that
/// commonly surrounds a path (brackets, quotes, `,`, `;`, `!`, `?`). A token is
/// returned when it contains a `/` or `\` separator and starts like a local
/// path:
///
/// * relative: `./`, `../`, `.\`, `..\`
/// * home-relative: `~/`
/// * Windows drive-rooted: `C:\...` or `C:/...`
/// * Unix absolute: `/...`
///
/// Tokens are excluded when they contain `://`, start with `//`, `mailto:`,
/// `tel:`, `urn:` or `data:`, start with `\\` (Windows UNC), or consist of a
/// single bare separator.
///
/// Detection does not depend on the host operating system, so text that mixes
/// Unix-style and Windows-style paths yields both kinds.
///
/// Returns the unique matches in order of first appearance.
List<String> pathextract(String text) {
  // A Set literal keeps insertion order and drops duplicates.
  final foundPaths = <String>{};

  // Tokenize on whitespace and on punctuation that commonly wraps a path.
  final words = text
      .split(RegExp(r'''\s+|[()\[\]{}<>"\',;!?]+'''))
      .where((s) => s.isNotEmpty);

  // Non-local prefixes; '//' also covers protocol-relative URLs.
  const schemes = ['//', 'mailto:', 'tel:', 'urn:', 'data:'];
  const relativePrefixes = ['./', '../', '~/', '.\\', '..\\'];
  // A drive root must begin with an ASCII letter, e.g. `C:\` or `d:/`.
  final driveRoot = RegExp(r'^[A-Za-z]:[\\/]');

  for (final word in words) {
    // 1. Reject URIs and scheme-prefixed tokens (schemes are case-insensitive).
    final lcWord = word.toLowerCase();
    if (lcWord.contains('://')) continue;
    if (schemes.any((scheme) => lcWord.startsWith(scheme))) continue;

    // 2. Reject Windows UNC paths (\\Server\Share) on every host.
    if (word.startsWith(r'\\')) continue;

    // 3. A path must contain a separator; a lone separator is not a path.
    if (!word.contains('/') && !word.contains('\\')) continue;
    if (word == '/' || word == '\\') continue;

    // 4. Keep tokens whose prefix looks like a local path.
    final isLikelyLocalPath =
        relativePrefixes.any((prefix) => word.startsWith(prefix)) ||
            driveRoot.hasMatch(word) ||
            word.startsWith('/');
    if (isLikelyLocalPath) foundPaths.add(word);
  }

  return foundPaths.toList();
}