| | """distutils.filelist |
| | |
| | Provides the FileList class, used for poking about the filesystem |
| | and building lists of files. |
| | """ |
| |
|
| | from __future__ import annotations |
| |
|
| | import fnmatch |
| | import functools |
| | import os |
| | import re |
| | from collections.abc import Iterable |
| | from typing import Literal, overload |
| |
|
| | from ._log import log |
| | from .errors import DistutilsInternalError, DistutilsTemplateError |
| | from .util import convert_path |
| |
|
| |
|
| | class FileList: |
| | """A list of files built by on exploring the filesystem and filtered by |
| | applying various patterns to what we find there. |
| | |
| | Instance attributes: |
| | dir |
| | directory from which files will be taken -- only used if |
| | 'allfiles' not supplied to constructor |
| | files |
| | list of filenames currently being built/filtered/manipulated |
| | allfiles |
| | complete list of files under consideration (ie. without any |
| | filtering applied) |
| | """ |
| |
|
| | def __init__(self, warn: object = None, debug_print: object = None) -> None: |
| | |
| | |
| | self.allfiles: Iterable[str] | None = None |
| | self.files: list[str] = [] |
| |
|
| | def set_allfiles(self, allfiles: Iterable[str]) -> None: |
| | self.allfiles = allfiles |
| |
|
| | def findall(self, dir: str | os.PathLike[str] = os.curdir) -> None: |
| | self.allfiles = findall(dir) |
| |
|
| | def debug_print(self, msg: object) -> None: |
| | """Print 'msg' to stdout if the global DEBUG (taken from the |
| | DISTUTILS_DEBUG environment variable) flag is true. |
| | """ |
| | from distutils.debug import DEBUG |
| |
|
| | if DEBUG: |
| | print(msg) |
| |
|
| | |
| |
|
| | def append(self, item: str) -> None: |
| | self.files.append(item) |
| |
|
| | def extend(self, items: Iterable[str]) -> None: |
| | self.files.extend(items) |
| |
|
| | def sort(self) -> None: |
| | |
| | sortable_files = sorted(map(os.path.split, self.files)) |
| | self.files = [] |
| | for sort_tuple in sortable_files: |
| | self.files.append(os.path.join(*sort_tuple)) |
| |
|
| | |
| |
|
| | def remove_duplicates(self) -> None: |
| | |
| | for i in range(len(self.files) - 1, 0, -1): |
| | if self.files[i] == self.files[i - 1]: |
| | del self.files[i] |
| |
|
| | |
| |
|
| | def _parse_template_line(self, line): |
| | words = line.split() |
| | action = words[0] |
| |
|
| | patterns = dir = dir_pattern = None |
| |
|
| | if action in ('include', 'exclude', 'global-include', 'global-exclude'): |
| | if len(words) < 2: |
| | raise DistutilsTemplateError( |
| | f"'{action}' expects <pattern1> <pattern2> ..." |
| | ) |
| | patterns = [convert_path(w) for w in words[1:]] |
| | elif action in ('recursive-include', 'recursive-exclude'): |
| | if len(words) < 3: |
| | raise DistutilsTemplateError( |
| | f"'{action}' expects <dir> <pattern1> <pattern2> ..." |
| | ) |
| | dir = convert_path(words[1]) |
| | patterns = [convert_path(w) for w in words[2:]] |
| | elif action in ('graft', 'prune'): |
| | if len(words) != 2: |
| | raise DistutilsTemplateError( |
| | f"'{action}' expects a single <dir_pattern>" |
| | ) |
| | dir_pattern = convert_path(words[1]) |
| | else: |
| | raise DistutilsTemplateError(f"unknown action '{action}'") |
| |
|
| | return (action, patterns, dir, dir_pattern) |
| |
|
| | def process_template_line(self, line: str) -> None: |
| | |
| | |
| | |
| | |
| | |
| | (action, patterns, dir, dir_pattern) = self._parse_template_line(line) |
| |
|
| | |
| | |
| | |
| | if action == 'include': |
| | self.debug_print("include " + ' '.join(patterns)) |
| | for pattern in patterns: |
| | if not self.include_pattern(pattern, anchor=True): |
| | log.warning("warning: no files found matching '%s'", pattern) |
| |
|
| | elif action == 'exclude': |
| | self.debug_print("exclude " + ' '.join(patterns)) |
| | for pattern in patterns: |
| | if not self.exclude_pattern(pattern, anchor=True): |
| | log.warning( |
| | "warning: no previously-included files found matching '%s'", |
| | pattern, |
| | ) |
| |
|
| | elif action == 'global-include': |
| | self.debug_print("global-include " + ' '.join(patterns)) |
| | for pattern in patterns: |
| | if not self.include_pattern(pattern, anchor=False): |
| | log.warning( |
| | ( |
| | "warning: no files found matching '%s' " |
| | "anywhere in distribution" |
| | ), |
| | pattern, |
| | ) |
| |
|
| | elif action == 'global-exclude': |
| | self.debug_print("global-exclude " + ' '.join(patterns)) |
| | for pattern in patterns: |
| | if not self.exclude_pattern(pattern, anchor=False): |
| | log.warning( |
| | ( |
| | "warning: no previously-included files matching " |
| | "'%s' found anywhere in distribution" |
| | ), |
| | pattern, |
| | ) |
| |
|
| | elif action == 'recursive-include': |
| | self.debug_print("recursive-include {} {}".format(dir, ' '.join(patterns))) |
| | for pattern in patterns: |
| | if not self.include_pattern(pattern, prefix=dir): |
| | msg = "warning: no files found matching '%s' under directory '%s'" |
| | log.warning(msg, pattern, dir) |
| |
|
| | elif action == 'recursive-exclude': |
| | self.debug_print("recursive-exclude {} {}".format(dir, ' '.join(patterns))) |
| | for pattern in patterns: |
| | if not self.exclude_pattern(pattern, prefix=dir): |
| | log.warning( |
| | ( |
| | "warning: no previously-included files matching " |
| | "'%s' found under directory '%s'" |
| | ), |
| | pattern, |
| | dir, |
| | ) |
| |
|
| | elif action == 'graft': |
| | self.debug_print("graft " + dir_pattern) |
| | if not self.include_pattern(None, prefix=dir_pattern): |
| | log.warning("warning: no directories found matching '%s'", dir_pattern) |
| |
|
| | elif action == 'prune': |
| | self.debug_print("prune " + dir_pattern) |
| | if not self.exclude_pattern(None, prefix=dir_pattern): |
| | log.warning( |
| | ("no previously-included directories found matching '%s'"), |
| | dir_pattern, |
| | ) |
| | else: |
| | raise DistutilsInternalError( |
| | f"this cannot happen: invalid action '{action}'" |
| | ) |
| |
|
| | |
| | @overload |
| | def include_pattern( |
| | self, |
| | pattern: str, |
| | anchor: bool = True, |
| | prefix: str | None = None, |
| | is_regex: Literal[False] = False, |
| | ) -> bool: ... |
| | @overload |
| | def include_pattern( |
| | self, |
| | pattern: str | re.Pattern[str], |
| | anchor: bool = True, |
| | prefix: str | None = None, |
| | *, |
| | is_regex: Literal[True], |
| | ) -> bool: ... |
| | @overload |
| | def include_pattern( |
| | self, |
| | pattern: str | re.Pattern[str], |
| | anchor: bool, |
| | prefix: str | None, |
| | is_regex: Literal[True], |
| | ) -> bool: ... |
| | def include_pattern( |
| | self, |
| | pattern: str | re.Pattern, |
| | anchor: bool = True, |
| | prefix: str | None = None, |
| | is_regex: bool = False, |
| | ) -> bool: |
| | """Select strings (presumably filenames) from 'self.files' that |
| | match 'pattern', a Unix-style wildcard (glob) pattern. Patterns |
| | are not quite the same as implemented by the 'fnmatch' module: '*' |
| | and '?' match non-special characters, where "special" is platform- |
| | dependent: slash on Unix; colon, slash, and backslash on |
| | DOS/Windows; and colon on Mac OS. |
| | |
| | If 'anchor' is true (the default), then the pattern match is more |
| | stringent: "*.py" will match "foo.py" but not "foo/bar.py". If |
| | 'anchor' is false, both of these will match. |
| | |
| | If 'prefix' is supplied, then only filenames starting with 'prefix' |
| | (itself a pattern) and ending with 'pattern', with anything in between |
| | them, will match. 'anchor' is ignored in this case. |
| | |
| | If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and |
| | 'pattern' is assumed to be either a string containing a regex or a |
| | regex object -- no translation is done, the regex is just compiled |
| | and used as-is. |
| | |
| | Selected strings will be added to self.files. |
| | |
| | Return True if files are found, False otherwise. |
| | """ |
| | |
| | files_found = False |
| | pattern_re = translate_pattern(pattern, anchor, prefix, is_regex) |
| | self.debug_print(f"include_pattern: applying regex r'{pattern_re.pattern}'") |
| |
|
| | |
| | if self.allfiles is None: |
| | self.findall() |
| |
|
| | for name in self.allfiles: |
| | if pattern_re.search(name): |
| | self.debug_print(" adding " + name) |
| | self.files.append(name) |
| | files_found = True |
| | return files_found |
| |
|
| | @overload |
| | def exclude_pattern( |
| | self, |
| | pattern: str, |
| | anchor: bool = True, |
| | prefix: str | None = None, |
| | is_regex: Literal[False] = False, |
| | ) -> bool: ... |
| | @overload |
| | def exclude_pattern( |
| | self, |
| | pattern: str | re.Pattern[str], |
| | anchor: bool = True, |
| | prefix: str | None = None, |
| | *, |
| | is_regex: Literal[True], |
| | ) -> bool: ... |
| | @overload |
| | def exclude_pattern( |
| | self, |
| | pattern: str | re.Pattern[str], |
| | anchor: bool, |
| | prefix: str | None, |
| | is_regex: Literal[True], |
| | ) -> bool: ... |
| | def exclude_pattern( |
| | self, |
| | pattern: str | re.Pattern, |
| | anchor: bool = True, |
| | prefix: str | None = None, |
| | is_regex: bool = False, |
| | ) -> bool: |
| | """Remove strings (presumably filenames) from 'files' that match |
| | 'pattern'. Other parameters are the same as for |
| | 'include_pattern()', above. |
| | The list 'self.files' is modified in place. |
| | Return True if files are found, False otherwise. |
| | """ |
| | files_found = False |
| | pattern_re = translate_pattern(pattern, anchor, prefix, is_regex) |
| | self.debug_print(f"exclude_pattern: applying regex r'{pattern_re.pattern}'") |
| | for i in range(len(self.files) - 1, -1, -1): |
| | if pattern_re.search(self.files[i]): |
| | self.debug_print(" removing " + self.files[i]) |
| | del self.files[i] |
| | files_found = True |
| | return files_found |
| |
|
| |
|
| | |
| |
|
| |
|
def _find_all_simple(path):
    """
    Find all files under 'path'.

    Returns a lazy iterator of paths (each joined onto the directory that
    os.walk reported), keeping only those for which os.path.isfile() is true.
    """
    # Symlinked directories are followed; _UniqueDirs drops directories that
    # were already visited so that link cycles cannot recurse forever.
    all_unique = _UniqueDirs.filter(os.walk(path, followlinks=True))
    results = (
        os.path.join(base, file) for base, dirs, files in all_unique for file in files
    )
    return filter(os.path.isfile, results)
| |
|
| |
|
class _UniqueDirs(set):
    """
    Exclude previously-seen dirs from walk results,
    avoiding infinite recursion.
    Ref https://bugs.python.org/issue44497.

    The set itself stores one (st_dev, st_ino) pair per directory seen.
    """

    def __call__(self, walk_item):
        """
        Given an item from an os.walk result, determine
        if the item represents a unique dir for this instance
        and if not, prevent further traversal.

        Returns True for a directory seen for the first time, else False.
        """
        base, dirs, files = walk_item
        stat = os.stat(base)
        # Device and inode number together identify the directory itself,
        # regardless of the path used to reach it.
        candidate = stat.st_dev, stat.st_ino
        found = candidate in self
        if found:
            # Emptying the list in place stops a top-down os.walk from
            # descending below this directory again.
            del dirs[:]
        self.add(candidate)
        return not found

    @classmethod
    def filter(cls, items):
        """Lazily filter 'items' through a fresh instance of this class."""
        return filter(cls(), items)
| |
|
| |
|
def findall(dir: str | os.PathLike[str] = os.curdir):
    """
    Find all files under 'dir' and return the list of full filenames.
    Unless dir is '.', return full filenames with dir prepended.
    """
    files = _find_all_simple(dir)
    # Compare the filesystem-path form so that a path-like '.' (for example
    # pathlib.Path('.')) is recognised as the current directory too; a bare
    # `dir == os.curdir` is only ever true for a plain string.
    if os.fspath(dir) == os.curdir:
        # Strip the leading './' that walking '.' puts on every result.
        make_rel = functools.partial(os.path.relpath, start=dir)
        files = map(make_rel, files)
    return list(files)
| |
|
| |
|
def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' does not match "special characters" (which are
    platform-specific).
    """
    pattern_re = fnmatch.translate(pattern)

    # fnmatch turns '?' and '*' into '.' and '.*', which would also match
    # the path separator.  Rewrite every unescaped dot in the generated
    # regex into a character class that excludes the separator (currently
    # the only "special character" handled is os.sep).
    sep = os.sep
    if os.sep == '\\':
        # A regex is being used to edit a regex, so the backslash has to
        # be escaped twice: once for the replacement template and once
        # for the resulting pattern.
        sep = r'\\\\'
    escaped = rf'\1[^{sep}]'
    # Group 1 captures an even run of backslashes not preceded by another
    # backslash, i.e. the dot that follows it is not itself escaped.
    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
    return pattern_re
| |
|
| |
|
def translate_pattern(pattern, anchor=True, prefix=None, is_regex=False):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.  If 'is_regex' true,
    then 'pattern' is directly compiled to a regex (if it's a string)
    or just returned as-is (assumes it's a regex object).

    When 'prefix' is given, the result matches names that start with
    'prefix', followed by the path separator, anything, and then
    'pattern'; 'anchor' is not consulted in that case.  Without a prefix,
    a true 'anchor' pins the match to the start of the string.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        else:
            return pattern

    # Translate a one-character probe to learn the wrapper text that
    # glob_to_re() puts before and after the translated body, so it can be
    # stripped and re-applied below.
    start, _, end = glob_to_re('_').partition('_')

    if pattern:
        pattern_re = glob_to_re(pattern)
        assert pattern_re.startswith(start) and pattern_re.endswith(end)
    else:
        pattern_re = ''

    if prefix is not None:
        prefix_re = glob_to_re(prefix)
        assert prefix_re.startswith(start) and prefix_re.endswith(end)
        # Keep only the bodies of both regexes, then rebuild a single
        # wrapped expression: <prefix><sep><anything><pattern>.
        prefix_re = prefix_re[len(start) : len(prefix_re) - len(end)]
        sep = os.sep
        if os.sep == '\\':
            # A literal backslash must be escaped inside the regex.
            sep = r'\\'
        pattern_re = pattern_re[len(start) : len(pattern_re) - len(end)]
        pattern_re = rf'{start}\A{prefix_re}{sep}.*{pattern_re}{end}'
    else:
        # No prefix -- respect the anchor flag.
        if anchor:
            pattern_re = rf'{start}\A{pattern_re[len(start) :]}'

    return re.compile(pattern_re)
| |
|