import argparse
import functools
import logging
import os
import re
import string
import sys
from pathlib import Path
from typing import Union
from urllib.parse import urlparse

LOG = logging.getLogger(__name__)


class ArgumentError(ValueError):
    """
    Wrapper for argument error. This exception will be raised when the arguments are
    invalid.
    """


@functools.cache
def alphabet_id(n):
    """
    Convert a zero-based index into a spreadsheet-style column label.

    The sequence is A, B, ..., Z, AA, AB, ..., AZ, BA, ..., ZZ, AAA, ...
    Results are memoized with ``functools.cache``.

    :param n: Zero-based index, expected to be a non-negative integer.
    :return: The uppercase label for ``n``, e.g., 0 -> "A", 25 -> "Z", 26 -> "AA".
    """
    letters = string.ascii_uppercase
    n_letters = len(letters)
    if n < n_letters:
        return letters[n]

    # Labels have no "zero" letter (bijective base-26), so shift to a one-based
    # value first; every index then maps to a distinct label.
    n += 1
    chars = []
    while n > 0:
        n, remainder = divmod(n - 1, n_letters)
        chars.append(letters[remainder])
    return "".join(reversed(chars))


def is_url(text):
    """
    Tell whether ``text`` parses as an http(s) URL.

    :param text: String to inspect.
    :return: True when the parsed scheme is "http" or "https".
    """
    return urlparse(text).scheme in ("http", "https")


def extract_filename_and_extension(url):
    """
    Extract base filename and extension from the url.

    The name is split on its last dot, so "my.photo.jpg" yields
    ("my.photo", ".jpg"), matching what ``Path.stem`` gives for local files.

    :param url: URL with filename and extension, e.g.,
        https://example.com/images/pic.jpg?param=value
    :return: Base filename and extension (with the leading dot), e.g.,
        ("pic", ".jpg"). The extension is None when the filename has no dot.
    """
    # Only the path component matters; query string and fragment are dropped.
    filename = urlparse(url).path.split("/")[-1]
    basename, dot, extension = filename.rpartition(".")
    if not dot:
        return filename, None
    return basename, f".{extension}"


def _iter_image_files(directory, patterns, recursive, excluded_folders):
    """Yield paths under ``directory`` whose names match any glob in ``patterns``."""
    for pattern in patterns:
        yield from directory.glob(pattern)
    if not recursive:
        return
    # The exclusion list is only applied to the immediate subfolders of
    # ``directory``; anything below a kept subfolder is searched in full.
    for child in directory.iterdir():
        if child.is_dir() and child.name not in excluded_folders:
            for pattern in patterns:
                yield from child.rglob(pattern)


def build_image_paths(images_paths, recursive=False):
    """
    Expand files, directories and URLs into a sorted list of image locations.

    :param images_paths: A single path/URL or an iterable of them. Entries may be
        strings or path-like objects. Directories are searched for the supported
        image extensions; http(s) URLs are passed through untouched.
    :param recursive: When True, subfolders of each directory are searched as
        well, except immediate subfolders named "debug" or "log".
    :return: De-duplicated list of resolved ``Path`` objects and URL strings,
        ordered by the first number in each filename (names without a number go
        last), then by basename, then by the full path text.
    :raises FileNotFoundError: If nothing was found.
    """
    valid_images = ("*.jpg", "*.gif", "*.png", "*.jpeg", "*.webp", "*.tif")
    excluded_folders = ("debug", "log")

    if isinstance(images_paths, (str, os.PathLike)):
        images_paths = [images_paths]

    found = set()
    for entry in images_paths:
        if isinstance(entry, str) and is_url(entry):
            found.add(entry)
            continue

        p = Path(entry)
        if p.is_dir():
            found.update(
                f.resolve()
                for f in _iter_image_files(
                    p, valid_images, recursive, excluded_folders
                )
            )
        elif p.is_file():
            found.add(p.resolve())
        else:
            LOG.warning("Skipping '%s': not a URL, file or directory.", entry)

    if not found:
        raise FileNotFoundError("No valid images in the specified path.")

    # Sort paths by (first) number extracted from the filename string. The full
    # path text breaks ties so the order does not depend on set iteration order.
    return sorted(found, key=lambda path: (sort_file(path), str(path)))


def sort_file(path: Union[str, Path]):
    """
    Build the sort key for an image location.

    :param path: A ``Path`` (its stem is used) or a string/URL (its base filename
        is used).
    :return: Tuple of (first integer found in the base name, or infinity when
        there is none, base name).
    """
    if isinstance(path, Path):
        basename = path.stem
    else:
        basename, _ = extract_filename_and_extension(path)
    first_number = re.search(r"\d+", basename)
    return (int(first_number.group()) if first_number else float("inf")), basename