File size: 2,674 Bytes
a952689
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9aeb1d
 
 
a952689
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import argparse
import functools
import logging
import os
import re
import string
import sys
from pathlib import Path
from typing import Union
from urllib.parse import urlparse

LOG = logging.getLogger(__name__)


class ArgumentError(ValueError):
    """
    Raised to signal that the supplied arguments are invalid.

    Subclasses ValueError, so handlers written for ValueError also catch it.
    """


@functools.cache
def alphabet_id(n):
    """
    Convert a zero-based index into a spreadsheet-style uppercase letter id.

    The mapping is 0 -> "A", ..., 25 -> "Z", 26 -> "AA", 27 -> "AB", ..., 701 -> "ZZ", 702 -> "AAA",
    so every non-negative integer gets a distinct id. Results are memoised by functools.cache.
    :param n: Non-negative integer index
    :return: Letter id made of the characters A-Z
    :raises ValueError: If n is negative
    """
    if n < 0:
        raise ValueError(f"alphabet_id expects a non-negative index, got {n}")

    letters = string.ascii_uppercase
    n_letters = len(letters)

    # Bijective base-26 has no zero digit, so work on the one-based position and shift by one
    # before every division. Applying the same rule to all n keeps 25 ("Z") and 26 ("AA") distinct.
    position = n + 1
    digits = []
    while position > 0:
        position, remainder = divmod(position - 1, n_letters)
        digits.append(letters[remainder])

    # Digits were produced least-significant first.
    return "".join(reversed(digits))


def is_url(text):
    """
    Tell whether the given text is a web URL.
    :param text: Text to inspect, e.g., https://example.com/images/pic.jpg or /data/pic.jpg
    :return: True when the parsed scheme is http or https, False otherwise
    """
    scheme = urlparse(text).scheme
    return scheme == "http" or scheme == "https"


def extract_filename_and_extension(url):
    """
    Extract base filename and extension from the url.

    The query string and fragment are ignored. The name is split on its last dot, matching
    Path.stem / Path.suffix for ordinary names, so "my.photo.v2.jpg" yields "my.photo.v2" and ".jpg".
    :param url: URL with filename and extension, e.g., https://example.com/images/pic.jpg?param=value
    :return: Base filename and extension including the leading dot, e.g., pic, .jpg.
        The extension is None when the filename contains no dot.
    """
    # Only the last path segment can carry the filename.
    filename = urlparse(url).path.rsplit("/", 1)[-1]
    basename, dot, extension = filename.rpartition(".")
    if not dot:
        # rpartition puts the whole text in the last slot when the separator is missing.
        return filename, None
    return basename, f".{extension}"


def build_image_paths(images_paths, recursive=False):
    """
    Collect image files and image URLs from the given locations, de-duplicated and sorted.

    :param images_paths: A single location (str or Path) or an iterable of locations. Each location
        may be an http(s) URL (kept as a string), a directory (scanned for files matching the
        supported image patterns) or a single file (accepted as-is, whatever its extension).
        Locations that are none of these are ignored.
    :param recursive: When True, every direct sub-folder of a directory is also scanned recursively,
        except the direct sub-folders named "debug" or "log"
    :return: List of resolved Path objects and URL strings, ordered by sort_file
    :raises FileNotFoundError: If no file or URL was collected
    """
    valid_images = ["*.jpg", "*.gif", "*.png", "*.jpeg", "*.webp", "*.tif"]
    excluded_folders = ["debug", "log"]
    # A lone str or Path is one location, not an iterable of locations.
    if isinstance(images_paths, (str, Path)):
        images_paths = [images_paths]

    collected = []
    for filename in images_paths:
        # Only strings can be URLs; urlparse() rejects Path objects.
        if isinstance(filename, str) and is_url(filename):
            collected.append(filename)
            continue
        p = Path(filename)
        if p.is_dir():
            for pattern in valid_images:
                collected.extend(f.resolve() for f in p.glob(pattern))
            if recursive:
                # Explicit is_dir() check: glob("*/") only restricts to directories on Python 3.11+.
                subfolders = [
                    f for f in p.iterdir() if f.is_dir() and f.name not in excluded_folders
                ]
                for pattern in valid_images:
                    for sp in subfolders:
                        collected.extend(f.resolve() for f in sp.rglob(pattern))
        elif p.is_file():
            collected.append(p.resolve())

    # dict.fromkeys drops duplicates while keeping first-seen order, so entries with equal sort
    # keys come out in the same order on every run (a set would not guarantee that).
    paths = list(dict.fromkeys(collected))
    if not paths:
        raise FileNotFoundError("No valid images in the specified path.")
    # Sort paths by (first) number extracted from the filename string
    paths.sort(key=sort_file)
    return paths


def sort_file(path: Union[str, Path]):
    """
    Build the sort key for an image location.

    :param path: Path object (its stem is used) or a URL string (its base filename is used)
    :return: Tuple of (first integer found in the name, or infinity when the name has no digits)
        and the name itself, so numbered names sort numerically ahead of unnumbered ones
    """
    stem = path.stem if isinstance(path, Path) else extract_filename_and_extension(path)[0]
    first_number = re.search(r"\d+", stem)
    if first_number is None:
        # Names without digits go after every numbered name.
        return float("inf"), stem
    return int(first_number.group()), stem