Spaces:
Paused
Paused
Upload hf_backend/filename_utils.py with huggingface_hub
Browse files- hf_backend/filename_utils.py +52 -0
hf_backend/filename_utils.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
from pathlib import PurePosixPath
|
| 5 |
+
from urllib.parse import unquote
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def normalize_source_filename(
    name: str,
    *,
    default_stem: str = "downloaded_book",
    default_extension: str = "",
) -> str:
    """Return a sanitized filename derived from *name*.

    The input is stripped, percent-decoded, and reduced to its final POSIX
    path component. The stem is cut at the first whitespace-delimited
    ``--`` separator, then every character outside ``[a-zA-Z0-9_]`` is
    replaced by ``_``, runs of ``_`` are collapsed, and leading/trailing
    ``_`` are trimmed.

    The trailing ``.suffix`` is treated as an extension only when it is a dot
    followed solely by ASCII letters or digits (surrounding whitespace is
    ignored). Otherwise the dot is considered part of the title, the whole
    basename becomes the stem, and *default_extension* is used. This keeps
    names such as ``"Vol. 2 -- Author"`` from leaking ``". 2 -- author"``
    into the result as an "extension".

    Args:
        name: Raw filename, path, or percent-encoded name. Falsy values are
            treated as empty.
        default_stem: Stem used when no usable stem can be extracted.
        default_extension: Extension used when *name* carries no valid one;
            may be given with or without the leading dot.

    Returns:
        The stem and lowercase extension joined by ``_compose_filename``.
    """
    raw_name = str(name or "").strip()
    decoded_name = unquote(raw_name)
    if not decoded_name:
        return _compose_filename(default_stem, default_extension)

    # ``.name`` is empty for inputs such as "/" or "."; fall back to the
    # decoded text so the sanitizing steps below still run on something.
    basename = PurePosixPath(decoded_name).name or decoded_name
    path = PurePosixPath(basename)

    suffix = path.suffix
    # Validate a stripped, lowercased copy; slice with the raw suffix length
    # so a trailing space (e.g. from "%20") does not end up in the stem.
    candidate = suffix.strip().lower()
    if re.fullmatch(r"\.[a-z0-9]+", candidate):
        stem = path.name[: -len(suffix)]
        extension = candidate
    else:
        # No suffix, or a "suffix" that is really part of the title.
        stem = path.name
        extension = _normalize_extension(default_extension)

    # Keep only the part before the first " -- " separator, if present.
    match = re.match(r"^(.+?)\s+--\s+", stem)
    if match:
        stem = match.group(1)

    stem = stem.strip()
    stem = re.sub(r"\s+", "_", stem)
    stem = re.sub(r"[^a-zA-Z0-9_]", "_", stem)
    stem = re.sub(r"_+", "_", stem)
    stem = stem.strip("_")
    if not stem:
        stem = default_stem

    return _compose_filename(stem, extension)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _normalize_extension(value: str) -> str:
    """Return *value* as a lowercase extension with exactly one leading dot.

    Surrounding whitespace is stripped and the text is lowercased. Any
    leading dots are removed before a single dot is re-added, so ``"pdf"``,
    ``".PDF"`` and ``"..pdf"`` all yield ``".pdf"``.

    Args:
        value: Extension with or without a leading dot. Falsy values are
            treated as empty.

    Returns:
        The normalized extension, or ``""`` when nothing remains after
        stripping. Input consisting only of dots therefore gives ``""``
        rather than a bare ``"."``, which would leave a dangling dot at the
        end of a composed filename.
    """
    bare = str(value or "").strip().lower().lstrip(".")
    if not bare:
        return ""
    return f".{bare}"
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _compose_filename(stem: str, extension: str) -> str:
    """Join a sanitized *stem* and a normalized *extension* into a filename.

    Every character of the stripped stem outside ``[a-zA-Z0-9_]`` becomes
    ``_``; runs of ``_`` are collapsed and leading/trailing ``_`` removed.
    If nothing is left, ``"downloaded_book"`` is used as the stem. The
    extension is passed through ``_normalize_extension`` before being
    appended.
    """
    text = str(stem or "").strip()
    sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", text)
    collapsed = re.sub(r"_+", "_", sanitized).strip("_")
    if not collapsed:
        # Nothing usable survived sanitizing; fall back to the fixed stem.
        collapsed = "downloaded_book"
    return collapsed + _normalize_extension(extension)
|