fromozu committed on
Commit
92b449a
·
verified ·
1 Parent(s): 35b16a0

Upload hf_backend/filename_utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. hf_backend/filename_utils.py +52 -0
hf_backend/filename_utils.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from pathlib import PurePosixPath
5
+ from urllib.parse import unquote
6
+
7
+
8
def normalize_source_filename(
    name: str,
    *,
    default_stem: str = "downloaded_book",
    default_extension: str = "",
) -> str:
    """Build a safe, ASCII-only file name from a raw or URL-encoded name.

    The input is percent-decoded, reduced to its last ``/``-separated
    component and split into a stem and an extension.  The stem is cut at
    the first whitespace-delimited ``--`` separator (the text after it is
    dropped), and every run of characters other than ASCII letters and
    digits is collapsed into a single underscore.

    Args:
        name: Source file name, path or percent-encoded name.  Falsy values
            are treated as an empty name.
        default_stem: Stem used when nothing usable remains of *name*.
        default_extension: Extension (with or without the leading dot) used
            when *name* carries no usable extension.

    Returns:
        ``"<stem><extension>"`` where the stem contains only
        ``[A-Za-z0-9_]`` and the extension is either empty, the lower-cased
        alphanumeric suffix of *name*, or the normalised default extension.
    """
    # Strip again after decoding: "%20" only turns into whitespace once it
    # has been unquoted.
    decoded_name = unquote(str(name or "").strip()).strip()
    if not decoded_name:
        return _compose_filename(default_stem, default_extension)

    # PurePosixPath("/").name is empty, so fall back to the decoded text.
    basename = (PurePosixPath(decoded_name).name or decoded_name).strip()
    raw_suffix = PurePosixPath(basename).suffix
    suffix = raw_suffix.lower()

    # pathlib reports everything after the last dot as the suffix, so
    # "Dr. Smith -- Memoirs" or "book.pdf?download=1" would otherwise leak
    # spaces and punctuation into the result.  Only a dot followed by ASCII
    # letters/digits is accepted as a real extension; anything else stays
    # part of the stem and the default extension applies.
    if suffix and re.fullmatch(r"\.[a-z0-9]+", suffix):
        stem = basename[: -len(raw_suffix)]
        extension = suffix
    else:
        stem = basename
        extension = _normalize_extension(default_extension)

    # Keep only the part before the first " -- " separator.
    match = re.match(r"^(.+?)\s+--\s+", stem)
    if match:
        stem = match.group(1)

    # Collapse every run of non-alphanumeric characters (whitespace,
    # punctuation, underscores, non-ASCII text) into one underscore.
    stem = re.sub(r"[^a-zA-Z0-9]+", "_", stem.strip()).strip("_")
    if not stem:
        stem = default_stem

    return _compose_filename(stem, extension)
37
+
38
+
39
def _normalize_extension(value: str) -> str:
    """Return *value* as a lower-case extension with a leading dot.

    Falsy or whitespace-only input yields an empty string.  Input that
    already starts with a dot is returned unchanged apart from trimming and
    lower-casing; otherwise a single dot is prepended.
    """
    text = str(value or "").strip().lower()
    # Empty text and already-dotted text need no further work.
    if text == "" or text.startswith("."):
        return text
    return "." + text
46
+
47
+
48
def _compose_filename(stem: str, extension: str) -> str:
    """Join a sanitised stem and a normalised extension into a file name.

    Characters in *stem* other than ASCII letters, digits and underscores
    are replaced by underscores, runs of underscores are collapsed, and
    leading/trailing underscores are removed.  If nothing is left, the stem
    becomes ``"downloaded_book"``.  The extension is passed through
    ``_normalize_extension``.
    """
    text = str(stem or "").strip()
    text = re.sub(r"[^a-zA-Z0-9_]", "_", text)
    text = re.sub(r"_+", "_", text)
    text = text.strip("_")
    if not text:
        # Nothing usable survived sanitising; use the fixed fallback stem.
        text = "downloaded_book"
    return text + _normalize_extension(extension)