Spaces:
Runtime error
Runtime error
| """ | |
| Shared helpers for collection/curation pipeline. | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import re | |
| from pathlib import Path | |
| from typing import List, Optional | |
# Label sources a priority list may mention.
ALLOWED_LABEL_PRIORITY = ("strong", "weak", "user")


def parse_label_priority(value: str) -> List[str]:
    """
    Turn a comma-separated label priority string into a validated list.

    Whitespace around each entry is stripped and blank entries are ignored.
    Repeated entries are collapsed, keeping the first occurrence, so the
    relative order given by the caller is preserved.

    Raises:
        ValueError: if no entries remain, or if any entry is not listed in
            ALLOWED_LABEL_PRIORITY.
    """
    stripped_parts = (part.strip() for part in str(value).split(","))
    tokens = [token for token in stripped_parts if token]
    if not tokens:
        raise ValueError("label priority cannot be empty")

    rejected = [token for token in tokens if token not in ALLOWED_LABEL_PRIORITY]
    if rejected:
        raise ValueError(f"Invalid label priority values: {rejected}")

    # dict keys keep insertion order, which gives order-preserving de-duplication.
    return list(dict.fromkeys(tokens))
def safe_resolve_in_dir(base_dir: Path, filename: str) -> Optional[Path]:
    """
    Resolve a bare filename to a path located under base_dir.

    Args:
        base_dir: Directory the result must stay inside.
        filename: Untrusted file name; surrounding whitespace is stripped.

    Returns:
        The resolved path, or None when the name is empty, contains a NUL
        byte, carries any directory component, or resolves (for example via
        ".." or a symlink) to a location outside base_dir. None is also
        returned when the candidate cannot be resolved or compared at all.
    """
    raw_name = str(filename).strip()
    # A NUL byte can never be part of a real file name and would make the
    # filesystem calls below raise ValueError, so reject it up front.
    if not raw_name or "\x00" in raw_name:
        return None

    # Path.name keeps only the final component; any difference means the
    # caller passed a nested or traversal-style path.
    safe_name = Path(raw_name).name
    if safe_name != raw_name:
        return None

    root = base_dir.resolve()
    try:
        candidate = (base_dir / safe_name).resolve()
        # ".." and symlinks survive the name check above, so confirm that the
        # fully resolved candidate is still rooted at base_dir.
        inside = os.path.commonpath([str(root), str(candidate)]) == str(root)
    except (ValueError, RuntimeError):
        # ValueError: commonpath cannot relate the two paths (e.g. they sit
        # on different drives on Windows).
        # RuntimeError: Path.resolve() met a symlink loop (older Pythons).
        # Either way the name cannot be shown to be safe.
        return None

    return candidate if inside else None
def sanitize_identifier(value: str, fallback: str, max_len: int = 64) -> str:
    """
    Produce a filesystem-safe identifier from an arbitrary value.

    The value is converted to a string and stripped of surrounding
    whitespace; every character other than ASCII letters, digits, "_" and
    "-" becomes "_", and the result is limited to max_len characters.
    If nothing is left, fallback is returned unchanged.
    """
    # The substitution is one character for one character, so truncating
    # first yields the same result as truncating afterwards.
    trimmed = str(value).strip()[:max_len]
    identifier = re.sub(r"[^A-Za-z0-9_-]", "_", trimmed)
    if not identifier:
        return fallback
    return identifier