""" FiveK dataset path resolution. Maps image ID (e.g. a2621-_DSC5468) to DNG path using range folders. """ from pathlib import Path from typing import List # Range folders under raw_photos (inclusive low–high) _RANGES = [ (1, 700, "HQa1to700"), (701, 1400, "HQa701to1400"), (1401, 2100, "HQa1400to2100"), (2101, 2800, "HQa2101to2800"), (2801, 3500, "HQa2801to3500"), (3501, 4200, "HQa3501to4200"), (4201, 5000, "HQa4201to5000"), ] def _id_to_number(image_id: str) -> int: """Extract numeric part from id, e.g. a2621-_DSC5468 -> 2621.""" prefix = image_id.split("-")[0] if prefix.startswith("a"): try: return int(prefix[1:]) except ValueError: pass return 0 def image_id_to_dng_path(image_id: str, raw_photos_dir: Path) -> Path: """ Resolve image ID to DNG file path under raw_photos_dir. Returns path like raw_photos_dir/HQa2101to2800/photos/a2621-_DSC5468.dng """ num = _id_to_number(image_id) for low, high, folder_name in _RANGES: if low <= num <= high: return raw_photos_dir / folder_name / "photos" / f"{image_id}.dng" return raw_photos_dir / "HQa1to700" / "photos" / f"{image_id}.dng" def list_all_image_ids(file_list_path: Path) -> List[str]: """Read ordered list of image IDs from filesAdobe.txt (or similar).""" if not file_list_path.exists(): return [] ids = [] with open(file_list_path, "r", encoding="utf-8") as f: for line in f: line = line.strip() if line and not line.startswith("#"): ids.append(line) return ids