Spaces:
Sleeping
Sleeping
| """Reference JSON loading and validation.""" | |
| import json | |
| import logging | |
| from pathlib import Path | |
| logger = logging.getLogger(__name__) | |
def load_reference(json_path: str) -> dict[str, str]:
    """Load and validate a reference JSON file.

    The file must contain a single flat JSON object whose values are all
    strings (audio filename -> transcription).

    Args:
        json_path: Path to the reference JSON file.

    Returns:
        Dictionary mapping audio filenames to transcription strings.

    Raises:
        FileNotFoundError: If the JSON file does not exist.
        ValueError: If the JSON is malformed or not a flat dict[str, str].
            For malformed JSON the original ``json.JSONDecodeError`` is
            attached as ``__cause__`` so the parse position is not lost.
    """
    path = Path(json_path)
    if not path.exists():
        raise FileNotFoundError(f"Reference file not found: {json_path}")

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as err:
        # Chain the decoder error: it carries the line/column of the failure,
        # which the short message below deliberately omits.
        raise ValueError(f"Malformed JSON in {json_path}: unable to parse") from err

    if not isinstance(data, dict):
        raise ValueError(
            f"Reference JSON must be a flat dict, got {type(data).__name__}"
        )

    # json.load only ever yields str keys for objects; the key check is kept
    # as a defensive guard alongside the value check.
    if any(
        not isinstance(key, str) or not isinstance(value, str)
        for key, value in data.items()
    ):
        raise ValueError("All keys and values in reference JSON must be strings")

    # Lazy %-style arguments: the message is only formatted if INFO is enabled.
    logger.info("Loaded reference JSON: %s (%d entries)", json_path, len(data))
    return data