"""Reference JSON loading and validation.""" import json import logging from pathlib import Path logger = logging.getLogger(__name__) def load_reference(json_path: str) -> dict[str, str]: """Load and validate a reference JSON file. Args: json_path: Path to the reference JSON file. Returns: Dictionary mapping audio filenames to transcription strings. Raises: FileNotFoundError: If the JSON file does not exist. ValueError: If the JSON is malformed or not a flat dict[str, str]. """ path = Path(json_path) if not path.exists(): raise FileNotFoundError(f"Reference file not found: {json_path}") try: with open(path, "r", encoding="utf-8") as f: data = json.load(f) except json.JSONDecodeError: raise ValueError(f"Malformed JSON in {json_path}: unable to parse") if not isinstance(data, dict): raise ValueError( f"Reference JSON must be a flat dict, got {type(data).__name__}" ) for key, value in data.items(): if not isinstance(key, str) or not isinstance(value, str): raise ValueError("All keys and values in reference JSON must be strings") logger.info(f"Loaded reference JSON: {json_path} ({len(data)} entries)") return data