File size: 1,306 Bytes
d7efa84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""Reference JSON loading and validation."""

import json
import logging
from pathlib import Path

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)


def load_reference(json_path: str) -> dict[str, str]:
    """Load and validate a reference JSON file.

    The file must contain a single JSON object whose keys and values are
    all strings; anything else is rejected.

    Args:
        json_path: Path to the reference JSON file.

    Returns:
        Dictionary mapping audio filenames to transcription strings.

    Raises:
        FileNotFoundError: If the JSON file does not exist.
        ValueError: If the JSON is malformed or not a flat dict[str, str].
            For malformed JSON the original ``json.JSONDecodeError`` is
            attached as ``__cause__`` so the parse position is not lost.
    """
    path = Path(json_path)
    if not path.exists():
        raise FileNotFoundError(f"Reference file not found: {json_path}")

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as err:
        # Chain explicitly so the traceback shows the parser error as the
        # direct cause instead of "during handling ... another exception".
        raise ValueError(
            f"Malformed JSON in {json_path}: unable to parse"
        ) from err

    if not isinstance(data, dict):
        raise ValueError(
            f"Reference JSON must be a flat dict, got {type(data).__name__}"
        )

    # Reject any entry whose key or value is not a string (this also rules
    # out nested objects and arrays, keeping the mapping flat).
    for key, value in data.items():
        if not isinstance(key, str) or not isinstance(value, str):
            raise ValueError("All keys and values in reference JSON must be strings")

    # Lazy %-style arguments: the message is only formatted if INFO is enabled.
    logger.info("Loaded reference JSON: %s (%d entries)", json_path, len(data))
    return data