# Alex Latipov
# Harden frozen eval prompts and judge JSON handling
# d745844
"""Small utility helpers for the Text2SPARQL repair pipeline.
No business logic — just file I/O, hashing, and directory management.
"""
from __future__ import annotations
import hashlib
import json
import os
from datetime import datetime, timezone
from pathlib import Path
def save_json(obj: dict, path: str) -> None:
    """Save a dictionary as formatted JSON.

    Creates parent directories if they don't exist. The file is always
    written as UTF-8: ``ensure_ascii=False`` keeps non-ASCII characters
    verbatim, so relying on the locale's default encoding could fail or
    produce a non-UTF-8 file on some platforms.

    Args:
        obj: Dictionary to serialize. Values the ``json`` module cannot
            encode natively are converted with ``str`` (``default=str``).
        path: Output file path.
    """
    out_path = Path(path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the output does not depend on the platform locale.
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(obj, f, indent=2, default=str, ensure_ascii=False)
def load_json(path: str) -> dict:
    """Load a JSON file and return the parsed content.

    The file is decoded as UTF-8 regardless of the platform's locale
    encoding, so non-ASCII content is read the same way everywhere.

    Args:
        path: Path to the JSON file.

    Returns:
        Parsed dictionary.

    Raises:
        FileNotFoundError: If file does not exist.
        json.JSONDecodeError: If file is not valid JSON.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Explicit encoding: the locale default is not guaranteed to be UTF-8.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def make_run_dir(base_dir: str, request_id: str) -> str:
    """Ensure the artifact directory for one request exists.

    The directory lives at ``<base_dir>/<request_id>/``. Missing parent
    directories are created, and an already existing directory is reused.

    Args:
        base_dir: Root directory holding all runs (usually "runs").
        request_id: Unique request identifier, used as the folder name.

    Returns:
        The joined path of the run directory.
    """
    target = os.path.join(base_dir, request_id)
    # exist_ok=True makes repeated calls for the same request harmless.
    os.makedirs(target, exist_ok=True)
    return target
def short_hash(text: str) -> str:
    """Return a short, deterministic hex fingerprint of ``text``.

    Handy for deriving candidate IDs from query strings.

    Args:
        text: Input text to hash.

    Returns:
        The first 8 hex characters of the SHA-256 digest of the
        UTF-8 encoded text.
    """
    prefix_len = 8
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()[:prefix_len]