Spaces:

InsanAlex
/

iris-at-text2sparql

Running on CPU Upgrade

File size: 1,819 Bytes

d745844

"""Small utility helpers for the Text2SPARQL repair pipeline.

No business logic — just file I/O, hashing, and directory management.
"""

from __future__ import annotations

import hashlib
import json
import os
from datetime import datetime, timezone
from pathlib import Path


def save_json(obj: dict, path: str) -> None:
    """Save a dictionary as formatted, UTF-8 encoded JSON.

    Creates parent directories if they don't exist. An existing file at
    ``path`` is overwritten.

    Args:
        obj: Dictionary to serialize. Values that ``json`` cannot encode
            natively are converted with ``str()`` (``default=str``).
        path: Output file path.
    """
    out_path = Path(path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False emits raw non-ASCII characters, so the file encoding
    # must be pinned to UTF-8; the platform default (e.g. cp1252 on Windows)
    # could otherwise raise UnicodeEncodeError or produce non-UTF-8 JSON.
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(obj, f, indent=2, default=str, ensure_ascii=False)


def load_json(path: str) -> dict:
    """Load a UTF-8 encoded JSON file and return the parsed content.

    Args:
        path: Path to the JSON file.

    Returns:
        Parsed dictionary (whatever top-level value the file holds is
        returned as-is; callers are expected to store JSON objects).

    Raises:
        FileNotFoundError: If file does not exist.
        json.JSONDecodeError: If file is not valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which would mis-decode or reject non-ASCII JSON text.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def make_run_dir(base_dir: str, request_id: str) -> str:
    """Ensure the artifact directory for one request exists.

    The directory layout is ``<base_dir>/<request_id>/``. Missing parent
    directories are created as needed, and calling this again for an
    already-existing directory is not an error.

    Args:
        base_dir: Root directory under which all runs live (usually "runs").
        request_id: Unique identifier of the request; used as the
            directory name.

    Returns:
        The joined path of the run directory, as a string.
    """
    target = os.path.join(base_dir, request_id)
    os.makedirs(name=target, exist_ok=True)
    return target


def short_hash(text: str) -> str:
    """Return a short, deterministic fingerprint of ``text``.

    The text is UTF-8 encoded, hashed with SHA-256, and the hex digest is
    truncated to its first eight characters. Handy for deriving candidate
    IDs from query strings.

    Args:
        text: Input text to hash.

    Returns:
        8-character hex digest.
    """
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    full_digest = hasher.hexdigest()
    return full_digest[:8]