"""
FilGoalBot — On-disk LLM response cache.

Keyed on (model, intent, sorted chunk_ids, normalised query). Intentionally
file-based + JSON: trivial to inspect, trivial to invalidate by deletion, and
survives across processes (eval re-runs, API restarts).

Saves Groq tokens on:
  - eval re-runs (same 50 questions, same retrieved chunks)
  - production duplicates (the same query repeated within the TTL window)
"""

import hashlib
import json
import time
from pathlib import Path

# Directory holding one JSON file per cache entry. The path is relative, so it
# resolves against the current working directory of the importing process.
CACHE_DIR = Path(".cache/llm")
# Created eagerly as an import-time side effect; exist_ok makes repeated
# imports (and several processes sharing the directory) harmless.
CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Default maximum entry age used by get(): 30 days, expressed in seconds.
# Football news goes stale much faster, but the cache key includes the
# retrieved chunk IDs — when new articles come in, the chunk set shifts and the
# key changes naturally.
DEFAULT_TTL_SECONDS = 30 * 24 * 3600


def _make_key(
    model: str,
    intent: str,
    chunk_ids: list[str],
    query: str,
) -> str:
    """Derive the cache key (used as the file stem) for one request.

    The key is the first 32 hex characters of the SHA-256 digest of a
    canonical JSON document built from the four inputs. Chunk IDs are sorted,
    and the query is stripped of surrounding whitespace and lower-cased, so
    neither chunk order nor query case/padding changes the key.

    Args:
        model: Model identifier; part of the key.
        intent: Intent label; part of the key.
        chunk_ids: IDs of the retrieved chunks, in any order.
        query: Raw user query.

    Returns:
        A 32-character lowercase hexadecimal string.
    """
    ordered_chunks = sorted(chunk_ids)
    normalised_query = query.strip().lower()
    fields = {
        "chunks": ordered_chunks,
        "intent": intent,
        "model": model,
        "query": normalised_query,
    }
    # sort_keys pins the field order in the serialised form, so the digest
    # depends only on the values; ensure_ascii=False keeps non-ASCII text as
    # is before it is UTF-8 encoded for hashing.
    serialised = json.dumps(fields, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256(serialised.encode("utf-8"))
    return digest.hexdigest()[:32]


def get(model: str, intent: str, chunk_ids: list[str], query: str,
        ttl_seconds: int = DEFAULT_TTL_SECONDS) -> str | None:
    """Return the cached answer for this request, or ``None`` on a miss.

    A miss is any of: no entry file, a file that cannot be read or decoded,
    content that is not a JSON object with a numeric ``ts`` and a string
    ``answer``, or an entry whose age is ``ttl_seconds`` or more. Expired or
    malformed files are left on disk untouched.

    Args:
        model: Model identifier; part of the cache key.
        intent: Intent label; part of the cache key.
        chunk_ids: IDs of the retrieved chunks (order does not matter).
        query: User query (surrounding whitespace and case do not matter).
        ttl_seconds: Maximum accepted entry age in seconds.

    Returns:
        The stored answer string, or ``None`` on a miss.
    """
    path = CACHE_DIR / f"{_make_key(model, intent, chunk_ids, query)}.json"
    try:
        entry = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file, so no separate
        # exists() check is needed (it would race with deletion anyway).
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised for a file that is not valid UTF-8.
        return None

    # A hand-edited or truncated file can still be valid JSON of the wrong
    # shape; treat that as a miss rather than raising.
    if not isinstance(entry, dict):
        return None
    written_at = entry.get("ts", 0)  # a missing timestamp reads as "very old"
    if not isinstance(written_at, (int, float)):
        return None
    if time.time() - written_at >= ttl_seconds:
        return None

    answer = entry.get("answer")
    return answer if isinstance(answer, str) else None


def put(model: str, intent: str, chunk_ids: list[str], query: str,
        answer: str) -> None:
    """Store ``answer`` on disk under the key derived from the request.

    The entry is a JSON object with the current time (``ts``), the answer,
    and the raw query and intent. The query and intent are stored only to
    make entries easy to inspect; lookups use the file name alone. An
    existing entry for the same key is overwritten.

    Args:
        model: Model identifier; part of the cache key.
        intent: Intent label; part of the cache key, also stored in the entry.
        chunk_ids: IDs of the retrieved chunks (order does not matter).
        query: User query; normalised for the key, stored verbatim.
        answer: Response text to cache.

    Raises:
        OSError: If the cache directory cannot be created or the file cannot
            be written.
    """
    key = _make_key(model, intent, chunk_ids, query)
    # The directory is created at import time, but deleting it is a supported
    # way of invalidating the cache; re-create it so a long-running process
    # keeps working after such a wipe instead of raising FileNotFoundError.
    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    entry = {"ts": time.time(), "answer": answer, "query": query, "intent": intent}
    path = CACHE_DIR / f"{key}.json"
    path.write_text(json.dumps(entry, ensure_ascii=False), encoding="utf-8")


def estimate_tokens(text: str) -> int:
    """Return a rough token count for *text*, for use by the budget guard.

    The estimate is one token per three characters, rounded down, and never
    less than 1 (so an empty string still counts as one token). The divisor
    follows the original author's note that Arabic averages about 3
    characters per token on llama tokenizers and that 3 was chosen to lean
    towards overestimating.
    """
    approx = len(text) // 3
    return approx if approx > 0 else 1