"""Retrieval over the GDScript corpus.

Loads the prebuilt FAISS index (cosine / IndexIDMap2, faiss_id == chunk id) and
chunks.jsonl, embeds the query with the same jina code model used to build the
index, and returns the top-k chunk records. Runs on CPU (query embedding is one
text at a time, fast).
"""
from __future__ import annotations

import json
import os
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path

import faiss
import numpy as np

DATA_DIR = Path(os.environ.get("GDRAG_SPACE_DATA", Path(__file__).parent / "data"))
FAISS_PATH = DATA_DIR / "embeddings.faiss"
CHUNKS_PATH = DATA_DIR / "chunks.jsonl"
EMBED_MODEL = "jinaai/jina-embeddings-v2-base-code"


@dataclass
class Hit:
    score: float
    text: str
    repo: str
    origin_url: str
    file_path: str
    kind: str


# ---------------------------------------------------------------------------
# Lazy singletons (loaded once per process)
# ---------------------------------------------------------------------------
@lru_cache(maxsize=1)
def _index() -> faiss.Index:
    return faiss.read_index(str(FAISS_PATH))


@lru_cache(maxsize=1)
def _chunks() -> dict[int, dict]:
    by_id: dict[int, dict] = {}
    with open(CHUNKS_PATH, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            try:
                r = json.loads(line)
            except json.JSONDecodeError:
                continue
            by_id[r["id"]] = r
    return by_id


@lru_cache(maxsize=1)
def _embedder():
    # transformers ~=4.45 (pinned) loads jina's remote code without shims.
    # device="cpu" is REQUIRED on ZeroGPU: query embedding runs in retrieve(),
    # outside the @spaces.GPU block, so CUDA isn't really allocated there — left
    # on auto it lands on a phantom cuda device and returns zero vectors.
    from sentence_transformers import SentenceTransformer
    return SentenceTransformer(EMBED_MODEL, trust_remote_code=True, device="cpu")


def _embed_query(query: str) -> np.ndarray:
    vec = _embedder().encode([query], normalize_embeddings=True,
                             show_progress_bar=False)
    return np.asarray(vec, dtype=np.float32)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def index_available() -> bool:
    return FAISS_PATH.exists() and CHUNKS_PATH.exists()


def retrieve(query: str, k: int = 6) -> list[Hit]:
    """Return the top-k GDScript chunks most relevant to the query.

    Returns [] if the index hasn't been built/uploaded yet, so the Space still
    runs (answers without retrieval) until the Colab build pushes the index.
    """
    if not query.strip() or not index_available():
        return []
    qv = _embed_query(query)
    scores, ids = _index().search(qv, k)
    chunks = _chunks()
    hits: list[Hit] = []
    for score, cid in zip(scores[0], ids[0]):
        if cid < 0:
            continue
        rec = chunks.get(int(cid))
        if not rec:
            continue
        hits.append(Hit(
            score=float(score),
            text=rec.get("text", ""),
            repo=rec.get("repo", ""),
            origin_url=rec.get("origin_url", ""),
            file_path=rec.get("file_path", ""),
            kind=rec.get("kind", ""),
        ))
    return hits


def warmup() -> None:
    """Preload index, chunks and embedder (call at Space startup)."""
    if index_available():
        _index(); _chunks(); _embedder()


if __name__ == "__main__":
    import sys
    q = " ".join(sys.argv[1:]) or "how do I use @export and signals in GDScript"
    print(f"Query: {q}\n")
    for i, h in enumerate(retrieve(q, k=6), 1):
        print(f"[{i}] score={h.score:.3f}  {h.repo}  {h.file_path}")
        print("    " + h.text[:160].replace("\n", " ") + "...\n")