"""Gestion du vector store FAISS avec embeddings HuggingFace."""

from __future__ import annotations

import json
import logging
from pathlib import Path

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

from src.config import EMBEDDING_MODEL, TOP_K, VECTORSTORE_DIR
from src.document_processor import DocumentChunk, ParsedDocument

logger = logging.getLogger(__name__)


class VectorStore:
    """FAISS vector store with on-disk persistence.

    Texts are embedded with a SentenceTransformer model and L2-normalised, so
    the inner-product index ranks results by cosine similarity. Chunks are
    kept in a list parallel to the index: the vector at position ``i`` of the
    index belongs to ``self.chunks[i]``.
    """

    def __init__(self, model_name: str = EMBEDDING_MODEL):
        """Load the embedding model and create an empty index.

        Args:
            model_name: Model identifier passed to ``SentenceTransformer``.
        """
        logger.info("Chargement du modèle d'embeddings : %s", model_name)
        self.model = SentenceTransformer(model_name)
        self.dimension = self.model.get_sentence_embedding_dimension()

        # Inner product over L2-normalised vectors equals cosine similarity.
        self.index = faiss.IndexFlatIP(self.dimension)

        # Chunks stored in the same order as the vectors of the index.
        self.chunks: list[DocumentChunk] = []

        # Registry of indexed documents (hash -> filename).
        self.indexed_docs: dict[str, str] = {}

    # ── Indexing ─────────────────────────────────────────────────────────

    def add_document(self, parsed_doc: ParsedDocument) -> int:
        """Embed and index the chunks of one parsed document.

        A document whose hash is already registered, or which has no chunks,
        is skipped.

        Args:
            parsed_doc: Document providing ``doc_hash``, ``filename`` and
                ``chunks``.

        Returns:
            Number of chunks added (0 when the document was skipped).
        """
        if parsed_doc.doc_hash in self.indexed_docs:
            logger.info(
                "Document déjà indexé : %s (hash=%s)",
                parsed_doc.filename,
                parsed_doc.doc_hash,
            )
            return 0

        if not parsed_doc.chunks:
            logger.warning("Aucun chunk à indexer pour %s", parsed_doc.filename)
            return 0

        texts = [chunk.text for chunk in parsed_doc.chunks]
        embeddings = self._encode(texts)

        # Vectors and chunks are appended together so positions stay aligned.
        self.index.add(embeddings)
        self.chunks.extend(parsed_doc.chunks)
        self.indexed_docs[parsed_doc.doc_hash] = parsed_doc.filename

        logger.info(
            "Indexé %d chunks pour %s (total index : %d)",
            len(parsed_doc.chunks),
            parsed_doc.filename,
            self.index.ntotal,
        )
        return len(parsed_doc.chunks)

    def add_documents(self, parsed_docs: list[ParsedDocument]) -> int:
        """Index several parsed documents.

        Returns:
            Total number of chunks added across all documents.
        """
        return sum(self.add_document(doc) for doc in parsed_docs)

    # ── Search ───────────────────────────────────────────────────────────

    def search(self, query: str, top_k: int = TOP_K) -> list[dict]:
        """Return the chunks most similar to ``query``.

        Args:
            query: Text to search for.
            top_k: Maximum number of results; capped at the index size.

        Returns:
            A list of dicts with keys ``"text"``, ``"metadata"`` and
            ``"score"``, in the order returned by FAISS. Empty when the store
            is empty or ``top_k`` is not positive.
        """
        if self.index.ntotal == 0:
            logger.warning("Vector store vide — aucune recherche possible.")
            return []

        if top_k <= 0:
            # FAISS does not accept a non-positive k; nothing was requested,
            # so there is nothing to return.
            return []

        k = min(top_k, self.index.ntotal)
        query_embedding = self._encode([query])
        scores, indices = self.index.search(query_embedding, k)

        results = []
        for score, idx in zip(scores[0], indices[0]):
            if idx < 0:  # FAISS returns -1 for missing results
                continue
            chunk = self.chunks[int(idx)]
            results.append(
                {
                    "text": chunk.text,
                    "metadata": chunk.metadata,
                    "score": float(score),
                }
            )

        return results

    # ── Persistence ──────────────────────────────────────────────────────

    def save(self, name: str = "default") -> Path:
        """Write the index, the chunks and the document registry to disk.

        Files are written to ``VECTORSTORE_DIR / name`` as ``index.faiss``,
        ``chunks.json`` and ``registry.json``; the directory is created when
        missing.

        Args:
            name: Sub-directory name of the store.

        Returns:
            The directory the store was written to.
        """
        store_dir = VECTORSTORE_DIR / name
        store_dir.mkdir(parents=True, exist_ok=True)

        # FAISS index
        index_path = store_dir / "index.faiss"
        faiss.write_index(self.index, str(index_path))

        # Chunks (text + metadata), in index order
        chunks_data = [
            {"text": c.text, "metadata": c.metadata} for c in self.chunks
        ]
        chunks_path = store_dir / "chunks.json"
        with open(chunks_path, "w", encoding="utf-8") as f:
            json.dump(chunks_data, f, ensure_ascii=False, indent=2)

        # Document registry
        registry_path = store_dir / "registry.json"
        with open(registry_path, "w", encoding="utf-8") as f:
            json.dump(self.indexed_docs, f, ensure_ascii=False, indent=2)

        logger.info("Vector store sauvegardé dans %s", store_dir)
        return store_dir

    def load(self, name: str = "default") -> bool:
        """Load a store previously written by :meth:`save`.

        Everything is read and validated before the instance is modified, so
        a failed load leaves the current index, chunks and registry untouched.

        Args:
            name: Sub-directory name of the store under ``VECTORSTORE_DIR``.

        Returns:
            ``True`` when the store was loaded. ``False`` when the index or
            chunks file is missing, when the number of vectors differs from
            the number of chunks, or when the stored index dimension differs
            from the embedding model's dimension.

        Raises:
            json.JSONDecodeError: If a JSON file exists but is malformed.
            KeyError: If a stored chunk lacks ``"text"`` or ``"metadata"``.
        """
        store_dir = VECTORSTORE_DIR / name
        index_path = store_dir / "index.faiss"
        chunks_path = store_dir / "chunks.json"
        registry_path = store_dir / "registry.json"

        if not index_path.exists():
            logger.info("Aucun index sauvegardé trouvé (%s)", store_dir)
            return False

        if not chunks_path.exists():
            # An index without its chunks cannot answer any search.
            logger.error(
                "Fichier de chunks manquant (%s) — chargement annulé",
                chunks_path,
            )
            return False

        # Load into locals first; self is only updated once all checks pass.
        index = faiss.read_index(str(index_path))

        with open(chunks_path, "r", encoding="utf-8") as f:
            chunks_data = json.load(f)
        chunks = [
            DocumentChunk(text=c["text"], metadata=c["metadata"])
            for c in chunks_data
        ]

        # Without a registry file, start from an empty registry instead of
        # keeping entries that describe whatever was loaded or built before.
        indexed_docs: dict[str, str] = {}
        if registry_path.exists():
            with open(registry_path, "r", encoding="utf-8") as f:
                indexed_docs = json.load(f)

        if index.ntotal != len(chunks):
            # search() maps index positions to self.chunks, so both must have
            # exactly the same length.
            logger.error(
                "Index incohérent dans %s : %d vecteurs pour %d chunks"
                " — chargement annulé",
                store_dir,
                index.ntotal,
                len(chunks),
            )
            return False

        if index.d != self.dimension:
            # Queries are encoded with the current model; an index built with
            # another embedding size cannot be searched with them.
            logger.error(
                "Dimension de l'index (%s) incompatible avec le modèle (%s)"
                " — chargement annulé",
                index.d,
                self.dimension,
            )
            return False

        self.index = index
        self.chunks = chunks
        self.indexed_docs = indexed_docs

        logger.info(
            "Index chargé : %d vecteurs, %d documents",
            self.index.ntotal,
            len(self.indexed_docs),
        )
        return True

    # ── Info ─────────────────────────────────────────────────────────────

    @property
    def stats(self) -> dict:
        """Summary of the store: vector/chunk counts, filenames, dimension."""
        return {
            "total_vectors": self.index.ntotal,
            "total_chunks": len(self.chunks),
            "indexed_documents": list(self.indexed_docs.values()),
            "embedding_dimension": self.dimension,
        }

    # ── Private ──────────────────────────────────────────────────────────

    def _encode(self, texts: list[str]) -> np.ndarray:
        """Encode ``texts`` into normalised float32 embeddings.

        Returns:
            A C-contiguous float32 array, the layout FAISS expects for
            ``add`` and ``search``.
        """
        embeddings = self.model.encode(
            texts, show_progress_bar=False, normalize_embeddings=True
        )
        return np.ascontiguousarray(embeddings, dtype=np.float32)