"""
MODULE: VECTOR SEARCH ENGINE (OG-RAG FIXED)
===========================================
Correction : Ajout de la gestion des métadonnées (meta_map) pour éviter le KeyError.
"""
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import os

class SemanticIndex:
    """Semantic search over ontology "hyper-blocks" using FAISS.

    Texts are embedded with a SentenceTransformer model and stored in a
    flat L2 FAISS index. Side tables map each block URI to its text and
    metadata so that search hits can be returned as plain dictionaries.

    Attributes:
        model: SentenceTransformer used to embed both blocks and queries.
        index: FAISS index, or None while nothing has been indexed.
        uris: Block URIs, positionally aligned with the vectors in `index`.
        content_map: Maps URI -> block text.
        meta_map: Maps URI -> block metadata (only for blocks that have it).
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the embedding model and start with an empty index.

        Args:
            model_name: Name passed to SentenceTransformer.
        """
        # Only silences the Hugging Face Hub symlinks warning; it does not
        # change how (or whether) the model is cached.
        os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.uris = []
        self.content_map = {}
        self.meta_map = {}

    def build_from_ontology_blocks(self, blocks):
        """Embed the enriched hyper-blocks and (re)build the index.

        Any previously built index and lookup tables are replaced.

        Args:
            blocks: Sequence of dicts with required keys 'uri' and 'text'
                and an optional 'metadata' key. Blocks without metadata
                get the placeholder metadata at search time.

        Raises:
            KeyError: If a block lacks 'uri' or 'text'.
        """
        print(f"⏳ [VECTOR] Indexation de {len(blocks)} Hyper-blocs...")

        if not blocks:
            # Nothing to embed: reset to the "no index" state instead of
            # failing on the shape of an empty embedding array.
            self.index = None
            self.uris = []
            self.content_map = {}
            self.meta_map = {}
            print("⚠️ [VECTOR] Aucun bloc à indexer.")
            return

        corpus = [b['text'] for b in blocks]
        uris = [b['uri'] for b in blocks]
        # NOTE: the maps are keyed by URI, so if two blocks share a URI the
        # last one wins for text/metadata lookups.
        content_map = {b['uri']: b['text'] for b in blocks}
        meta_map = {b['uri']: b['metadata'] for b in blocks if 'metadata' in b}

        embeddings = np.asarray(
            self.model.encode(corpus, show_progress_bar=True),
            dtype='float32',
        )
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        # Publish everything only once the index is fully built, so a failure
        # above cannot leave `uris` out of sync with `index`.
        self.uris = uris
        self.content_map = content_map
        self.meta_map = meta_map
        self.index = index

        print("✅ [VECTOR] Index OG-RAG prêt.")

    def build_from_networkx(self, G):
        """Backward-compatibility hook (Phase 1); currently a no-op.

        Placeholder for a simplified fallback build. It indexes nothing and
        returns None.

        Args:
            G: Graph object; unused for now.
        """
        pass

    def search(self, query, top_k=3):
        """Return the blocks closest to `query`.

        Args:
            query: Query text to embed and search for.
            top_k: Maximum number of hits to return.

        Returns:
            A list of dicts with keys 'uri', 'text', 'meta' and 'score', in
            the order reported by the index. 'score' is the distance reported
            by the index (for the flat L2 index built here, smaller means
            closer). An empty list is returned when no index has been built
            or when `top_k` is not positive.
        """
        if self.index is None or top_k <= 0:
            return []

        query_vec = np.asarray(self.model.encode([query]), dtype='float32')
        distances, indices = self.index.search(query_vec, top_k)

        results = []
        for distance, idx in zip(distances[0], indices[0]):
            # A negative index (-1) marks a slot with no neighbour, e.g. when
            # top_k exceeds the number of indexed vectors.
            if idx < 0:
                continue
            uri = self.uris[int(idx)]
            results.append({
                "uri": uri,
                "text": self.content_map.get(uri, ""),
                # Placeholder metadata keeps the result schema stable for
                # blocks that were indexed without metadata.
                "meta": self.meta_map.get(uri, {"type": "Unknown", "label": "?"}),
                "score": float(distance),
            })
        return results