"""
MODULE: VECTOR SEARCH ENGINE (OG-RAG FIXED)
===========================================
Fix: adds metadata handling (meta_map) to avoid the KeyError.
"""
import os

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer


class SemanticIndex:
    """Semantic search over text blocks, backed by a flat FAISS L2 index.

    Attributes:
        model: SentenceTransformer used to embed both blocks and queries.
        index: FAISS index, or None until a non-empty build has succeeded.
        uris: Block URIs; position i corresponds to vector i in the index.
        content_map: Mapping uri -> block text.
        meta_map: Mapping uri -> block metadata.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the embedding model and initialise an empty index.

        Args:
            model_name: Name of the sentence-transformers model to load.
        """
        # Silences the Hugging Face Hub warning about symlinks in its cache
        # directory; it does not change how the model is cached.
        os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.uris = []
        self.content_map = {}
        self.meta_map = {}

    def build_from_ontology_blocks(self, blocks) -> None:
        """Embed the enriched hyper-blocks and (re)build the index.

        Any previously built index is replaced. The instance state is only
        updated once every step has succeeded, so a failure while encoding
        leaves the previous index, URIs and maps untouched and consistent.

        Args:
            blocks: Sized sequence of mappings with the keys 'uri' and
                'text', and optionally 'metadata'. If several blocks share
                a URI, the maps keep the text/metadata of the last one.

        Raises:
            KeyError: If a block has no 'uri' or 'text' key.
        """
        print(f"⏳ [VECTOR] Indexation de {len(blocks)} Hyper-blocs...")

        corpus = [b['text'] for b in blocks]
        uris = [b['uri'] for b in blocks]
        content_map = dict(zip(uris, corpus))
        # Blocks without metadata are simply left out of the map; search()
        # then falls back to its "Unknown" placeholder instead of failing.
        meta_map = {b['uri']: b['metadata'] for b in blocks if 'metadata' in b}

        if not corpus:
            # Nothing to embed: the embedding dimension cannot be derived
            # from an empty result, so reset to the "no index" state.
            self.index = None
            self.uris = []
            self.content_map = {}
            self.meta_map = {}
            return

        embeddings = self.model.encode(corpus, show_progress_bar=True)
        # FAISS expects a C-contiguous float32 matrix.
        vectors = np.ascontiguousarray(embeddings, dtype='float32')
        index = faiss.IndexFlatL2(vectors.shape[1])
        index.add(vectors)

        # Commit everything together so index and lookup tables stay aligned.
        self.index = index
        self.uris = uris
        self.content_map = content_map
        self.meta_map = meta_map
        print("✅ [VECTOR] Index OG-RAG prêt.")

    def build_from_networkx(self, G) -> None:
        """Backward compatibility (Phase 1).

        Placeholder for a simplified fallback build; currently does nothing.
        """

    def search(self, query, top_k=3):
        """Return the blocks closest to *query*.

        Args:
            query: Text to search for.
            top_k: Maximum number of results to return.

        Returns:
            A list of dicts with the keys 'uri', 'text', 'meta' and 'score',
            in the order FAISS returns them (nearest first). 'score' is the
            distance reported by the L2 index, so lower means more similar.
            The list is empty when no index has been built or top_k <= 0.
        """
        if self.index is None or top_k <= 0:
            return []

        query_vec = np.ascontiguousarray(
            self.model.encode([query]), dtype='float32'
        )
        distances, indices = self.index.search(query_vec, top_k)

        results = []
        for distance, idx in zip(distances[0], indices[0]):
            # FAISS pads with -1 when fewer than top_k vectors are indexed.
            if idx < 0:
                continue
            uri = self.uris[idx]
            results.append({
                "uri": uri,
                "text": self.content_map.get(uri, ""),
                "meta": self.meta_map.get(
                    uri, {"type": "Unknown", "label": "?"}
                ),
                "score": float(distance),
            })
        return results