Spaces:
Running
Running
| """ | |
| MODULE: VECTOR SEARCH ENGINE (OG-RAG FIXED) | |
| =========================================== | |
| Correction : Ajout de la gestion des métadonnées (meta_map) pour éviter le KeyError. | |
| """ | |
| import faiss | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
| import os | |
class SemanticIndex:
    """Semantic search index over text blocks identified by a URI.

    Texts are embedded with a ``SentenceTransformer`` model and stored in a
    FAISS ``IndexFlatL2``. Alongside the vectors the instance keeps:

    * ``uris``: URI of each indexed vector, in insertion order, so a FAISS
      row number can be mapped back to a block.
    * ``content_map``: ``uri -> text``.
    * ``meta_map``: ``uri -> metadata``, returned with every search hit.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the embedding model and start with an empty index.

        Args:
            model_name: Model identifier passed to ``SentenceTransformer``.
        """
        # Only sets the Hugging Face Hub flag that disables the symlink
        # warning; it does not change how or where the model is cached.
        os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
        self.model = SentenceTransformer(model_name)
        self.index = None  # stays None until a build succeeds
        self.uris = []
        self.content_map = {}
        self.meta_map = {}

    def build_from_ontology_blocks(self, blocks):
        """Embed the enriched hyper-blocks and rebuild the index from scratch.

        Any previously built index is replaced. The instance state is only
        swapped once the new index has been fully built, so a failure while
        embedding leaves the previous index and lookup tables consistent.

        Args:
            blocks: Sized iterable of dicts. Each dict must provide ``'uri'``
                and ``'text'``; ``'metadata'`` is optional. When two blocks
                share a URI, the last one wins in the lookup tables.

        Raises:
            KeyError: If a block has no ``'uri'`` or ``'text'`` entry.
        """
        print(f"⏳ [VECTOR] Indexation de {len(blocks)} Hyper-blocs...")

        corpus = [b['text'] for b in blocks]
        uris = [b['uri'] for b in blocks]
        content_map = dict(zip(uris, corpus))
        # Blocks without metadata are simply left out: search() already
        # substitutes a default for URIs missing from meta_map.
        meta_map = {b['uri']: b['metadata'] for b in blocks if 'metadata' in b}

        if not corpus:
            # Nothing to embed: encoding an empty corpus has no second
            # dimension to size the index with, so reset to the empty state.
            self.index = None
            self.uris = []
            self.content_map = {}
            self.meta_map = {}
            return

        embeddings = self.model.encode(corpus, show_progress_bar=True)
        # FAISS expects a C-contiguous float32 matrix.
        vectors = np.ascontiguousarray(embeddings, dtype='float32')
        index = faiss.IndexFlatL2(vectors.shape[1])
        index.add(vectors)

        self.index = index
        self.uris = uris
        self.content_map = content_map
        self.meta_map = meta_map
        print("✅ [VECTOR] Index OG-RAG prêt.")

    def build_from_networkx(self, G):
        """Backward-compatibility placeholder (phase 1).

        Currently does nothing and returns ``None``; the index is left
        untouched. Kept so that callers of the older API do not break.

        Args:
            G: Unused.
        """
        pass

    def search(self, query, top_k=3):
        """Return the indexed blocks closest to ``query``.

        Args:
            query: Text to embed and look up.
            top_k: Maximum number of hits to return.

        Returns:
            A list of dicts with keys ``'uri'``, ``'text'``, ``'meta'`` and
            ``'score'``, in the order reported by the index. ``'score'`` is
            the distance returned by the index (for the ``IndexFlatL2`` built
            here, smaller means closer). The list is empty when no index has
            been built or when ``top_k`` is not positive.
        """
        if self.index is None or top_k <= 0:
            return []

        query_vec = self.model.encode([query]).astype('float32')
        dist, indices = self.index.search(query_vec, top_k)

        results = []
        for idx, score in zip(indices[0], dist[0]):
            if idx == -1:
                # FAISS pads with -1 when fewer than top_k vectors exist.
                continue
            uri = self.uris[idx]
            results.append({
                "uri": uri,
                "text": self.content_map.get(uri, ""),
                "meta": self.meta_map.get(uri, {"type": "Unknown", "label": "?"}),
                "score": float(score),
            })
        return results