Spaces:
Running
Running
File size: 2,223 Bytes
d575065 f330fba d575065 dcd34fc d575065 dcd34fc f330fba d575065 dcd34fc f330fba dcd34fc c141429 f330fba d575065 f820864 dcd34fc f820864 f330fba c141429 dcd34fc f820864 d575065 f330fba f820864 d575065 f330fba f820864 d575065 f820864 f330fba d575065 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
"""
MODULE: VECTOR SEARCH ENGINE (OG-RAG FIXED)
===========================================
Correction : Ajout de la gestion des métadonnées (meta_map) pour éviter le KeyError.
"""
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import os
class SemanticIndex:
    """Semantic search over ontology "hyper-blocks" backed by a FAISS index.

    Each indexed block is embedded with a SentenceTransformer model and
    stored in a flat L2 FAISS index. Row ``i`` of the index corresponds to
    ``self.uris[i]``; the block text and metadata are kept in side
    dictionaries keyed by URI.

    Attributes:
        model: SentenceTransformer used to embed both blocks and queries.
        index: FAISS index, or ``None`` while nothing has been indexed.
        uris: Block URIs, in the same order as the rows of ``index``.
        content_map: Maps a URI to the block text.
        meta_map: Maps a URI to the block metadata (only for blocks that
            provided a ``'metadata'`` entry).
    """

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        """Load the embedding model and start with an empty index.

        Args:
            model_name: Name passed to ``SentenceTransformer``.
        """
        # Going by its name, this flag only silences the Hugging Face Hub
        # symlink warning; it is set before the model is loaded.
        os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
        self.model = SentenceTransformer(model_name)
        self.index = None
        self.uris = []
        self.content_map = {}
        self.meta_map = {}

    def build_from_ontology_blocks(self, blocks):
        """Embed the given hyper-blocks and (re)build the search index.

        Any previously built index is replaced. The new state is committed
        only after encoding and indexing succeed, so a failure cannot leave
        ``uris`` out of sync with the rows of ``index``.

        Args:
            blocks: Iterable of dicts with ``'text'`` and ``'uri'`` keys and
                an optional ``'metadata'`` key. If several blocks share a
                URI, the text/metadata of the last one wins in the maps.

        Raises:
            KeyError: If a block lacks ``'text'`` or ``'uri'``.
        """
        # Materialise once: the input is traversed several times below and
        # may be a one-shot iterator.
        blocks = list(blocks)
        print(f"⏳ [VECTOR] Indexation de {len(blocks)} Hyper-blocs...")

        if not blocks:
            # Nothing to embed: there would be no embedding dimension to
            # size the FAISS index with, so reset to the "no index" state.
            self.index = None
            self.uris = []
            self.content_map = {}
            self.meta_map = {}
            print("⚠️ [VECTOR] Aucun bloc à indexer.")
            return

        corpus = [b['text'] for b in blocks]
        uris = [b['uri'] for b in blocks]
        # Blocks without metadata are skipped here; search() then falls
        # back to its default metadata instead of raising KeyError.
        meta_map = {b['uri']: b['metadata'] for b in blocks if 'metadata' in b}

        # FAISS expects a contiguous float32 matrix of shape (n, dimension).
        embeddings = np.ascontiguousarray(
            self.model.encode(corpus, show_progress_bar=True),
            dtype='float32',
        )
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        self.index = index
        self.uris = uris
        self.content_map = dict(zip(uris, corpus))
        self.meta_map = meta_map
        print("✅ [VECTOR] Index OG-RAG prêt.")

    def build_from_networkx(self, G):
        """Backward-compatibility hook (Phase 1); currently a no-op.

        Args:
            G: Unused. Kept so existing callers keep working.
        """
        # Placeholder for a simplified fallback implementation, if needed.
        return None

    def search(self, query, top_k: int = 3):
        """Return the indexed blocks closest to ``query``.

        Args:
            query: Text to embed and look up.
            top_k: Maximum number of results to return.

        Returns:
            A list of dicts with keys ``'uri'``, ``'text'``, ``'meta'`` and
            ``'score'``, in the order returned by the FAISS index.
            ``'score'`` is the distance reported by the L2 index, so lower
            means closer. The list is empty when no index has been built or
            when ``top_k`` is less than 1.
        """
        if self.index is None or top_k < 1:
            return []

        query_vec = np.ascontiguousarray(
            self.model.encode([query]), dtype='float32'
        )
        distances, indices = self.index.search(query_vec, top_k)

        results = []
        for distance, idx in zip(distances[0], indices[0]):
            # FAISS pads with -1 when fewer than top_k vectors are indexed.
            if idx == -1:
                continue
            uri = self.uris[int(idx)]
            results.append({
                "uri": uri,
                "text": self.content_map.get(uri, ""),
                "meta": self.meta_map.get(
                    uri, {"type": "Unknown", "label": "?"}
                ),
                "score": float(distance),
            })
        return results