File size: 2,223 Bytes
d575065
f330fba
 
 
d575065
dcd34fc
d575065
dcd34fc
f330fba
d575065
dcd34fc
 
f330fba
 
dcd34fc
 
c141429
f330fba
 
d575065
f820864
 
 
dcd34fc
f820864
 
 
f330fba
c141429
dcd34fc
 
 
 
 
f820864
d575065
f330fba
 
 
 
 
f820864
 
 
 
d575065
 
f330fba
f820864
 
d575065
f820864
f330fba
 
 
d575065
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
"""
MODULE: VECTOR SEARCH ENGINE (OG-RAG FIXED)
===========================================
Correction : Ajout de la gestion des métadonnées (meta_map) pour éviter le KeyError.
"""
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import os

class SemanticIndex:
    """In-memory semantic search over ontology "hyper-blocks".

    Texts are embedded with a SentenceTransformer model and stored in a
    FAISS ``IndexFlatL2``. Alongside the index the instance keeps:

    * ``uris``        -- URI of each indexed block, in index order;
    * ``content_map`` -- URI -> block text;
    * ``meta_map``    -- URI -> block metadata (only for blocks that have it).
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        """Load the embedding model and start with an empty index.

        Args:
            model_name: Name passed to ``SentenceTransformer``.
        """
        # This variable only silences the Hugging Face Hub symlink warning;
        # it is set process-wide as a side effect of constructing the index.
        os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
        self.model = SentenceTransformer(model_name)
        self.index = None  # built by build_from_ontology_blocks()
        self.uris = []
        self.content_map = {}
        self.meta_map = {}

    def build_from_ontology_blocks(self, blocks):
        """Embed the enriched hyper-blocks and (re)build the index.

        Any previously built index is replaced. The instance state is only
        updated once embedding and indexing have succeeded, so a failure
        leaves the previous index usable.

        Args:
            blocks: Sequence of dicts with the keys ``'text'`` and ``'uri'``
                and, optionally, ``'metadata'``. An empty sequence resets
                the index so that ``search`` returns no results.

        Raises:
            KeyError: If a block lacks ``'text'`` or ``'uri'``.
        """
        print(f"⏳ [VECTOR] Indexation de {len(blocks)} Hyper-blocs...")

        if not blocks:
            # Nothing to embed: encoding an empty corpus yields an array
            # without a usable second dimension, so reset and bail out.
            self.index = None
            self.uris = []
            self.content_map = {}
            self.meta_map = {}
            print("⚠️ [VECTOR] Aucun Hyper-bloc à indexer.")
            return

        corpus = [block['text'] for block in blocks]
        uris = [block['uri'] for block in blocks]
        # Blocks without metadata are simply left out; search() falls back
        # to a placeholder for them instead of raising KeyError here.
        meta_map = {
            block['uri']: block['metadata']
            for block in blocks
            if 'metadata' in block
        }

        # FAISS expects a contiguous float32 matrix.
        embeddings = np.ascontiguousarray(
            self.model.encode(corpus, show_progress_bar=True),
            dtype='float32',
        )
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        # Commit everything together so uris/maps always match the index.
        self.uris = uris
        self.content_map = dict(zip(uris, corpus))
        self.meta_map = meta_map
        self.index = index

        print("✅ [VECTOR] Index OG-RAG prêt.")

    def build_from_networkx(self, G):
        """Backward-compatibility placeholder (Phase 1); currently a no-op.

        Args:
            G: Ignored.
        """
        # Intentionally empty: kept only so older callers do not break.
        return None

    def search(self, query, top_k=3):
        """Return the indexed blocks closest to ``query``.

        Args:
            query: Text to search for.
            top_k: Maximum number of results to return.

        Returns:
            A list of dicts with the keys ``'uri'``, ``'text'``, ``'meta'``
            and ``'score'``, in the order returned by the index (closest
            first). ``'score'`` is the distance reported by the L2 index,
            so lower means more similar. The list is empty when no index
            has been built or ``top_k`` is not positive.
        """
        if self.index is None or top_k <= 0:
            return []

        query_vec = np.ascontiguousarray(
            self.model.encode([query]), dtype='float32'
        )
        distances, indices = self.index.search(query_vec, top_k)

        results = []
        for distance, idx in zip(distances[0], indices[0]):
            # -1 is the filler returned when fewer than top_k vectors exist.
            if idx == -1:
                continue
            uri = self.uris[int(idx)]
            results.append({
                "uri": uri,
                "text": self.content_map.get(uri, ""),
                "meta": self.meta_map.get(uri, {"type": "Unknown", "label": "?"}),
                "score": float(distance),
            })
        return results