klydekushy committed on
Commit
f330fba
·
verified ·
1 Parent(s): f600486

Update src/Algorithms/vector_search.py

Browse files
Files changed (1) hide show
  1. src/Algorithms/vector_search.py +18 -6
src/Algorithms/vector_search.py CHANGED
@@ -1,17 +1,22 @@
1
  """
2
- MODULE: VECTOR SEARCH ENGINE (OG-RAG VERSION)
3
- =============================================
 
4
  """
5
  import faiss
6
  import numpy as np
7
  from sentence_transformers import SentenceTransformer
 
8
 
9
  class SemanticIndex:
10
  def __init__(self, model_name='all-MiniLM-L6-v2'):
 
 
11
  self.model = SentenceTransformer(model_name)
12
  self.index = None
13
  self.uris = []
14
- self.content_map = {} # Pour retrouver le texte du bloc
 
15
 
16
  def build_from_ontology_blocks(self, blocks):
17
  """Vectorise les Hyper-blocs enrichis"""
@@ -20,6 +25,7 @@ class SemanticIndex:
20
  corpus = [b['text'] for b in blocks]
21
  self.uris = [b['uri'] for b in blocks]
22
  self.content_map = {b['uri']: b['text'] for b in blocks}
 
23
 
24
  embeddings = self.model.encode(corpus, show_progress_bar=True)
25
  dimension = embeddings.shape[1]
@@ -28,18 +34,24 @@ class SemanticIndex:
28
 
29
  print("✅ [VECTOR] Index OG-RAG prêt.")
30
 
 
 
 
 
 
31
  def search(self, query, top_k=3):
32
  if not self.index: return []
33
  query_vec = self.model.encode([query]).astype('float32')
34
  dist, indices = self.index.search(query_vec, top_k)
35
 
36
  results = []
37
- for idx in indices[0]:
38
  if idx != -1:
39
  uri = self.uris[idx]
40
  results.append({
41
  "uri": uri,
42
- "text": self.content_map[uri],
43
- "score": float(dist[0][0])
 
44
  })
45
  return results
 
1
  """
2
+ MODULE: VECTOR SEARCH ENGINE (OG-RAG FIXED)
3
+ ===========================================
4
+ Correction : Ajout de la gestion des métadonnées (meta_map) pour éviter le KeyError.
5
  """
6
  import faiss
7
  import numpy as np
8
  from sentence_transformers import SentenceTransformer
9
+ import os
10
 
11
  class SemanticIndex:
12
  def __init__(self, model_name='all-MiniLM-L6-v2'):
13
+ # Gestion du cache pour éviter de re-télécharger le modèle à chaque fois
14
+ os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
15
  self.model = SentenceTransformer(model_name)
16
  self.index = None
17
  self.uris = []
18
+ self.content_map = {}
19
+ self.meta_map = {} # <--- AJOUT CRITIQUE
20
 
21
  def build_from_ontology_blocks(self, blocks):
22
  """Vectorise les Hyper-blocs enrichis"""
 
25
  corpus = [b['text'] for b in blocks]
26
  self.uris = [b['uri'] for b in blocks]
27
  self.content_map = {b['uri']: b['text'] for b in blocks}
28
+ self.meta_map = {b['uri']: b['metadata'] for b in blocks} # <--- ON STOCKE LES METAS
29
 
30
  embeddings = self.model.encode(corpus, show_progress_bar=True)
31
  dimension = embeddings.shape[1]
 
34
 
35
  print("✅ [VECTOR] Index OG-RAG prêt.")
36
 
37
+ def build_from_networkx(self, G):
38
+ """Compatibilité arrière (Phase 1)"""
39
+ # (Version simplifiée si besoin de fallback)
40
+ pass
41
+
42
  def search(self, query, top_k=3):
43
  if not self.index: return []
44
  query_vec = self.model.encode([query]).astype('float32')
45
  dist, indices = self.index.search(query_vec, top_k)
46
 
47
  results = []
48
+ for i, idx in enumerate(indices[0]):
49
  if idx != -1:
50
  uri = self.uris[idx]
51
  results.append({
52
  "uri": uri,
53
+ "text": self.content_map.get(uri, ""),
54
+ "meta": self.meta_map.get(uri, {"type": "Unknown", "label": "?"}), # <--- ON RENVOIE LES METAS
55
+ "score": float(dist[0][i])
56
  })
57
  return results