Spaces:

klydekushy
/

Vortex-Flux

Running

App Files Community

klydekushy committed on Jan 27

Commit

f820864

verified ·

1 Parent(s): 5b0daa9

Update src/Algorithms/vector_search.py

Browse files

Files changed (1) hide show

src/Algorithms/vector_search.py +20 -76

src/Algorithms/vector_search.py CHANGED Viewed

@@ -1,101 +1,45 @@
 """
-MODULE: VECTOR SEARCH ENGINE (CONTEXT AWARE)
-============================================
-Amélioration : Le vecteur d'un nœud inclut désormais ses voisins et ses relations.
 """
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
-import networkx as nx
 class SemanticIndex:
     def __init__(self, model_name='all-MiniLM-L6-v2'):
-        # Chargement du modèle (rapide sur CPU)
         self.model = SentenceTransformer(model_name)
         self.index = None
         self.uris = []
-        self.metadatas = []
-        self.is_ready = False
-    def build_from_networkx(self, G):
-        """
-        Vectorisation Contextuelle :
-        Texte = Attributs du Nœud + Relations Sortantes (Voisins)
-        """
-        print("⏳ [VECTOR] Génération des embeddings contextuels...")
-        corpus = []
-        self.uris = []
-        self.metadatas = []
-        # On parcourt chaque nœud
-        for node, data in G.nodes(data=True):
-            # 1. Texte de base (Interne)
-            # On prend toutes les valeurs textuelles du nœud
-            internal_attrs = [
-                str(v) for k, v in data.items()
-                if k not in ['group', 'label', 'color', 'shape', 'x', 'y', 'size', 'title']
-                and v and str(v).lower() != 'nan'
-            ]
-            base_text = f"{data.get('group', '')} {data.get('label', '')} {' '.join(internal_attrs)}"
-            # 2. Texte du Voisinage (Relations Sortantes)
-            # "travaille_chez X", "habite_ville Y"
-            neighbors_text = []
-            for neighbor in G.successors(node):
-                # Récupération du type de lien (ex: 'habite_ville')
-                edge_data = G.get_edge_data(node, neighbor)
-                rel_label = edge_data.get('label', 'lié_à')
-                # Récupération du nom du voisin (ex: 'Dakar')
-                neighbor_label = G.nodes[neighbor].get('label', str(neighbor))
-                # On ajoute la phrase de contexte
-                neighbors_text.append(f"{rel_label} {neighbor_label}")
-            # 3. Fusion (Soup)
-            # Ex: "Client Jean Dupont. habite_ville Dakar. secteur_act Commercial."
-            full_context = f"{base_text}. {' '.join(neighbors_text)}"
-            # Nettoyage
-            full_context = full_context.lower().replace('_', ' ')
-            corpus.append(full_context)
-            self.uris.append(node)
-            # Méta-données pour l'affichage dans le chat
-            meta_desc = f"{data.get('group', 'Entité')} - {data.get('label', str(node))}"
-            self.metadatas.append(meta_desc)
-        if not corpus:
-            print("⚠️ Graphe vide, pas de vectorisation.")
-            return
-        # 4. Encodage Batch (Rapide)
         embeddings = self.model.encode(corpus, show_progress_bar=True)
-        # 5. Création Index FAISS
         dimension = embeddings.shape[1]
         self.index = faiss.IndexFlatL2(dimension)
         self.index.add(np.array(embeddings).astype('float32'))
-        self.is_ready = True
-        print(f"✅ [VECTOR] Index FAISS Contextuel prêt : {len(self.uris)} entités.")
-        return len(self.uris)
-    def search(self, query, top_k=10):
-        """Retourne les nœuds les plus proches sémantiquement"""
-        if not self.is_ready:
-            return []
-        query_vec = self.model.encode([query.lower()]).astype('float32')
-        distances, indices = self.index.search(query_vec, top_k)
         results = []
-        for i, idx in enumerate(indices[0]):
-            if idx < len(self.uris) and idx >= 0:
                 results.append({
-                    "uri": self.uris[idx], # C'est l'ID NetworkX (ex: CLI-001)
-                    "meta": self.metadatas[idx],
-                    "score": float(1 / (1 + distances[0][i]))
                 })
         return results

 """
+MODULE: VECTOR SEARCH ENGINE (OG-RAG VERSION)
+=============================================
 """
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
 class SemanticIndex:
     """Sentence-embedding index over ontology text blocks, searched with FAISS.

     Attributes:
         model: SentenceTransformer used to embed both blocks and queries.
         index: ``faiss.IndexFlatL2`` holding the block embeddings, or ``None``
             until a non-empty build has run.
         uris: block URIs, in the same order as the rows added to ``index``.
         content_map: block URI -> block text, used to return the text of a hit.
     """

     def __init__(self, model_name='all-MiniLM-L6-v2'):
         self.model = SentenceTransformer(model_name)
         self.index = None
         self.uris = []
         self.content_map = {}  # lets search() recover a block's text from its URI

     def build_from_ontology_blocks(self, blocks):
         """Embed the text of every block and (re)build the FAISS index.

         Args:
             blocks: sized collection of mappings, each providing a ``'text'``
                 and a ``'uri'`` entry.

         Any previously built index is replaced. When ``blocks`` is empty the
         index is cleared (so ``search`` returns ``[]``) instead of failing on
         an embedding matrix that has no second dimension.
         """
         print(f"⏳ [VECTOR] Indexation de {len(blocks)} Hyper-blocs...")
         corpus = [b['text'] for b in blocks]
         self.uris = [b['uri'] for b in blocks]
         self.content_map = {b['uri']: b['text'] for b in blocks}

         if not corpus:
             # Nothing to embed: drop any stale index rather than crash below.
             self.index = None
             print("⚠️ [VECTOR] Aucun bloc à indexer.")
             return

         embeddings = np.asarray(
             self.model.encode(corpus, show_progress_bar=True), dtype='float32'
         )
         dimension = embeddings.shape[1]
         self.index = faiss.IndexFlatL2(dimension)
         self.index.add(embeddings)
         print("✅ [VECTOR] Index OG-RAG prêt.")

     def search(self, query, top_k=3):
         """Return up to ``top_k`` indexed blocks closest to ``query``.

         Args:
             query: text to embed and look up.
             top_k: maximum number of hits to return.

         Returns:
             A list of dicts with keys ``"uri"``, ``"text"`` and ``"score"``,
             in the order the index returns them. ``"score"`` is the distance
             the index reports for that particular hit (smaller means closer).
             The list is empty when no index has been built or ``top_k < 1``.
         """
         if self.index is None or top_k < 1:
             return []

         query_vec = np.asarray(self.model.encode([query]), dtype='float32')
         dist, indices = self.index.search(query_vec, top_k)

         results = []
         # Pair every hit with its own distance; slots marked -1 hold no hit.
         for distance, idx in zip(dist[0], indices[0]):
             if idx == -1:
                 continue
             uri = self.uris[int(idx)]
             results.append({
                 "uri": uri,
                 "text": self.content_map[uri],
                 "score": float(distance)
             })
         return results