# Vortex-Flux / src/modules/ontology_graph.py
"""
MODULE ONTOLOGY GRAPH - V FINAL (FIXED INGESTION & DISPLAY)
===========================================================
Mise à jour : Ingestion stricte par feuille & Affichage Chain of Thought.
"""
import streamlit as st
import pandas as pd
import networkx as nx
from pyvis.network import Network
import tempfile
import streamlit.components.v1 as components
import json
import sys
import os
import time
# --- GESTION DES IMPORTS ---
try:
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
from src.core.ontology_manager import OntologyManager
from src.core.validation_engine import ValidationEngine
from src.core.schema_extractor import SchemaExtractor
from src.rag.ontology_blocks_builder import OntologyBlocksBuilder
from src.Algorithms.vector_search import SemanticIndex
from src.Algorithms.rdf_manager import RDFStore
from src.modules.jasmine_agent import JasmineAgent
from src.core.inference_engine import InferenceEngine # R-BOX ACTIVÉE
except ImportError:
pass
# ==============================================================================
# 1. DISPATCHER D'OUTILS (VOTRE VERSION CONSERVÉE)
# ==============================================================================
def execute_agent_tool(tool_name, args, rdf_store, vector_engine):
    """Run the requested agent tool and return (result markdown, visual action, debug log).

    Args:
        tool_name: one of "search_semantic", "execute_sparql", "highlight_node".
        args: dict of tool arguments produced by the agent.
        rdf_store: SPARQL-capable store (used by "execute_sparql").
        vector_engine: semantic index exposing search() (used by "search_semantic").

    Returns:
        (output, visual_action, trace) — output is markdown text for the chat,
        visual_action is a graph command dict or None, trace is the debugger log.
    """
    output = ""
    visual_action = None
    trace = f"🔧 Tool Call: {tool_name}({args})"
    try:
        if tool_name == "search_semantic":
            q = args.get("query", "")
            matches = vector_engine.search(q, top_k=5)
            if not matches:
                output = f"❌ Aucun résultat sémantique pour '{q}'."
            else:
                # Assemble the markdown in a list and join once at the end.
                parts = [f"### 🧠 Résultats Vectoriels (OG-RAG)\n"]
                for hit in matches:
                    short_uri = str(hit['uri']).split('#')[-1]
                    meta = hit.get('meta', {'type': 'Unknown'})
                    parts.append(f"- **{short_uri}** ({meta.get('type', 'Entity')})\n")
                    snippet = str(hit['text'])[:250].replace("\n", " ")
                    parts.append(f" > *{snippet}...*\n")
                output = "".join(parts)
        elif tool_name == "execute_sparql":
            q = args.get("query", "").replace("```sparql", "").replace("```", "").strip()
            trace += f"\n\n🔍 SPARQL Query:\n{q}"
            raw = rdf_store.execute_sparql(q)
            output = f"### ⚡ Résultat SPARQL (Calculé sur T-Box Typée)\n\n{raw}"
        elif tool_name == "highlight_node":
            raw_id = args.get("node_id", "")
            # Strip both the full URI prefix and the short "vortex:" prefix.
            node_key = raw_id.replace("http://vortex.ai/ontology#", "").replace("vortex:", "")
            visual_action = {"action": "highlight_node", "target_id": node_key}
            output = f"✅ Zoom visuel activé sur : {node_key}"
    except Exception as exc:
        output = f"❌ Erreur Exécution Outil : {str(exc)}"
    return output, visual_action, trace
# ==============================================================================
# 2. UTILS & STYLE (VOTRE THÈME GOTHAM CONSERVÉ)
# ==============================================================================
def apply_gotham_theme():
    """Inject the dark "Gotham" CSS theme into the Streamlit page.

    Styles the app background, the Palantir-style chat bubbles (user vs bot)
    and the monospace debugger-log block used for the Chain-of-Thought view.
    """
    # unsafe_allow_html lets the raw <style> tag through instead of escaping it.
    st.markdown("""
    <style>
    .stApp { background-color: #0d1117 !important; color: #c9d1d9; }
    .ontology-title { font-family: 'Space Grotesk', sans-serif; font-size: 1.5rem; font-weight: 700; color: #ffffff; }
    /* Chat Styling - Palantir Style */
    .chat-container { border-right: 1px solid #30363d; padding-right: 15px; height: 75vh; overflow-y: auto; }
    .user-msg {
        background: rgba(30, 136, 229, 0.1);
        border-left: 3px solid #1E88E5;
        padding: 12px; margin: 10px 0;
        border-radius: 0 4px 4px 0;
        font-family: 'Inter', sans-serif;
    }
    .bot-msg {
        background: rgba(48, 54, 61, 0.3);
        border-left: 3px solid #00E676;
        padding: 12px; margin: 10px 0;
        border-radius: 0 4px 4px 0;
        font-family: 'Inter', sans-serif;
    }
    /* Debugger Log Style */
    .debug-block {
        font-family: 'JetBrains Mono', monospace;
        font-size: 0.8rem;
        color: #8b949e;
        background: #0d1117;
        border: 1px solid #30363d;
        padding: 10px;
        margin-top: 5px;
        border-radius: 4px;
    }
    </style>
    """, unsafe_allow_html=True)
def get_node_style(entity_type):
    """Map an entity class name to its (hex colour, pyvis shape).

    Unknown or unrecognised classes fall back to a neutral grey; the shape
    is always "dot".
    """
    palette = {
        "Client": "#1E88E5",
        "Garant": "#8E44AD",
        "Pret": "#F39C12",
        "Transaction": "#00BCD4",
    }
    key = str(entity_type).strip()
    return palette.get(key, "#6C757D"), "dot"
def safe_open_sheet(client, name):
    """Open the spreadsheet `name` via `client`, retrying on transient API errors.

    Makes up to 5 attempts, waiting 1 second between failures (no wait after
    the final attempt).

    Returns:
        The opened sheet handle, or None if every attempt failed.
    """
    for attempt in range(5):
        try:
            return client.open(name)
        # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit propagate.
        except Exception:
            if attempt < 4:  # don't sleep after the last attempt
                time.sleep(1)
    return None
def safe_get_records(sh, w_name):
    """Fetch worksheet `w_name` of sheet `sh` as a DataFrame, with retries.

    Makes up to 3 attempts, waiting 1 second between failures (no wait after
    the final attempt).

    Returns:
        pd.DataFrame of the worksheet records, or an empty DataFrame when the
        worksheet stays unreachable.
    """
    for attempt in range(3):
        try:
            return pd.DataFrame(sh.worksheet(w_name).get_all_records())
        # Was a bare `except:` — narrowed so KeyboardInterrupt/SystemExit propagate.
        except Exception:
            if attempt < 2:  # don't sleep after the last attempt
                time.sleep(1)
    return pd.DataFrame()
# --- MODIFICATION CRITIQUE : INGESTION STRICTE PAR FEUILLE ---
def extract_triplets(ontology_df, client, sheet_name):
    """Build subject/predicate/object triplets from the spreadsheet, rule by rule.

    Args:
        ontology_df: rules table with columns 'Sheet', 'SubjectCol', 'SubjectClass',
            'Predicate', 'ObjectType' ('relation' | 'data_property') and
            'ObjectColOrConcept'.
        client: gspread-style client, passed through to safe_open_sheet().
        sheet_name: name of the spreadsheet to open.

    Returns:
        (triplets_df, sheets_columns): a DataFrame of triplet dicts and a map of
        worksheet name -> list of its columns. Both empty if the sheet can't open.
    """
    triplets = []
    sh = safe_open_sheet(client, sheet_name)
    if not sh: return pd.DataFrame(), {}
    # Pre-load each referenced worksheet once (performance: one API read per sheet).
    cache = {s: safe_get_records(sh, s) for s in ontology_df['Sheet'].unique()}
    sheets_columns = {s: list(df.columns) for s, df in cache.items()}
    # Iterate over every rule of the ontology.
    for _, rule in ontology_df.iterrows():
        target_sheet = rule['Sheet']
        df = cache.get(target_sheet)
        # Safety: skip when the worksheet is empty or the subject column is missing.
        if df is None or df.empty or rule['SubjectCol'] not in df.columns:
            continue
        for _, row in df.iterrows():
            s_val = str(row.get(rule['SubjectCol'], '')).strip().upper()
            if not s_val: continue
            s_id = f"{rule['SubjectClass']}:{s_val}"
            # --- ISOLATION: only keep the columns of THIS row in THIS worksheet ---
            # (prevents mixing data between e.g. Clients and Remboursements)
            # NOTE(review): falsy values (0, empty string) are dropped from the
            # props here — confirm that numeric zeros are never meaningful.
            clean_props = {k: v for k, v in row.to_dict().items() if v and str(v).strip()}
            if rule['ObjectType'] == 'relation':
                ocol = str(rule['ObjectColOrConcept'])
                oval = str(row.get(ocol, '')).strip()
                if oval:
                    # Derive the object class from the column name, e.g. "ID_Client" -> "Client".
                    o_cls = ocol.replace("ID_", "") if "ID_" in ocol else ocol
                    triplets.append({
                        "subject": s_id,
                        "predicate": rule['Predicate'],
                        "object": f"{o_cls}:{oval.upper()}",
                        "object_type": "entity",
                        "subject_props": clean_props
                    })
            elif rule['ObjectType'] == 'data_property':
                target_col = rule['ObjectColOrConcept']
                # Only emit the property when the column exists in the current worksheet.
                if target_col in df.columns:
                    oval = str(row.get(target_col, '')).strip()
                    if oval:
                        triplets.append({
                            "subject": s_id,
                            "predicate": rule['Predicate'],
                            "object": oval,
                            "object_type": "literal",
                            "subject_props": clean_props
                        })
    return pd.DataFrame(triplets), sheets_columns
def apply_visual_actions(G, action_data):
    """Apply a pending visual command to the graph, in place.

    For "highlight_node": the target node is enlarged (size 50) while every
    other node is dimmed to a near-transparent grey. Returns G unchanged for
    any other (or missing) command.
    """
    if not action_data:
        return G
    if action_data.get("action") == "highlight_node":
        wanted = action_data.get("target_id")
        for node in G.nodes:
            if node == wanted:
                G.nodes[node]['size'] = 50
            else:
                G.nodes[node]['color'] = 'rgba(50,50,50,0.1)'
    return G
# ==============================================================================
# 3. MAIN ORCHESTRATOR
# ==============================================================================
def show_ontology_graph(client, sheet_name):
    """Main orchestrator: load ontology rules, build the knowledge graph, run the
    ingestion / inference / validation pipeline once, then render the health
    dashboard, the agent chat (with Chain-of-Thought debugger) and the pyvis view.

    Args:
        client: gspread-style client used to open the spreadsheet.
        sheet_name: spreadsheet holding the "Ontology" rules worksheet plus data sheets.
    """
    apply_gotham_theme()
    # --- STATE INIT (one object per browser session, survives Streamlit reruns) ---
    if "rdf_store" not in st.session_state: st.session_state["rdf_store"] = RDFStore()
    if "vector_engine" not in st.session_state: st.session_state["vector_engine"] = SemanticIndex()
    if "ontology_manager" not in st.session_state: st.session_state["ontology_manager"] = OntologyManager()
    if "validation_metrics" not in st.session_state: st.session_state["validation_metrics"] = None
    if "chat_history" not in st.session_state: st.session_state["chat_history"] = []
    if "current_visual_action" not in st.session_state: st.session_state["current_visual_action"] = None
    if "jasmine_active" not in st.session_state: st.session_state["jasmine_active"] = False
    # --- HEADER: title + toggle that shows/hides the agent chat column ---
    c1, c2 = st.columns([5, 1])
    with c1: st.markdown('<h1 class="ontology-title">VORTEX FLUX - KG OPERATIONS</h1>', unsafe_allow_html=True)
    with c2: st.session_state["jasmine_active"] = st.toggle("TERMINAL", value=st.session_state["jasmine_active"])
    # --- LOAD DATA: the "Ontology" worksheet drives everything else ---
    ontology_df = pd.DataFrame(safe_get_records(safe_open_sheet(client, sheet_name), "Ontology"))
    if ontology_df.empty: st.caption("Chargement..."); return
    triplets_df, _ = extract_triplets(ontology_df, client, sheet_name)
    if triplets_df.empty: return
    # --- NETWORKX BUILD: one node per entity, dashed leaves for literals ---
    G = nx.DiGraph()
    for _, r in triplets_df.iterrows():
        s, p, o = r['subject'], r['predicate'], r['object']
        if s not in G.nodes:
            c, sh = get_node_style(s.split(':')[0])
            G.add_node(s, label=s.split(':')[1] if ':' in s else s, group=s.split(':')[0], color=c, shape=sh, **r.get('subject_props', {}))
        if r['object_type'] == 'entity':
            if o not in G.nodes:
                c, sh = get_node_style(o.split(':')[0])
                G.add_node(o, label=o.split(':')[1] if ':' in o else o, group=o.split(':')[0], color=c, shape=sh)
            G.add_edge(s, o, label=p)
        else:
            # Literal values become small "Prop" leaf nodes.
            # NOTE(review): hash() of a str is salted per process, so pid is only
            # stable within one running session — confirm nothing persists these ids.
            pid = f"Prop:{hash(o)%10000}"
            G.add_node(pid, label=str(o)[:20], group="Prop", color="#6C757D", shape="text", size=10)
            G.add_edge(s, pid, label=p, dashes=True)
    # --- PHASE 2 PIPELINE (OG-RAG + cleaning + inference), run once per session ---
    st.session_state["ontology_manager"].load_from_dataframe(ontology_df)
    if "pipeline_done" not in st.session_state:
        with st.status("🚀 Démarrage Système VORTEX (Palantir Architecture)...", expanded=True) as status:
            # 1. Ingestion & cleaning pipeline (strict T-Box)
            st.write("1️⃣ Ingestion & Pipeline de Nettoyage (T-Box Stricte)...")
            st.session_state["rdf_store"] = RDFStore()
            st.session_state["rdf_store"].ingest_networkx_graph(G, st.session_state["ontology_manager"])
            # 2. R-Box inference: reasoning & deductions (golden record)
            st.write("2️⃣ R-BOX : Raisonnement & Déductions (Golden Record)...")
            reasoner = InferenceEngine(st.session_state["rdf_store"])
            reasoner.run_inference()
            # 3. Quality audit & schema extraction
            st.write("3️⃣ Audit Qualité & Extraction Schéma...")
            validator = ValidationEngine(st.session_state["rdf_store"], st.session_state["ontology_manager"])
            validator.run_validation()
            st.session_state["validation_metrics"] = validator.generate_metrics_dashboard()
            # 4. Vectorisation of enriched hyper-blocks for the semantic index
            st.write("4️⃣ Construction Hyper-blocs & Indexation...")
            builder = OntologyBlocksBuilder(st.session_state["rdf_store"])
            hyper_blocks = builder.build_all_blocks()
            st.session_state["vector_engine"].build_from_ontology_blocks(hyper_blocks)
            status.update(label="✅ Système Opérationnel", state="complete", expanded=False)
        st.session_state["pipeline_done"] = True
    # --- DASHBOARD METRICS (visual diagnostics) ---
    metrics = st.session_state.get("validation_metrics")
    pipeline_stats = st.session_state["rdf_store"].pipeline.get_health_report()
    if metrics:
        st.markdown("### 🏥 VORTEX HEALTH MONITORING")
        c1, c2, c3, c4 = st.columns(4)
        c1.metric("Triplets RDF", metrics["triples"])
        c2.metric("Score Qualité", f"{metrics['coverage_score']}%")
        c3.metric("Données Valides", pipeline_stats.get("valid_entries", 0))
        c4.metric("Rejets Contrats", pipeline_stats.get("rejected_contracts", 0), delta_color="inverse")
    # --- CORRECTION GUIDE: shown only when the pipeline rejected rows ---
    if pipeline_stats.get("rejected_contracts", 0) > 0:
        st.error(f"🚨 {pipeline_stats.get('rejected_contracts')} Erreurs détectées. Ouvrez le guide ci-dessous.")
        with st.expander("🔍 GUIDE DE CORRECTION EXCEL (Où corriger ?)", expanded=True):
            logs = st.session_state["rdf_store"].pipeline.logs
            if logs:
                df_err = pd.DataFrame(logs)
                st.dataframe(df_err, use_container_width=True)
    # --- MAIN LAYOUT: chat column only when the TERMINAL toggle is on ---
    if st.session_state["jasmine_active"]:
        col_chat, col_graph = st.columns([1, 2], gap="large")
    else:
        col_graph = st.container()
        col_chat = None
    # --- CHAT SECTION with debugger log (Chain of Thought) ---
    if st.session_state["jasmine_active"] and col_chat:
        with col_chat:
            st.markdown("### 🤖 AIP COMMAND")
            with st.container(height=600):
                for msg in st.session_state["chat_history"]:
                    if msg["role"] == "user":
                        st.markdown(f'<div class="user-msg">{msg["content"]}</div>', unsafe_allow_html=True)
                    elif msg["role"] == "debugger":
                        # The debugger log (Chain of Thought) renders collapsed here.
                        with st.expander("🕵️ AIP Debugger (Chain of Thought)", expanded=False):
                            st.markdown(f'<div class="debug-block">{msg["content"]}</div>', unsafe_allow_html=True)
                    else:
                        st.markdown(f'<div class="bot-msg">{msg["content"]}</div>', unsafe_allow_html=True)
            if prompt := st.chat_input("Ordre..."):
                st.session_state["chat_history"].append({"role": "user", "content": prompt})
                st.rerun()
            # --- AGENT LOGIC: answer the pending user message, if any ---
            if st.session_state["chat_history"] and st.session_state["chat_history"][-1]["role"] == "user":
                last_msg = st.session_state["chat_history"][-1]["content"]
                agent = JasmineAgent(st.session_state["rdf_store"], ontology_df.to_dict('records'))
                with st.spinner("🧠 Analyse OAG..."):
                    resp_text, tool_action, thought_trace = agent.ask(last_msg, st.session_state["chat_history"][:-1])
                # 1. Record the Chain-of-Thought trace for the debugger expander.
                if thought_trace:
                    st.session_state["chat_history"].append({"role": "debugger", "content": thought_trace})
                # 2. Execute the requested tool and record its technical log.
                if tool_action:
                    with st.spinner("⚙️ Exécution Pipeline..."):
                        tool_args = tool_action.get('args', {})
                        res, vis, debug_info = execute_agent_tool(
                            tool_action.get('tool', 'none'),
                            tool_args,
                            st.session_state["rdf_store"],
                            st.session_state["vector_engine"]
                        )
                        st.session_state["chat_history"].append({"role": "debugger", "content": debug_info})
                        if vis: st.session_state["current_visual_action"] = vis
                        st.session_state["chat_history"].append({"role": "assistant", "content": res})
                        st.rerun()
                elif resp_text:
                    st.session_state["chat_history"].append({"role": "assistant", "content": resp_text})
                    st.rerun()
    # --- VISUALISATION SECTION: render the graph through pyvis ---
    with col_graph:
        if st.session_state["current_visual_action"]:
            G = apply_visual_actions(G, st.session_state["current_visual_action"])
        nt = Network(height="700px", width="100%", bgcolor="#0d1117", font_color="#c9d1d9")
        nt.from_nx(G)
        nt.set_options(json.dumps({
            "physics": {"forceAtlas2Based": {"gravitationalConstant": -50, "springLength": 100}},
            "nodes": {"font": {"size": 14, "color": "#ffffff"}, "borderWidth": 2},
            "interaction": {"hover": True, "navigationButtons": True}
        }))
        # pyvis writes an HTML file; it is re-read and embedded as an iframe.
        path = tempfile.gettempdir() + "/ontology_viz.html"
        nt.save_graph(path)
        with open(path, 'r', encoding='utf-8') as f:
            components.html(f.read(), height=700)
        if st.session_state["current_visual_action"]:
            if st.button("🔄 Reset Vue"):
                st.session_state["current_visual_action"] = None
                st.rerun()