# NOTE(review): "Spaces: Running Running" page-chrome residue from the scrape
# removed — it was not part of the module source.
"""
MODULE ONTOLOGY GRAPH - V FINAL (FIXED INGESTION & DISPLAY)
===========================================================
Update: strict per-sheet ingestion & Chain-of-Thought display.
"""
import streamlit as st
import pandas as pd
import networkx as nx
from pyvis.network import Network
import tempfile
import streamlit.components.v1 as components
import json
import sys
import os
import time

# --- IMPORT HANDLING ---
# Make the project root importable, then pull in the core engines.
try:
    sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
    from src.core.ontology_manager import OntologyManager
    from src.core.validation_engine import ValidationEngine
    from src.core.schema_extractor import SchemaExtractor
    from src.rag.ontology_blocks_builder import OntologyBlocksBuilder
    from src.Algorithms.vector_search import SemanticIndex
    from src.Algorithms.rdf_manager import RDFStore
    from src.modules.jasmine_agent import JasmineAgent
    from src.core.inference_engine import InferenceEngine  # R-Box enabled
except ImportError:
    # NOTE(review): swallowing ImportError leaves names such as RDFStore and
    # JasmineAgent undefined, so show_ontology_graph() would NameError later —
    # confirm this path is only hit in environments where the page never renders.
    pass
# ==============================================================================
# 1. TOOL DISPATCHER (original version preserved)
# ==============================================================================
def execute_agent_tool(tool_name, args, rdf_store, vector_engine):
    """Run one agent tool and return (result_text, visual_update, debug_log).

    Args:
        tool_name: one of "search_semantic", "execute_sparql", "highlight_node";
            anything else is a no-op returning an empty result.
        args: dict of tool arguments ("query" or "node_id").
        rdf_store: object exposing execute_sparql(query) -> str.
        vector_engine: object exposing search(query, top_k) -> list of hit dicts.

    Returns:
        Tuple of (markdown result text, optional visual-action dict, debug log
        string for the Chain-of-Thought expander). Tool errors are captured
        into the result text rather than raised.
    """
    outcome = ""
    overlay = None
    trace = f"🔧 Tool Call: {tool_name}({args})"
    try:
        if tool_name == "search_semantic":
            query = args.get("query", "")
            hits = vector_engine.search(query, top_k=5)
            if not hits:
                outcome = f"❌ Aucun résultat sémantique pour '{query}'."
            else:
                # Accumulate the markdown lines, then join once.
                lines = ["### 🧠 Résultats Vectoriels (OG-RAG)\n"]
                for hit in hits:
                    label = str(hit['uri']).split('#')[-1]
                    meta = hit.get('meta', {'type': 'Unknown'})
                    lines.append(f"- **{label}** ({meta.get('type', 'Entity')})\n")
                    snippet = str(hit['text'])[:250].replace("\n", " ")
                    lines.append(f" > *{snippet}...*\n")
                outcome = "".join(lines)
        elif tool_name == "execute_sparql":
            # Strip markdown fences the LLM tends to wrap queries in.
            query = args.get("query", "").replace("```sparql", "").replace("```", "").strip()
            trace += f"\n\n🔍 SPARQL Query:\n{query}"
            raw = rdf_store.execute_sparql(query)
            outcome = f"### ⚡ Résultat SPARQL (Calculé sur T-Box Typée)\n\n{raw}"
        elif tool_name == "highlight_node":
            # Normalize full URIs / prefixed ids down to the bare node id.
            node = args.get("node_id", "").replace("http://vortex.ai/ontology#", "").replace("vortex:", "")
            overlay = {"action": "highlight_node", "target_id": node}
            outcome = f"✅ Zoom visuel activé sur : {node}"
    except Exception as exc:
        outcome = f"❌ Erreur Exécution Outil : {str(exc)}"
    return outcome, overlay, trace
# ==============================================================================
# 2. UTILS & STYLE (Gotham theme preserved)
# ==============================================================================
def apply_gotham_theme():
    """Inject the dark 'Gotham' CSS theme into the Streamlit page.

    Styles the app background, Palantir-style user/bot chat bubbles, and the
    monospace debugger-log block. Must be called once at the top of the page
    render (idempotent: re-injecting the same <style> tag is harmless).
    """
    st.markdown("""
    <style>
    .stApp { background-color: #0d1117 !important; color: #c9d1d9; }
    .ontology-title { font-family: 'Space Grotesk', sans-serif; font-size: 1.5rem; font-weight: 700; color: #ffffff; }
    /* Chat Styling - Palantir Style */
    .chat-container { border-right: 1px solid #30363d; padding-right: 15px; height: 75vh; overflow-y: auto; }
    .user-msg {
    background: rgba(30, 136, 229, 0.1);
    border-left: 3px solid #1E88E5;
    padding: 12px; margin: 10px 0;
    border-radius: 0 4px 4px 0;
    font-family: 'Inter', sans-serif;
    }
    .bot-msg {
    background: rgba(48, 54, 61, 0.3);
    border-left: 3px solid #00E676;
    padding: 12px; margin: 10px 0;
    border-radius: 0 4px 4px 0;
    font-family: 'Inter', sans-serif;
    }
    /* Debugger Log Style */
    .debug-block {
    font-family: 'JetBrains Mono', monospace;
    font-size: 0.8rem;
    color: #8b949e;
    background: #0d1117;
    border: 1px solid #30363d;
    padding: 10px;
    margin-top: 5px;
    border-radius: 4px;
    }
    </style>
    """, unsafe_allow_html=True)
def get_node_style(entity_type):
    """Map an entity class name to its (hex color, pyvis shape) pair.

    Unknown classes fall back to neutral grey; the shape is always "dot".
    """
    palette = {
        "Client": "#1E88E5",
        "Garant": "#8E44AD",
        "Pret": "#F39C12",
        "Transaction": "#00BCD4",
    }
    key = str(entity_type).strip()
    return palette.get(key, "#6C757D"), "dot"
def safe_open_sheet(client, name, retries=5, delay=1.0):
    """Open spreadsheet `name` via `client`, retrying transient API errors.

    Args:
        client: spreadsheet API client exposing open(name) (presumably gspread
            — TODO confirm against caller).
        name: workbook name to open.
        retries: number of attempts before giving up (default 5, as before).
        delay: seconds to sleep between attempts (default 1.0, as before).

    Returns:
        The opened spreadsheet handle, or None if every attempt fails.
    """
    for _ in range(retries):
        try:
            return client.open(name)
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed during the retry loop.
        except Exception:
            time.sleep(delay)
    return None
def safe_get_records(sh, w_name, retries=3, delay=1.0):
    """Fetch a worksheet's rows as a DataFrame, retrying transient failures.

    Args:
        sh: spreadsheet handle exposing worksheet(name).get_all_records().
        w_name: worksheet (tab) name to read.
        retries: number of attempts before giving up (default 3, as before).
        delay: seconds to sleep between attempts (default 1.0, as before).

    Returns:
        DataFrame of the worksheet records, or an empty DataFrame on failure
        (callers rely on `.empty` rather than exceptions).
    """
    for _ in range(retries):
        try:
            return pd.DataFrame(sh.worksheet(w_name).get_all_records())
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed during the retry loop.
        except Exception:
            time.sleep(delay)
    return pd.DataFrame()
# --- CRITICAL CHANGE: STRICT PER-SHEET INGESTION ---
def extract_triplets(ontology_df, client, sheet_name):
    """Materialize RDF-style triplets from the workbook, one ontology rule at a time.

    Each rule row names a sheet, a subject column/class, a predicate, and an
    object (either a related entity column or a literal data-property column).
    Rows are processed strictly within their own sheet so properties from one
    sheet never leak onto entities defined in another.

    Args:
        ontology_df: rule table with Sheet / SubjectCol / SubjectClass /
            Predicate / ObjectType / ObjectColOrConcept columns.
        client: spreadsheet API client accepted by safe_open_sheet.
        sheet_name: workbook name to open.

    Returns:
        (triplets DataFrame, {sheet name: list of its columns}); both empty
        when the workbook cannot be opened.
    """
    workbook = safe_open_sheet(client, sheet_name)
    if not workbook:
        return pd.DataFrame(), {}
    # Pre-load every referenced sheet exactly once (performance).
    frames = {name: safe_get_records(workbook, name) for name in ontology_df['Sheet'].unique()}
    columns_by_sheet = {name: list(frame.columns) for name, frame in frames.items()}
    rows_out = []
    for _, rule in ontology_df.iterrows():
        frame = frames.get(rule['Sheet'])
        # Guard: skip rules whose sheet is missing/empty or lacks the subject column.
        if frame is None or frame.empty or rule['SubjectCol'] not in frame.columns:
            continue
        for _, record in frame.iterrows():
            subject_value = str(record.get(rule['SubjectCol'], '')).strip().upper()
            if not subject_value:
                continue
            subject_id = f"{rule['SubjectClass']}:{subject_value}"
            # Isolation: only this row's non-empty cells become subject props,
            # preventing cross-sheet mixing (e.g. Clients vs Remboursements).
            props = {k: v for k, v in record.to_dict().items() if v and str(v).strip()}
            if rule['ObjectType'] == 'relation':
                obj_col = str(rule['ObjectColOrConcept'])
                obj_val = str(record.get(obj_col, '')).strip()
                if obj_val:
                    # Column names like "ID_Client" encode the target class.
                    obj_class = obj_col.replace("ID_", "") if "ID_" in obj_col else obj_col
                    rows_out.append({
                        "subject": subject_id,
                        "predicate": rule['Predicate'],
                        "object": f"{obj_class}:{obj_val.upper()}",
                        "object_type": "entity",
                        "subject_props": props,
                    })
            elif rule['ObjectType'] == 'data_property':
                prop_col = rule['ObjectColOrConcept']
                # The property column must exist in the current sheet.
                if prop_col in frame.columns:
                    literal = str(record.get(prop_col, '')).strip()
                    if literal:
                        rows_out.append({
                            "subject": subject_id,
                            "predicate": rule['Predicate'],
                            "object": literal,
                            "object_type": "literal",
                            "subject_props": props,
                        })
    return pd.DataFrame(rows_out), columns_by_sheet
def apply_visual_actions(G, action_data):
    """Apply an agent-requested visual overlay to the graph, in place.

    Currently supports "highlight_node": every node except the target is
    dimmed to a translucent grey and the target is enlarged.

    Args:
        G: graph whose node attribute dicts are mutated (networkx-style
            `G.nodes` mapping).
        action_data: dict with "action" and "target_id" keys, or falsy for
            a no-op.

    Returns:
        The same graph object (mutated).
    """
    if not action_data:
        return G
    action = action_data.get("action")
    target = action_data.get("target_id")
    if action == "highlight_node":
        # Robustness fix: an unknown/hallucinated id used to dim every node
        # with nothing highlighted — leave the graph untouched instead.
        if target not in G.nodes:
            return G
        for n in G.nodes:
            if n != target:
                G.nodes[n]['color'] = 'rgba(50,50,50,0.1)'
            else:
                G.nodes[n]['size'] = 50
    return G
# ==============================================================================
# 3. MAIN ORCHESTRATOR
# ==============================================================================
def show_ontology_graph(client, sheet_name):
    """Render the full VORTEX knowledge-graph page.

    Loads ontology rules from the "Ontology" worksheet, extracts triplets,
    builds a NetworkX graph, runs the ingestion/inference/validation/indexing
    pipeline once per session, then renders the health dashboard, the optional
    agent chat terminal, and the pyvis visualisation.

    Args:
        client: spreadsheet API client (presumably gspread — TODO confirm).
        sheet_name: name of the workbook holding the ontology + data sheets.
    """
    apply_gotham_theme()
    # --- STATE INIT --- (one-time defaults; survive Streamlit reruns)
    if "rdf_store" not in st.session_state: st.session_state["rdf_store"] = RDFStore()
    if "vector_engine" not in st.session_state: st.session_state["vector_engine"] = SemanticIndex()
    if "ontology_manager" not in st.session_state: st.session_state["ontology_manager"] = OntologyManager()
    if "validation_metrics" not in st.session_state: st.session_state["validation_metrics"] = None
    if "chat_history" not in st.session_state: st.session_state["chat_history"] = []
    if "current_visual_action" not in st.session_state: st.session_state["current_visual_action"] = None
    if "jasmine_active" not in st.session_state: st.session_state["jasmine_active"] = False
    # --- HEADER ---
    c1, c2 = st.columns([5, 1])
    with c1: st.markdown('<h1 class="ontology-title">VORTEX FLUX - KG OPERATIONS</h1>', unsafe_allow_html=True)
    with c2: st.session_state["jasmine_active"] = st.toggle("TERMINAL", value=st.session_state["jasmine_active"])
    # --- LOAD DATA ---
    ontology_df = pd.DataFrame(safe_get_records(safe_open_sheet(client, sheet_name), "Ontology"))
    if ontology_df.empty: st.caption("Chargement..."); return
    triplets_df, _ = extract_triplets(ontology_df, client, sheet_name)
    if triplets_df.empty: return
    # --- NETWORKX BUILD ---
    G = nx.DiGraph()
    for _, r in triplets_df.iterrows():
        s, p, o = r['subject'], r['predicate'], r['object']
        if s not in G.nodes:
            c, sh = get_node_style(s.split(':')[0])
            # Node id is "Class:VALUE"; label is the VALUE part when present.
            G.add_node(s, label=s.split(':')[1] if ':' in s else s, group=s.split(':')[0], color=c, shape=sh, **r.get('subject_props', {}))
        if r['object_type'] == 'entity':
            if o not in G.nodes:
                c, sh = get_node_style(o.split(':')[0])
                G.add_node(o, label=o.split(':')[1] if ':' in o else o, group=o.split(':')[0], color=c, shape=sh)
            G.add_edge(s, o, label=p)
        else:
            # Literal values become small text nodes attached by a dashed edge.
            # NOTE(review): hash() is randomized per process (PYTHONHASHSEED),
            # so these "Prop:NNNN" ids are not stable across sessions — confirm
            # nothing persists them.
            pid = f"Prop:{hash(o)%10000}"
            G.add_node(pid, label=str(o)[:20], group="Prop", color="#6C757D", shape="text", size=10)
            G.add_edge(s, pid, label=p, dashes=True)
    # --- PHASE 2 PIPELINE (OG-RAG + PIPELINE + INFERENCE) ---
    st.session_state["ontology_manager"].load_from_dataframe(ontology_df)
    # Heavy pipeline runs once per session; guarded by the "pipeline_done" flag.
    if "pipeline_done" not in st.session_state:
        with st.status("🚀 Démarrage Système VORTEX (Palantir Architecture)...", expanded=True) as status:
            # 1. Ingestion & cleaning pipeline
            st.write("1️⃣ Ingestion & Pipeline de Nettoyage (T-Box Stricte)...")
            st.session_state["rdf_store"] = RDFStore()
            st.session_state["rdf_store"].ingest_networkx_graph(G, st.session_state["ontology_manager"])
            # 2. R-Box inference
            st.write("2️⃣ R-BOX : Raisonnement & Déductions (Golden Record)...")
            reasoner = InferenceEngine(st.session_state["rdf_store"])
            reasoner.run_inference()
            # 3. Validation
            st.write("3️⃣ Audit Qualité & Extraction Schéma...")
            validator = ValidationEngine(st.session_state["rdf_store"], st.session_state["ontology_manager"])
            validator.run_validation()
            st.session_state["validation_metrics"] = validator.generate_metrics_dashboard()
            # 4. Vectorisation (enriched hyper-blocks)
            st.write("4️⃣ Construction Hyper-blocs & Indexation...")
            builder = OntologyBlocksBuilder(st.session_state["rdf_store"])
            hyper_blocks = builder.build_all_blocks()
            st.session_state["vector_engine"].build_from_ontology_blocks(hyper_blocks)
            status.update(label="✅ Système Opérationnel", state="complete", expanded=False)
            st.session_state["pipeline_done"] = True
    # --- DASHBOARD METRICS (visual diagnostics) ---
    metrics = st.session_state.get("validation_metrics")
    pipeline_stats = st.session_state["rdf_store"].pipeline.get_health_report()
    if metrics:
        st.markdown("### 🏥 VORTEX HEALTH MONITORING")
        c1, c2, c3, c4 = st.columns(4)
        c1.metric("Triplets RDF", metrics["triples"])
        c2.metric("Score Qualité", f"{metrics['coverage_score']}%")
        c3.metric("Données Valides", pipeline_stats.get("valid_entries", 0))
        c4.metric("Rejets Contrats", pipeline_stats.get("rejected_contracts", 0), delta_color="inverse")
    # --- CORRECTION TABLE ---
    if pipeline_stats.get("rejected_contracts", 0) > 0:
        st.error(f"🚨 {pipeline_stats.get('rejected_contracts')} Erreurs détectées. Ouvrez le guide ci-dessous.")
        with st.expander("🔍 GUIDE DE CORRECTION EXCEL (Où corriger ?)", expanded=True):
            logs = st.session_state["rdf_store"].pipeline.logs
            if logs:
                df_err = pd.DataFrame(logs)
                st.dataframe(df_err, use_container_width=True)
    # --- MAIN LAYOUT --- (chat column only when the terminal toggle is on)
    if st.session_state["jasmine_active"]:
        col_chat, col_graph = st.columns([1, 2], gap="large")
    else:
        col_graph = st.container()
        col_chat = None
    # --- CHAT SECTION WITH DEBUGGER LOG ---
    if st.session_state["jasmine_active"] and col_chat:
        with col_chat:
            st.markdown("### 🤖 AIP COMMAND")
            with st.container(height=600):
                for msg in st.session_state["chat_history"]:
                    if msg["role"] == "user":
                        st.markdown(f'<div class="user-msg">{msg["content"]}</div>', unsafe_allow_html=True)
                    elif msg["role"] == "debugger":
                        # The debugger log (Chain of Thought) renders here.
                        with st.expander("🕵️ AIP Debugger (Chain of Thought)", expanded=False):
                            st.markdown(f'<div class="debug-block">{msg["content"]}</div>', unsafe_allow_html=True)
                    else:
                        st.markdown(f'<div class="bot-msg">{msg["content"]}</div>', unsafe_allow_html=True)
            if prompt := st.chat_input("Ordre..."):
                st.session_state["chat_history"].append({"role": "user", "content": prompt})
                st.rerun()
            # --- AGENT LOGIC --- (fires when the last message is an
            # unanswered user prompt, i.e. right after the rerun above)
            if st.session_state["chat_history"] and st.session_state["chat_history"][-1]["role"] == "user":
                last_msg = st.session_state["chat_history"][-1]["content"]
                agent = JasmineAgent(st.session_state["rdf_store"], ontology_df.to_dict('records'))
                with st.spinner("🧠 Analyse OAG..."):
                    resp_text, tool_action, thought_trace = agent.ask(last_msg, st.session_state["chat_history"][:-1])
                # 1. Append the debugger log (Chain of Thought)
                if thought_trace:
                    st.session_state["chat_history"].append({"role": "debugger", "content": thought_trace})
                # 2. Execute the tool and append the technical log
                if tool_action:
                    with st.spinner("⚙️ Exécution Pipeline..."):
                        tool_args = tool_action.get('args', {})
                        res, vis, debug_info = execute_agent_tool(
                            tool_action.get('tool', 'none'),
                            tool_args,
                            st.session_state["rdf_store"],
                            st.session_state["vector_engine"]
                        )
                        st.session_state["chat_history"].append({"role": "debugger", "content": debug_info})
                        if vis: st.session_state["current_visual_action"] = vis
                        st.session_state["chat_history"].append({"role": "assistant", "content": res})
                        st.rerun()
                elif resp_text:
                    st.session_state["chat_history"].append({"role": "assistant", "content": resp_text})
                    st.rerun()
    # --- VISUALISATION SECTION ---
    with col_graph:
        if st.session_state["current_visual_action"]:
            G = apply_visual_actions(G, st.session_state["current_visual_action"])
        nt = Network(height="700px", width="100%", bgcolor="#0d1117", font_color="#c9d1d9")
        nt.from_nx(G)
        nt.set_options(json.dumps({
            "physics": {"forceAtlas2Based": {"gravitationalConstant": -50, "springLength": 100}},
            "nodes": {"font": {"size": 14, "color": "#ffffff"}, "borderWidth": 2},
            "interaction": {"hover": True, "navigationButtons": True}
        }))
        # pyvis renders to an HTML file which is embedded back into the page.
        path = tempfile.gettempdir() + "/ontology_viz.html"
        nt.save_graph(path)
        with open(path, 'r', encoding='utf-8') as f:
            components.html(f.read(), height=700)
        if st.session_state["current_visual_action"]:
            if st.button("🔄 Reset Vue"):
                st.session_state["current_visual_action"] = None
                st.rerun()