Doc2GL Deploy
Gemini: add model fallback list and GEMINI_MODEL override to avoid 404 NOT_FOUND
7a2d1b0 | """ | |
| ╔══════════════════════════════════════════════════════════════════════════════╗ | |
| ║ DOC2GL v2.0 ║ | |
| ║ ║ | |
| ║ Description : Système de conversion de documents (PDF/Images) en graphes ║ | |
| ║ de connaissances avec évaluation de la qualité par ║ | |
| ║ comparaison fuzzy et sémantique ║ | |
| ║ ║ | |
| ║ Auteur : YOUMBI CHATUE DANIELE ║ | |
| ║ Date : [10/09/2025] ║ | |
| ║ Version : 2.0 ║ | |
| ╚══════════════════════════════════════════════════════════════════════════════╝ | |
| """ | |
| # ============================================ | |
| # IMPORTS DES BIBLIOTHÈQUES | |
| # ============================================ | |
| # Interface utilisateur | |
| import gradio as gr | |
| gr_update = gr.update | |
| # Manipulation de fichiers et système | |
| import glob | |
| import os | |
| import io | |
| import shutil | |
| import tempfile | |
| import time | |
| import json | |
| import base64 | |
| import logging | |
| # Traitement de documents | |
| import fitz # PyMuPDF pour la conversion PDF → Image | |
| from PIL import Image # Manipulation d'images | |
| # Base de données de graphes | |
| from neo4j import GraphDatabase # Stockage des graphes de connaissances | |
| # Modèles d'IA pour génération de graphes | |
| import google.genai as genai | |
| from google.genai import types | |
| from gemma import generate_mermaid_from_gemma | |
| from llama import generate_mermaid_from_llama | |
| from Qwen import generate_mermaid_from_qwen | |
| from nvidia_nemotron import generate_mermaid_from_nvidia | |
| from intern_vl import generate_mermaid_from_intern | |
| # Génération de rapports | |
| from fpdf import FPDF | |
| import matplotlib.pyplot as plt | |
| # Comparaison et matching | |
| from rapidfuzz import process, fuzz # Fuzzy matching (similarité textuelle) | |
| from sentence_transformers import SentenceTransformer, util # Matching sémantique | |
| import torch | |
| import numpy as np | |
| from dotenv import load_dotenv | |
| # Agrégation de mindmaps | |
| from aggregation import aggregate_mindmaps | |
# At the top of app.py, right after the imports.
# Public API re-exported by this module (used by external callers/tests).
__all__ = [
    'convert_pdf_to_image',
    'encode_image',
    'generate_mermaid_from_image_gemini',
    'generate_mermaid_from_llama',
    'generate_mermaid_from_gemma',
    'generate_mermaid_from_qwen',
    'generate_mermaid_from_nvidia',
    'generate_mermaid_from_intern',
    'mermaid_to_json',
    'load_json',
    'compare_graphs_with_semantic_fast'
]
# Load .env so API keys and Neo4j credentials are available via os.environ.
load_dotenv()
# Anchor all relative paths (GT files, output folders) on this script's
# directory, regardless of the caller's current working directory.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
os.chdir(_SCRIPT_DIR)
# ============================================
# GLOBAL CONFIGURATION
# ============================================
# Logging setup used to trace every operation of the pipeline.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# ─────────────────────────────────────────────────────────────────────────────
# Neo4j configuration (graph database)
# ─────────────────────────────────────────────────────────────────────────────
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")  # Neo4j server address
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")  # Username
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "123456789")  # Password — override via .env in production
TEMP_FOLDER = "temp_images"  # Temporary folder for rendered page images
OUTPUT_JSON_DIR = os.path.join(_SCRIPT_DIR, "json_files")  # Where generated graph JSON is written
GT_ORIGINAL_PATH = os.path.join(_SCRIPT_DIR, "GT", "OOP_MindMAP_GroundTruth_00.json")  # Ground-truth graph file
# ─────────────────────────────────────────────────────────────────────────────
# Globals used for performance tracking
# ─────────────────────────────────────────────────────────────────────────────
model_precision_records = []  # Metrics collected for each evaluated model
# ─────────────────────────────────────────────────────────────────────────────
# Semantic-matching model configuration (lazy loading)
# ─────────────────────────────────────────────────────────────────────────────
# Multilingual model optimised for speed (12 layers instead of 24).
_semantic_model = None  # populated on first call to get_semantic_model()
def get_semantic_model():
    """Return the shared SentenceTransformer instance, creating it on first use.

    Lazy loading keeps module import fast; the model is cached in the
    module-level ``_semantic_model`` and switched to eval mode (inference only).
    """
    global _semantic_model
    if _semantic_model is None:
        _semantic_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
        _semantic_model.eval()
    return _semantic_model
| # ============================================ | |
| # SECTION 1 : FONCTIONS DE BASE (UTILITAIRES) | |
| # ============================================ | |
def convert_pdf_to_image(pdf_path, output_folder=TEMP_FOLDER):
    """Render the first page of a PDF to a PNG file.

    Args:
        pdf_path: Path to the source PDF document.
        output_folder: Directory for the PNG (created when missing).

    Returns:
        str: Path of the generated "<pdf_name>_page1.png" image.

    Raises:
        Exception: Re-raised after logging when PyMuPDF fails to open/render.
    """
    try:
        os.makedirs(output_folder, exist_ok=True)
        document = fitz.open(pdf_path)
        first_page = document[0]
        pixmap = first_page.get_pixmap()
        base_name = os.path.splitext(os.path.basename(pdf_path))[0]
        image_path = os.path.join(output_folder, f"{base_name}_page1.png")
        pixmap.save(image_path)
        document.close()
        logging.info(f"PDF {pdf_path} converti en image {image_path} avec succès.")
        return image_path
    except Exception as exc:
        logging.error(f"Erreur lors de la conversion du PDF : {exc}")
        raise
def encode_image(img_path):
    """Read an image file and return its contents as a Base64 string.

    Args:
        img_path: Path to the image file on disk.

    Returns:
        str: Base64-encoded file contents, decoded as UTF-8 text.

    Raises:
        OSError: Re-raised after logging when the file cannot be read.
    """
    try:
        with open(img_path, "rb") as handle:
            raw = handle.read()
        return base64.b64encode(raw).decode('utf-8')
    except Exception as exc:
        logging.error(f"Erreur lors de l'encodage de l'image : {exc}")
        raise
def escape_string(value):
    """Escape characters that are problematic inside Cypher string literals.

    Doubles single quotes (SQL/Cypher style) and backslash-escapes '#' and '"'.
    """
    replacements = (("'", "''"), ("#", "\\#"), ('"', '\\"'))
    for old, new in replacements:
        value = value.replace(old, new)
    return value
def load_json(file_path):
    """Load a graph JSON file and return its nodes and edges.

    Args:
        file_path: Path to a JSON file with top-level "nodes" and "edges" keys.

    Returns:
        tuple: (nodes, edges) exactly as stored in the file.

    Raises:
        KeyError: If "nodes" or "edges" is missing.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)
    return payload["nodes"], payload["edges"]
| # ============================================ | |
| # SECTION 2 : GÉNÉRATION DE GRAPHES PAR LES MODÈLES D'IA | |
| # ============================================ | |
def generate_mermaid_from_image_gemini(base64_image):
    """Generate a Mermaid knowledge-graph diagram from an image with Gemini.

    Each model in the candidate list is tried in order (the GEMINI_MODEL env
    override first, then known fallbacks) until one answers — this avoids
    hard 404 NOT_FOUND failures when a model name is retired.

    Args:
        base64_image: PNG image content encoded as a Base64 string.

    Returns:
        str: Mermaid source; prefixed with "graph TD;" if the model omitted it.

    Raises:
        EnvironmentError: If GEMINI_API_KEY is not set.
        Exception: The last provider error when every candidate model failed.
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise EnvironmentError("La clé API GEMINI_API_KEY n'est pas définie.")
    # Optional env override, tried before the built-in fallback list.
    model_from_env = os.environ.get("GEMINI_MODEL", "").strip()
    model_candidates = [
        model_from_env,
        "gemini-2.0-flash",
        "gemini-2.0-flash-lite",
        "gemini-1.5-flash-latest",
        "gemini-1.5-pro-latest",
    ]
    model_candidates = [m for m in model_candidates if m]  # drop the empty override
    try:
        client = genai.Client(api_key=api_key)
        prompt = """
        Analyse cette image et extrais les entités et leurs relations pour créer un graphe de connaissances.
        Génère UNIQUEMENT un diagramme Mermaid valide avec:
        - Les nœuds principaux (personnes, organisations, concepts, etc.)
        - Les relations entre eux
        - Format: graph TD; A[Entité1] --> B[Entité2];
        Important:
        - Ne génère QUE le code Mermaid, sans explications
        - Utilise des noms courts et clairs pour les entités
        - Inclus les relations les plus importantes seulement
        """
        last_error = None
        response = None
        for model_name in model_candidates:
            try:
                response = client.models.generate_content(
                    model=model_name,
                    contents=[
                        types.Content(
                            role="user",
                            parts=[
                                types.Part.from_text(text=prompt),
                                types.Part.from_bytes(
                                    data=base64.b64decode(base64_image),
                                    mime_type="image/png",
                                ),
                            ],
                        )
                    ],
                )
                break  # first model that answers wins
            except Exception as e:
                last_error = e  # remember the failure, try the next candidate
                continue
        if response is None:
            raise last_error if last_error is not None else RuntimeError("Échec Gemini: aucune réponse")
        mermaid_code = (getattr(response, "text", None) or "").strip()
        if not mermaid_code:
            # Fallback: pull the text out of the first candidate part directly.
            try:
                mermaid_code = (
                    response.candidates[0].content.parts[0].text
                ).strip()
            except Exception:
                mermaid_code = ""
        # Clean the code to make sure it is a valid Mermaid graph.
        if not mermaid_code.startswith("graph"):
            mermaid_code = f"graph TD;\n{mermaid_code}"
        return mermaid_code
    except Exception as e:
        logging.error(f"Erreur lors de la génération Mermaid avec Gemini: {e}")
        raise
def mermaid_to_json(mermaid_code):
    """Convert a Mermaid flowchart into a {"nodes": [...], "edges": [...]} dict.

    Only simple ``A[Label] --> B[Label]`` arrows are recognised; edge labels
    (``A -->|rel| B``) and other Mermaid syntax are not parsed.

    Args:
        mermaid_code: Mermaid source text, one statement per line.

    Returns:
        dict: {"nodes": [{"id", "label"}, ...], "edges": [{"source", "target"}, ...]}

    Raises:
        Exception: Re-raised after logging on malformed input.
    """
    try:
        def _parse_endpoint(token):
            # "B[Label]" -> ("B", "Label"); bare "B" -> ("B", "B")
            parts = token.split("[")
            node_id = parts[0].strip()
            label = parts[1].replace("]", "").strip() if len(parts) > 1 else node_id
            return node_id, label

        nodes = {}
        edges = []
        for raw_line in mermaid_code.split("\n"):
            line = raw_line.strip()
            if "-->" not in line:
                continue
            # Drop the trailing statement terminator so it does not leak into
            # the last node's id/label (bug fix: "B[X];" used to give label "X;").
            line = line.rstrip(";").strip()
            # maxsplit=1 keeps lines containing several arrows from raising.
            source, target = line.split("-->", 1)
            source_id, source_label = _parse_endpoint(source.strip())
            target_id, target_label = _parse_endpoint(target.strip())
            nodes.setdefault(source_id, source_label)
            nodes.setdefault(target_id, target_label)
            edges.append({"source": source_id, "target": target_id})
        return {
            "nodes": [
                {"id": node_id, "label": label}
                for node_id, label in nodes.items()
            ],
            "edges": edges
        }
    except Exception as e:
        logging.error(f"Erreur lors de la conversion Mermaid → JSON : {e}")
        raise
def graph_json_to_mermaid(graph_json):
    """Render a graph dict {nodes:[{id,label}], edges:[{source,target}]} as Mermaid.

    Node ids are sanitised (alphanumerics/underscores, "N_" prefix) and double
    quotes in labels become single quotes so the output stays valid Mermaid.
    """
    if not graph_json:
        return "graph TD;"

    def _mermaid_id(raw):
        # Mermaid node ids must avoid spaces and special characters.
        cleaned = "".join(c if c.isalnum() or c == "_" else "_" for c in str(raw))
        return "N_" + cleaned

    out = ["graph TD;"]
    known_ids = {}
    for node in graph_json.get("nodes") or []:
        original = node.get("id")
        text = node.get("label") or original
        node_id = _mermaid_id(original)
        known_ids[str(original)] = node_id
        safe_label = str(text).replace("\"", "'")
        out.append(f" {node_id}[\"{safe_label}\"]; ")
    for edge in graph_json.get("edges") or []:
        src = known_ids.get(str(edge.get("source")), _mermaid_id(edge.get("source")))
        dst = known_ids.get(str(edge.get("target")), _mermaid_id(edge.get("target")))
        out.append(f" {src} --> {dst};")
    return "\n".join(out)
def mermaid_to_html(mermaid_code, elem_id):
    """Wrap Mermaid code into HTML that renders inside Gradio.

    Loads mermaid.js from the jsDelivr CDN on first use (guarded by the
    'mermaidjs_loader' script id) and renders the diagram into the container.

    Args:
        mermaid_code: Mermaid source; empty/None falls back to "graph TD;".
        elem_id: DOM id for the diagram element — must be unique per render.

    Returns:
        str: Self-contained HTML + JS snippet.
    """
    # Neutralise any closing </script> so embedded source cannot break out of
    # the inline script block.
    safe = (mermaid_code or "graph TD;").replace("</script>", "</scr" + "ipt>")
    return f"""
    <div style=\"width:100%;min-height:520px;border:1px solid #e0e0e0;border-radius:8px;background:#fff;overflow:auto;\">
    <div id=\"{elem_id}\" class=\"mermaid\" style=\"padding:16px;\">{safe}</div>
    </div>
    <script>
    (function() {{
    function render() {{
    if (!window.mermaid) return;
    try {{
    window.mermaid.initialize({{ startOnLoad: false, securityLevel: 'loose' }});
    window.mermaid.run({{ nodes: [document.getElementById('{elem_id}')] }});
    }} catch (e) {{ console.error(e); }}
    }}
    if (!document.getElementById('mermaidjs_loader')) {{
    var s = document.createElement('script');
    s.id = 'mermaidjs_loader';
    s.src = 'https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js';
    s.onload = render;
    document.head.appendChild(s);
    }} else {{
    render();
    }}
    }})();
    </script>
    """
| # ============================================ | |
| # SECTION 3 : GESTION DE LA BASE NEO4J | |
| # ============================================ | |
def clear_neo4j():
    """Delete every node and relationship from the Neo4j database.

    Returns:
        bool: True on success, False when Neo4j is unreachable or misconfigured.
    """
    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
        with driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
        logging.info("Base de données Neo4j vidée avec succès.")
        return True
    except Exception as e:
        logging.error(f"Neo4j indisponible ou mal configuré (clear_neo4j): {e}")
        return False
    finally:
        # Bug fix: the driver used to leak when session()/run raised, because
        # close() only ran on the success path.
        if driver is not None:
            try:
                driver.close()
            except Exception:
                pass
def upload_gt_and_test_to_neo4j(gt_json, test_json, doc_name):
    """Upload the GT graph and a test graph into Neo4j, with deduplication.

    GT entities get a "GT_" id prefix and are tagged doc="<doc_name>_GT";
    test entities get a "TEST_" prefix and doc="<doc_name>", so both graphs
    coexist in the same database and can be queried separately.

    Args:
        gt_json: Ground-truth graph as {"nodes": [...], "edges": [...]}.
        test_json: Generated graph in the same format.
        doc_name: Document identifier used to tag stored entities.

    Returns:
        bool: True on success, False when the driver cannot be created or the
        upload fails.
    """
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    except Exception as e:
        logging.error(f"Neo4j indisponible ou mal configuré (driver): {e}")
        return False
    try:
        with driver.session() as session:
            gt_tag = f"{doc_name}_GT"
            # Upload GT
            unique_gt_nodes = {}  # dedupe nodes by id (first occurrence wins)
            for node in gt_json["nodes"]:
                node_id = node["id"]
                if node_id not in unique_gt_nodes:
                    unique_gt_nodes[node_id] = node
            for node_id, node in unique_gt_nodes.items():
                prefixed_id = f"GT_{node_id}"
                session.run(
                    """
                    MERGE (e:Entity {id:$id, doc:$doc})
                    ON CREATE SET e.name = $name
                    ON MATCH SET e.name = $name
                    """,
                    {
                        "id": prefixed_id,
                        "name": escape_string(node["label"]),
                        "doc": gt_tag,
                    },
                )
            unique_gt_edges = {}  # dedupe edges by (source, target) pair
            for edge in gt_json["edges"]:
                edge_key = (edge["source"], edge["target"])
                if edge_key not in unique_gt_edges:
                    unique_gt_edges[edge_key] = edge
            for (src, tgt), edge in unique_gt_edges.items():
                src_id = f"GT_{src}"
                tgt_id = f"GT_{tgt}"
                session.run(
                    """
                    MATCH (a:Entity {id:$src, doc:$doc})
                    MATCH (b:Entity {id:$tgt, doc:$doc})
                    MERGE (a)-[r:RELATED_TO {doc:$doc}]->(b)
                    """,
                    {
                        "src": src_id,
                        "tgt": tgt_id,
                        "doc": gt_tag,
                    },
                )
            # Upload Test
            unique_test_nodes = {}  # same deduplication for the generated graph
            for node in test_json["nodes"]:
                node_id = node["id"]
                if node_id not in unique_test_nodes:
                    unique_test_nodes[node_id] = node
            for node_id, node in unique_test_nodes.items():
                prefixed_id = f"TEST_{node_id}"
                session.run(
                    """
                    MERGE (e:Entity {id:$id, doc:$doc})
                    ON CREATE SET e.name = $name
                    ON MATCH SET e.name = $name
                    """,
                    {
                        "id": prefixed_id,
                        "name": escape_string(node["label"]),
                        "doc": doc_name,
                    },
                )
            unique_test_edges = {}
            for edge in test_json["edges"]:
                edge_key = (edge["source"], edge["target"])
                if edge_key not in unique_test_edges:
                    unique_test_edges[edge_key] = edge
            for (src, tgt), edge in unique_test_edges.items():
                src_id = f"TEST_{src}"
                tgt_id = f"TEST_{tgt}"
                session.run(
                    """
                    MATCH (a:Entity {id:$src, doc:$doc})
                    MATCH (b:Entity {id:$tgt, doc:$doc})
                    MERGE (a)-[r:RELATED_TO {doc:$doc}]->(b)
                    """,
                    {
                        "src": src_id,
                        "tgt": tgt_id,
                        "doc": doc_name,
                    },
                )
    except Exception as e:
        logging.error(f"Neo4j indisponible ou mal configuré (upload): {e}")
        return False
    finally:
        try:
            driver.close()
        except Exception:
            pass
    # Only reached on success, where the dedup dicts are guaranteed defined.
    logging.info(
        f"GT uploadé : {len(unique_gt_nodes)} nœuds, {len(unique_gt_edges)} arêtes"
    )
    logging.info(
        f"Test uploadé : {len(unique_test_nodes)} nœuds, {len(unique_test_edges)} arêtes"
    )
    return True
| # ============================================ | |
| # SECTION 4 : MATCHING HYBRIDE (FUZZY + SÉMANTIQUE) | |
| # ============================================ | |
def compute_semantic_similarity_batch(test_labels, gt_labels):
    """Compute the full test×GT semantic-similarity matrix in one batch.

    Args:
        test_labels: Labels from the generated graph.
        gt_labels: Ground-truth labels.

    Returns:
        numpy.ndarray: Matrix of shape (len(test_labels), len(gt_labels)) with
        cosine similarities scaled to the 0-100 range.
    """
    # Inference only: disable autograd bookkeeping for speed/memory.
    with torch.no_grad():
        test_embeddings = get_semantic_model().encode(
            test_labels,
            convert_to_tensor=True,
            batch_size=32,
            show_progress_bar=False
        )
        gt_embeddings = get_semantic_model().encode(
            gt_labels,
            convert_to_tensor=True,
            batch_size=32,
            show_progress_bar=False
        )
        similarity_matrix = util.cos_sim(test_embeddings, gt_embeddings)
        # Move to CPU so callers can index with plain numpy.
        return (similarity_matrix * 100).cpu().numpy()
def hybrid_match_optimized(test_labels, gt_labels, fuzzy_threshold=80,
                           semantic_threshold=70, alpha=0.6):
    """Match each test label to its best GT label using fuzzy + semantic scores.

    For every test label, the top-5 semantically closest GT candidates are
    re-scored with fuzzy matching; the combined score is
    ``alpha * fuzzy + (1 - alpha) * semantic``. A candidate is eligible when
    either individual score clears its threshold.

    Args:
        test_labels: Labels produced by the model.
        gt_labels: Ground-truth labels.
        fuzzy_threshold: Minimum fuzzy score (0-100) for eligibility.
        semantic_threshold: Minimum semantic score (0-100) for eligibility.
        alpha: Weight of the fuzzy score in the combined score.

    Returns:
        list[tuple]: One (best_match_or_None, combined, fuzzy, semantic)
        tuple per test label.
    """
    if not gt_labels or not test_labels:
        return [(None, 0, 0, 0) for _ in test_labels]
    results = []
    semantic_matrix = compute_semantic_similarity_batch(test_labels, gt_labels)
    top_k = min(5, len(gt_labels))  # loop-invariant, hoisted out of the loop
    for i, test_lbl in enumerate(test_labels):
        best_match = None
        best_combined_score = 0
        best_fuzzy = 0
        best_semantic = 0
        semantic_scores = semantic_matrix[i]
        top_indices = semantic_scores.argsort()[-top_k:][::-1]
        for idx in top_indices:
            candidate = gt_labels[idx]
            semantic_score = float(semantic_scores[idx])
            # Loose pre-filter: skip clearly unrelated candidates before the
            # fuzzy comparison.
            if semantic_score < semantic_threshold - 20:
                continue
            # Direct ratio call — process.extractOne over a one-element list
            # computes exactly the same score with extra overhead.
            fuzzy_score = fuzz.ratio(test_lbl, candidate)
            combined_score = (alpha * fuzzy_score) + ((1 - alpha) * semantic_score)
            if (fuzzy_score >= fuzzy_threshold or
                    semantic_score >= semantic_threshold):
                if combined_score > best_combined_score:
                    best_combined_score = combined_score
                    best_match = candidate
                    best_fuzzy = fuzzy_score
                    best_semantic = semantic_score
        results.append((best_match, best_combined_score, best_fuzzy, best_semantic))
    return results
def compute_edge_metrics_undirected(gt_edges, test_edges, gt_id2lbl, test_id2lbl, label_mapping):
    """Edge precision/recall/F1 with every edge treated as UNDIRECTED.

    Test-edge endpoints are first translated to GT vocabulary through
    ``label_mapping``; each edge is normalised to a sorted label pair so that
    direction (and therefore any inversion metric) is ignored.

    Returns:
        dict: precision/recall/f1 as percentages (2 decimals) plus the
        tp/fp/fn edge lists as label pairs.
    """
    def _pair(a, b):
        # Canonical undirected form: smaller label first.
        return tuple(sorted([a, b]))

    gt_pairs = {
        _pair(gt_id2lbl[e["source"]], gt_id2lbl[e["target"]])
        for e in gt_edges
    }

    test_pairs = set()
    for e in test_edges:
        src = label_mapping.get(test_id2lbl[e["source"]], test_id2lbl[e["source"]])
        tgt = label_mapping.get(test_id2lbl[e["target"]], test_id2lbl[e["target"]])
        if src and tgt:
            test_pairs.add(_pair(src, tgt))

    tp = gt_pairs & test_pairs
    fp = test_pairs - gt_pairs
    fn = gt_pairs - test_pairs
    precision = len(tp) / len(test_pairs) * 100 if test_pairs else 100
    recall = len(tp) / len(gt_pairs) * 100 if gt_pairs else 100
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    return {
        "precision_edges": round(precision, 2),
        "recall_edges": round(recall, 2),
        "f1_edges": round(f1, 2),
        "tp_edges": list(tp),
        "fp_edges": list(fp),
        "fn_edges": list(fn),
    }
def compute_graph_depth(nodes, edges):
    """Compute the maximum BFS depth of the graph plus per-node levels.

    Roots are nodes without incoming edges (every node when none exists,
    e.g. in a pure cycle). Nodes unreachable from any root are placed at
    level 0.

    Returns:
        dict: {"max_depth", "node_levels": {id: level},
               "nodes_per_level": {level: [ids]}}
    """
    from collections import defaultdict, deque

    node_ids = {n["id"] for n in nodes}
    children = defaultdict(list)
    parents = defaultdict(list)
    for edge in edges:
        src, dst = edge["source"], edge["target"]
        # Ignore dangling edges that reference unknown nodes.
        if src in node_ids and dst in node_ids:
            children[src].append(dst)
            parents[dst].append(src)

    roots = [nid for nid in node_ids if nid not in parents] or list(node_ids)

    levels = {}
    per_level = defaultdict(list)
    seen = set()
    pending = deque((root, 0) for root in roots)
    while pending:
        current, depth = pending.popleft()
        if current in seen:
            continue
        seen.add(current)
        levels[current] = depth
        per_level[depth].append(current)
        pending.extend(
            (child, depth + 1) for child in children[current] if child not in seen
        )

    # Anything never reached is treated as a top-level node.
    for orphan in node_ids - seen:
        levels[orphan] = 0
        per_level[0].append(orphan)

    return {
        "max_depth": max(levels.values()) if levels else 0,
        "node_levels": levels,
        "nodes_per_level": dict(per_level),
    }
def compute_hierarchical_metrics(gt_nodes, gt_edges, test_nodes, test_edges, label_mapping):
    """Compare the hierarchical structure of a test graph against the GT.

    Measures the depth difference, the share of matched nodes placed at the
    correct level, and a per-level node-count similarity.

    Returns:
        dict with depth stats, level accuracy and structure similarity
        (percentages rounded to 2 decimals).
    """
    gt_h = compute_graph_depth(gt_nodes, gt_edges)
    test_h = compute_graph_depth(test_nodes, test_edges)
    gt_depth = gt_h["max_depth"]
    test_depth = test_h["max_depth"]
    depth_gap = abs(gt_depth - test_depth)

    gt_label_by_id = {n["id"]: n["label"] for n in gt_nodes}
    test_label_by_id = {n["id"]: n["label"] for n in test_nodes}

    # Expected level of each GT label (nodes lacking a label are skipped).
    gt_level_of = {}
    for nid, lvl in gt_h["node_levels"].items():
        lbl = gt_label_by_id.get(nid)
        if lbl:
            gt_level_of[lbl] = lvl

    matched = 0
    at_correct_level = 0
    for nid, lvl in test_h["node_levels"].items():
        test_lbl = test_label_by_id.get(nid)
        if not test_lbl:
            continue
        gt_lbl = label_mapping.get(test_lbl)
        if not gt_lbl:
            continue
        matched += 1
        expected = gt_level_of.get(gt_lbl)
        if expected is not None and expected == lvl:
            at_correct_level += 1
    level_accuracy = (at_correct_level / matched * 100) if matched > 0 else 0

    # Per-level similarity of node counts between the two graphs.
    per_level_sims = []
    for lvl in range(max(gt_depth, test_depth) + 1):
        gt_count = len(gt_h["nodes_per_level"].get(lvl, []))
        test_count = len(test_h["nodes_per_level"].get(lvl, []))
        denom = max(gt_count, test_count, 1)
        per_level_sims.append(1 - abs(gt_count - test_count) / denom)
    structure_similarity = (
        sum(per_level_sims) / len(per_level_sims) * 100 if per_level_sims else 0
    )

    return {
        "gt_depth": gt_depth,
        "test_depth": test_depth,
        "depth_difference": depth_gap,
        "depth_match": depth_gap == 0,
        "level_accuracy": round(level_accuracy, 2),
        "structure_similarity": round(structure_similarity, 2),
        "gt_nodes_per_level": gt_h["nodes_per_level"],
        "test_nodes_per_level": test_h["nodes_per_level"],
        "correct_level_count": at_correct_level,
        "total_matched_nodes": matched,
    }
def compute_ged_coherent(gt_nodes, gt_edges, test_nodes, test_edges,
                         hallucination_metrics, edge_metrics, label_mapping=None):
    """Graph Edit Distance derived from the node/edge metrics already computed.

    Node operations come from ``hallucination_metrics`` (hallucinated nodes
    are deletions, missed GT nodes are insertions); edge operations come from
    ``edge_metrics`` (fp edges deleted, fn edges inserted). Substitution
    counts are always 0. The GED is normalised by the total element count of
    both graphs.

    Returns:
        dict with ged / normalized_ged / structural_similarity, per-operation
        counts and truncated detail lists; on failure the counts are zeroed
        and an "error" key is added.
    """
    try:
        # ── Node operations, taken from the hallucination metrics ──────────
        hallucinated = hallucination_metrics["hallucinated_nodes"]
        missed = hallucination_metrics["missed_gt_nodes"]
        node_deletions = len(hallucinated)   # remove from the test graph
        node_insertions = len(missed)        # add to the test graph
        node_substitutions = 0

        # ── Edge operations, taken from the edge metrics ────────────────────
        fp_edges = edge_metrics["fp_edges"]  # extra edges
        fn_edges = edge_metrics["fn_edges"]  # missing edges
        edge_deletions = len(fp_edges)
        edge_insertions = len(fn_edges)
        edge_substitutions = 0

        # ── Total GED and normalisation ─────────────────────────────────────
        ged = (node_deletions + node_insertions + node_substitutions +
               edge_deletions + edge_insertions + edge_substitutions)
        max_ops = len(gt_nodes) + len(gt_edges) + len(test_nodes) + len(test_edges)
        normalized_ged = (ged / max_ops * 100) if max_ops > 0 else 0
        similarity = 100 - normalized_ged

        # ── Verification logs ───────────────────────────────────────────────
        logging.info("=" * 60)
        logging.info("GRAPH EDIT DISTANCE (test → GT)")
        logging.info("=" * 60)
        logging.info(f" GED total : {ged:.2f} opérations")
        logging.info(f" • GED normalisé : {normalized_ged:.2f}%")
        logging.info(f" • Similarité : {similarity:.2f}%")
        logging.info("")
        logging.info(f" Opérations sur NŒUDS :")
        logging.info(f" • Suppressions : {node_deletions} (= hallucinés)")
        logging.info(f" • Insertions : {node_insertions} (= manqués GT)")
        logging.info("")
        logging.info(f" Opérations sur ARÊTES :")
        logging.info(f" • Suppressions : {edge_deletions} (= arêtes en trop)")
        logging.info(f" • Insertions : {edge_insertions} (= arêtes manquantes)")
        logging.info("")
        logging.info(f"🔹 COHÉRENCE AVEC MÉTRIQUES :")
        logging.info(f" ✓ fp_edges = {len(fp_edges)} → Suppressions = {edge_deletions}")
        logging.info(f" ✓ fn_edges = {len(fn_edges)} → Insertions = {edge_insertions}")
        logging.info("=" * 60)

        return {
            "ged": round(ged, 2),
            "normalized_ged": round(normalized_ged, 2),
            "structural_similarity": round(similarity, 2),
            "gt_nodes_count": len(gt_nodes),
            "gt_edges_count": len(gt_edges),
            "test_nodes_count": len(test_nodes),
            "test_edges_count": len(test_edges),
            # Node operations
            "num_node_deletions": node_deletions,
            "num_node_insertions": node_insertions,
            "num_node_substitutions": node_substitutions,
            # Edge operations
            "num_edge_deletions": edge_deletions,
            "num_edge_insertions": edge_insertions,
            "num_edge_substitutions": edge_substitutions,
            # Truncated detail lists (first 10 items each)
            "node_deletions": hallucinated[:10],
            "node_insertions": missed[:10],
            "edge_deletions": [f"{a} - {b}" for a, b in list(fp_edges)[:10]],
            "edge_insertions": [f"{a} - {b}" for a, b in list(fn_edges)[:10]]
        }
    except Exception as e:
        logging.error(f"⚠️ Erreur lors du calcul du GED : {e}")
        return {
            "ged": None,
            "normalized_ged": None,
            "structural_similarity": None,
            "num_node_deletions": 0,
            "num_node_insertions": 0,
            "num_node_substitutions": 0,
            "num_edge_deletions": 0,
            "num_edge_insertions": 0,
            "num_edge_substitutions": 0,
            "error": str(e)
        }
def calculate_hallucination_metrics(gt_nodes, test_nodes, mapped_test_labels):
    """Hallucination/detection statistics for generated nodes vs ground truth.

    A generated node counts as hallucinated when it matched no GT label, or
    when its GT label was already claimed by an earlier generated node
    (matching is one-to-one). Detection rate is the share of GT labels
    matched at least once.
    """
    gt_label_set = {n["label"] for n in gt_nodes}
    generated = [n["label"] for n in test_nodes]

    hallucinated = []
    correct = []
    claimed = set()  # GT labels already consumed by an earlier generated node
    for label, match in zip(generated, mapped_test_labels):
        if match is None or match not in gt_label_set or match in claimed:
            hallucinated.append(label)
        else:
            correct.append(label)
            claimed.add(match)

    total_generated = len(generated)
    total_gt = len(gt_label_set)
    detected = len(claimed)
    hallucination_rate = (len(hallucinated) / total_generated * 100) if total_generated > 0 else 0
    detection_rate = (detected / total_gt * 100) if total_gt > 0 else 0
    return {
        "hallucination_rate": round(hallucination_rate, 2),
        "detection_rate": round(detection_rate, 2),
        "total_generated": total_generated,
        "num_hallucinated": len(hallucinated),
        "num_correct": len(correct),
        "hallucinated_nodes": hallucinated,
        "correct_nodes": correct,
        "total_gt": total_gt,
        "num_detected": detected,
        "num_missed": total_gt - detected,
        "missed_gt_nodes": list(gt_label_set - claimed),
    }
| def compare_graphs_with_semantic_fast(gt_nodes, gt_edges, test_nodes, test_edges, | |
| fuzzy_threshold=80, semantic_threshold=70, | |
| alpha=0.6, edge_mode="hybrid"): | |
| """ | |
| Sans inversions + GED cohérent | |
| """ | |
| gt_labels = [n["label"] for n in gt_nodes] | |
| test_labels = [n["label"] for n in test_nodes] | |
| matches = hybrid_match_optimized( | |
| test_labels, gt_labels, fuzzy_threshold, semantic_threshold, alpha | |
| ) | |
| mapped_test_labels = [] | |
| matching_details = [] | |
| for test_lbl, (match, combined_score, fuzzy_score, sem_score) in zip(test_labels, matches): | |
| if match: | |
| mapped_test_labels.append(match) | |
| matching_details.append({ | |
| "test": test_lbl, | |
| "matched": match, | |
| "combined": round(combined_score, 2), | |
| "fuzzy": round(fuzzy_score, 2), | |
| "semantic": round(sem_score, 2) | |
| }) | |
| else: | |
| mapped_test_labels.append(None) | |
| matching_details.append({ | |
| "test": test_lbl, | |
| "matched": "NO_MATCH", | |
| "combined": 0, | |
| "fuzzy": 0, | |
| "semantic": 0 | |
| }) | |
| hallucination_metrics = calculate_hallucination_metrics( | |
| gt_nodes, test_nodes, mapped_test_labels | |
| ) | |
| label_mapping = { | |
| test_lbl: match | |
| for test_lbl, (match, _, _, _) in zip(test_labels, matches) | |
| if match | |
| } | |
| hallucinated_nodes_list = hallucination_metrics["hallucinated_nodes"] | |
| gt_set = set(gt_labels) | |
| test_set = set([lbl for lbl in mapped_test_labels if lbl is not None]) | |
| tp_nodes = gt_set & test_set | |
| extra_nodes = set(hallucinated_nodes_list) | |
| fn_nodes = gt_set - test_set | |
| gt_id2lbl = {n["id"]: n["label"] for n in gt_nodes} | |
| test_id2lbl = {n["id"]: n["label"] for n in test_nodes} | |
| # Utiliser la fonction non-orientée | |
| edge_metrics = compute_edge_metrics_undirected( | |
| gt_edges, test_edges, gt_id2lbl, test_id2lbl, label_mapping | |
| ) | |
| prec_edges = edge_metrics["precision_edges"] / 100 | |
| rec_edges = edge_metrics["recall_edges"] / 100 | |
| f1_edges = edge_metrics["f1_edges"] / 100 | |
| tp_edges = set(edge_metrics["tp_edges"]) | |
| fp_edges = set(edge_metrics["fp_edges"]) | |
| fn_edges = set(edge_metrics["fn_edges"]) | |
| prec_nodes = ( | |
| len(tp_nodes) / (len(tp_nodes) + len(extra_nodes)) | |
| if tp_nodes or extra_nodes | |
| else 1.0 | |
| ) | |
| rec_nodes = ( | |
| len(tp_nodes) / (len(tp_nodes) + len(fn_nodes)) | |
| if tp_nodes or fn_nodes | |
| else 1.0 | |
| ) | |
| f1_nodes = ( | |
| 2 * prec_nodes * rec_nodes / (prec_nodes + rec_nodes) | |
| if (prec_nodes + rec_nodes) | |
| else 0.0 | |
| ) | |
| hierarchical_metrics = compute_hierarchical_metrics( | |
| gt_nodes, gt_edges, test_nodes, test_edges, label_mapping | |
| ) | |
| # Passer edge_metrics au GED | |
| ged_metrics = compute_ged_coherent( | |
| gt_nodes, gt_edges, | |
| test_nodes, test_edges, | |
| hallucination_metrics, | |
| edge_metrics, | |
| label_mapping | |
| ) | |
| return { | |
| "precision_nodes": round(prec_nodes * 100, 2), | |
| "recall_nodes": round(rec_nodes * 100, 2), | |
| "f1_nodes": round(f1_nodes * 100, 2), | |
| "precision_edges": round(prec_edges * 100, 2), | |
| "recall_edges": round(rec_edges * 100, 2), | |
| "f1_edges": round(f1_edges * 100, 2), | |
| "overall_precision": round(((prec_nodes + prec_edges) / 2) * 100, 2), | |
| "overall_recall": round(((rec_nodes + rec_edges) / 2) * 100, 2), | |
| "overall_f1": round(((f1_nodes + f1_edges) / 2) * 100, 2), | |
| "missing_nodes": list(fn_nodes), | |
| "extra_nodes": list(extra_nodes), | |
| "missing_edges": list(fn_edges), | |
| "extra_edges": list(fp_edges), | |
| "matching_details": matching_details, | |
| "tp_edges": list(tp_edges), | |
| "fp_edges": list(fp_edges), | |
| "fn_edges": list(fn_edges), | |
| "hallucination_rate": hallucination_metrics["hallucination_rate"], | |
| "detection_rate": hallucination_metrics["detection_rate"], | |
| "total_generated": hallucination_metrics["total_generated"], | |
| "num_hallucinated": hallucination_metrics["num_hallucinated"], | |
| "num_correct": hallucination_metrics["num_correct"], | |
| "hallucinated_nodes": hallucination_metrics["hallucinated_nodes"], | |
| "total_gt": hallucination_metrics["total_gt"], | |
| "num_detected": hallucination_metrics["num_detected"], | |
| "num_missed": hallucination_metrics["num_missed"], | |
| "missed_gt_nodes": hallucination_metrics["missed_gt_nodes"], | |
| "gt_depth": hierarchical_metrics["gt_depth"], | |
| "test_depth": hierarchical_metrics["test_depth"], | |
| "depth_difference": hierarchical_metrics["depth_difference"], | |
| "depth_match": hierarchical_metrics["depth_match"], | |
| "level_accuracy": hierarchical_metrics["level_accuracy"], | |
| "structure_similarity": hierarchical_metrics["structure_similarity"], | |
| "gt_nodes_per_level": hierarchical_metrics["gt_nodes_per_level"], | |
| "test_nodes_per_level": hierarchical_metrics["test_nodes_per_level"], | |
| "ged": ged_metrics.get("ged"), | |
| "normalized_ged": ged_metrics.get("normalized_ged"), | |
| "structural_similarity": ged_metrics.get("structural_similarity"), | |
| "num_node_insertions": ged_metrics.get("num_node_insertions", 0), | |
| "num_node_deletions": ged_metrics.get("num_node_deletions", 0), | |
| "num_edge_insertions": ged_metrics.get("num_edge_insertions", 0), | |
| "num_edge_deletions": ged_metrics.get("num_edge_deletions", 0), | |
| } | |
| # ============================================ | |
| # SECTION 5 : MÉTRIQUES D'HALLUCINATION | |
| # ============================================ | |
def calculate_hallucination_metrics(gt_nodes, test_nodes, mapped_test_labels):
    """
    Classify every generated node as correct or hallucinated, and every
    ground-truth (GT) node as detected or missed.

    STRICT RULES:
    - num_correct      = test nodes that matched a GT node
    - num_hallucinated = test nodes that did NOT match
    - num_detected     = GT nodes that were found by the test graph
    - num_missed       = GT nodes that were NOT found

    MANDATORY CONSISTENCY (checked by assertions below):
    - num_correct + num_hallucinated == total_generated
    - num_detected + num_missed == total_gt
    - num_correct == num_detected (each GT node may be claimed only once)

    Args:
        gt_nodes (list): GT nodes, dicts carrying at least a "label" key.
        test_nodes (list): generated nodes, same shape.
        mapped_test_labels (list): per test node, the GT label it matched
            (or None when no match was found); same length as test_nodes.

    Returns:
        dict: rates (percent, rounded to 2 decimals), raw counts, and the
        lists of hallucinated / correct / missed node labels.
    """
    # ── Label extraction ─────────────────────────────────────────────────
    gt_labels = {node["label"] for node in gt_nodes}
    generated_labels = [node["label"] for node in test_nodes]
    # ── Classification, guarding against duplicate claims ────────────────
    hallucinated_nodes = []
    correct_nodes = []
    claimed_by = {}  # gt_label -> first test label that claimed it
    for gen_label, gt_match in zip(generated_labels, mapped_test_labels):
        if gt_match is None or gt_match not in gt_labels:
            # No usable match at all -> hallucination.
            hallucinated_nodes.append(gen_label)
            continue
        if gt_match in claimed_by:
            # A GT node may be matched only once; any later claimant is
            # counted as a hallucination and logged for inspection.
            hallucinated_nodes.append(gen_label)
            logging.warning(
                f"⚠️ Doublon : '{gen_label}' matche '{gt_match}' "
                f"déjà matché par '{claimed_by[gt_match]}'"
            )
        else:
            # First claim on this GT node -> correct.
            correct_nodes.append(gen_label)
            claimed_by[gt_match] = gen_label
    matched_gt_nodes = set(claimed_by)
    # ── Counts ───────────────────────────────────────────────────────────
    total_generated = len(generated_labels)
    num_hallucinated = len(hallucinated_nodes)
    num_correct = len(correct_nodes)
    total_gt = len(gt_labels)
    num_detected = len(matched_gt_nodes)
    num_missed = total_gt - num_detected
    # ── Invariant checks (internal consistency, not input validation) ───
    assert num_correct + num_hallucinated == total_generated, \
        f"❌ {num_correct} + {num_hallucinated} ≠ {total_generated}"
    assert num_detected + num_missed == total_gt, \
        f"❌ {num_detected} + {num_missed} ≠ {total_gt}"
    assert num_correct == num_detected, \
        f"❌ CRITIQUE : {num_correct} corrects ≠ {num_detected} détectés"
    # ── Rates (guard the empty denominators) ─────────────────────────────
    hallucination_rate = (num_hallucinated / total_generated * 100) if total_generated > 0 else 0
    detection_rate = (num_detected / total_gt * 100) if total_gt > 0 else 0
    # ── Detailed logs ────────────────────────────────────────────────────
    logging.info("=" * 60)
    logging.info("📊 MÉTRIQUES D'HALLUCINATION")
    logging.info("=" * 60)
    logging.info(f"🔹 Vue MODÈLE (test) :")
    logging.info(f"   • Total généré     : {total_generated}")
    logging.info(f"   • Corrects         : {num_correct}")
    logging.info(f"   • Hallucinés       : {num_hallucinated}")
    logging.info(f"   ✓ {num_correct} + {num_hallucinated} = {total_generated}")
    logging.info("")
    logging.info(f"🔹 Vue GROUND TRUTH :")
    logging.info(f"   • Total GT         : {total_gt}")
    logging.info(f"   • Détectés         : {num_detected}")
    logging.info(f"   • Manqués          : {num_missed}")
    logging.info(f"   ✓ {num_detected} + {num_missed} = {total_gt}")
    logging.info("")
    logging.info(f"🔹 COHÉRENCE :")
    logging.info(f"   ✓ Corrects = Détectés : {num_correct} = {num_detected}")
    logging.info("=" * 60)
    return {
        "hallucination_rate": round(hallucination_rate, 2),
        "detection_rate": round(detection_rate, 2),
        "total_generated": total_generated,
        "num_hallucinated": num_hallucinated,
        "num_correct": num_correct,
        "hallucinated_nodes": hallucinated_nodes,
        "correct_nodes": correct_nodes,
        "total_gt": total_gt,
        "num_detected": num_detected,
        "num_missed": num_missed,
        "missed_gt_nodes": list(gt_labels - matched_gt_nodes)
    }
| # ============================================ | |
| # SECTION 6 : COMPARAISON AVEC FUZZY UNIQUEMENT | |
| # ============================================ | |
def compare_graphs(gt_nodes, gt_edges, test_nodes, test_edges, threshold=100):
    """
    Compare two graphs using fuzzy matching ONLY (no semantic model).

    Args:
        gt_nodes (list): Ground-truth nodes (dicts with "id" and "label").
        gt_edges (list): Ground-truth edges (dicts with "source"/"target").
        test_nodes (list): Generated nodes.
        test_edges (list): Generated edges.
        threshold (int): Fuzzy similarity threshold in [0, 100]; a test
            label maps to a GT label only when the normalized Levenshtein
            ratio reaches this value (default 100 = exact match only).

    Returns:
        dict: Same metric keys as compare_graphs_with_semantic_fast
            (node/edge precision-recall-F1, overall scores, error lists,
            hallucination metrics).

    Note:
        Simplified variant without semantic matching, used when
        use_semantic=False in the UI. Faster, but blind to synonyms.
    """
    # ═════════════════════════════════════════════════════════════════════
    # STEP 1: Label extraction
    # ═════════════════════════════════════════════════════════════════════
    gt_labels = [n["label"] for n in gt_nodes]
    test_labels = [n["label"] for n in test_nodes]
    # ═════════════════════════════════════════════════════════════════════
    # STEP 2: Memoized fuzzy lookup
    # ═════════════════════════════════════════════════════════════════════
    # FIX: with an empty GT graph, process.extractOne(lbl, []) returns
    # None, and the previous tuple unpacking raised TypeError. The cache
    # also avoids re-scoring the same label once per edge endpoint.
    _fuzzy_cache = {}

    def _fuzzy_lookup(lbl):
        """Return (best_gt_label, score) for lbl; (None, 0) when GT is empty."""
        if lbl not in _fuzzy_cache:
            if gt_labels:
                # Best match by normalized Levenshtein distance [0-100].
                match, score, _ = process.extractOne(lbl, gt_labels, scorer=fuzz.ratio)
                _fuzzy_cache[lbl] = (match, score)
            else:
                _fuzzy_cache[lbl] = (None, 0)
        return _fuzzy_cache[lbl]

    # Map each generated label to its best GT label, or None below threshold.
    mapped_test_labels = []
    for lbl in test_labels:
        match, score = _fuzzy_lookup(lbl)
        mapped_test_labels.append(match if score >= threshold else None)
    # ═════════════════════════════════════════════════════════════════════
    # STEP 3: Hallucination metrics
    # ═════════════════════════════════════════════════════════════════════
    hallucination_metrics = calculate_hallucination_metrics(
        gt_nodes, test_nodes, mapped_test_labels
    )
    # ═════════════════════════════════════════════════════════════════════
    # STEP 4: Node / edge metrics (same structure as the semantic version)
    # ═════════════════════════════════════════════════════════════════════
    gt_set = set(gt_labels)
    test_set = {lbl for lbl in mapped_test_labels if lbl is not None}
    tp_nodes = gt_set & test_set
    fp_nodes = test_set - gt_set
    fn_nodes = gt_set - test_set

    def remap_edges(edges, id_to_label):
        """Remap edges to GT vocabulary by fuzzy-matching both endpoints."""
        remapped = set()
        for e in edges:
            # NOTE(review): assumes every edge endpoint id exists in the
            # node list — a dangling reference raises KeyError, as before.
            a = id_to_label[e["source"]]
            b = id_to_label[e["target"]]
            a_match, a_score = _fuzzy_lookup(a)
            b_match, b_score = _fuzzy_lookup(b)
            if a_score >= threshold:
                a = a_match
            if b_score >= threshold:
                b = b_match
            remapped.add((a, b))
        return remapped

    gt_id2lbl = {n["id"]: n["label"] for n in gt_nodes}
    test_id2lbl = {n["id"]: n["label"] for n in test_nodes}
    gt_edges_set = remap_edges(gt_edges, gt_id2lbl)
    test_edges_set = remap_edges(test_edges, test_id2lbl)
    tp_edges = gt_edges_set & test_edges_set
    fp_edges = test_edges_set - gt_edges_set
    fn_edges = gt_edges_set - test_edges_set
    # Precision / recall / F1. Convention: 1.0 when both sides are empty,
    # F1 = 0.0 when precision + recall is zero.
    prec_nodes = len(tp_nodes) / (len(tp_nodes) + len(fp_nodes)) if tp_nodes or fp_nodes else 1.0
    rec_nodes = len(tp_nodes) / (len(tp_nodes) + len(fn_nodes)) if tp_nodes or fn_nodes else 1.0
    f1_nodes = (2 * prec_nodes * rec_nodes / (prec_nodes + rec_nodes)
                if (prec_nodes + rec_nodes) else 0.0)
    prec_edges = len(tp_edges) / (len(tp_edges) + len(fp_edges)) if tp_edges or fp_edges else 1.0
    rec_edges = len(tp_edges) / (len(tp_edges) + len(fn_edges)) if tp_edges or fn_edges else 1.0
    f1_edges = (2 * prec_edges * rec_edges / (prec_edges + rec_edges)
                if (prec_edges + rec_edges) else 0.0)
    return {
        "precision_nodes": round(prec_nodes * 100, 2),
        "recall_nodes": round(rec_nodes * 100, 2),
        "f1_nodes": round(f1_nodes * 100, 2),
        "precision_edges": round(prec_edges * 100, 2),
        "recall_edges": round(rec_edges * 100, 2),
        "f1_edges": round(f1_edges * 100, 2),
        "overall_precision": round(((prec_nodes + prec_edges) / 2) * 100, 2),
        "overall_recall": round(((rec_nodes + rec_edges) / 2) * 100, 2),
        "overall_f1": round(((f1_nodes + f1_edges) / 2) * 100, 2),
        "missing_nodes": list(fn_nodes),
        "extra_nodes": list(fp_nodes),
        "missing_edges": list(fn_edges),
        "extra_edges": list(fp_edges),
        # Hallucination metrics (same structure as the semantic version)
        "hallucination_rate": hallucination_metrics["hallucination_rate"],
        "detection_rate": hallucination_metrics["detection_rate"],
        "total_generated": hallucination_metrics["total_generated"],
        "num_hallucinated": hallucination_metrics["num_hallucinated"],
        "num_correct": hallucination_metrics["num_correct"],
        "hallucinated_nodes": hallucination_metrics["hallucinated_nodes"],
        "total_gt": hallucination_metrics["total_gt"],
        "num_detected": hallucination_metrics["num_detected"],
        "num_missed": hallucination_metrics["num_missed"],
        "missed_gt_nodes": hallucination_metrics["missed_gt_nodes"]
    }
| # ============================================ | |
| # SECTION 7 : GÉNÉRATION DE RAPPORTS | |
| # ============================================ | |
| def save_results_to_pdf(results, model_name): | |
| """ | |
| Génère un rapport PDF moderne et visuellement attractif. | |
| """ | |
| os.makedirs("reports", exist_ok=True) | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # HELPER : Nettoyer les caractères problématiques | |
| # ═════════════════════════════════════════════════════════════════════ | |
    def clean_text(text):
        """
        Sanitize text so FPDF can render it without Unicode errors.

        Replaces known problematic characters with ASCII equivalents,
        then drops anything outside printable ASCII / Latin-1 (kept:
        tab, LF, CR); unsupported characters become "?".
        """
        # Coerce non-string values (numbers, tuples, ...) before cleaning.
        if not isinstance(text, str):
            text = str(text)
        # Replacement table: special character -> ASCII equivalent.
        replacements = {
            '→': '->',
            '←': '<-',
            '↔': '<->',
            '…': '...',
            '–': '-',
            '—': '-',
            '“': '"',
            '”': '"',
            '‘': "'",
            '’': "'",
            '«': '"',
            '»': '"',
            '×': 'x',
            '÷': '/',
            '≤': '<=',
            '≥': '>=',
            '≠': '!=',
            '≈': '~=',
            '∞': 'inf',
            '√': 'sqrt',
            '∑': 'sum',
            '∏': 'prod',
            '∫': 'int',
            '∂': 'd',
            '∆': 'delta',
            '∇': 'nabla',
            '∈': 'in',
            '∉': 'not in',
            '⊂': 'subset',
            '⊃': 'superset',
            '∩': 'intersect',
            '∪': 'union',
            '∧': 'and',
            '∨': 'or',
            '¬': 'not',
            '⊕': 'xor',
            '⊗': 'otimes',
            '①': '(1)',
            '②': '(2)',
            '③': '(3)',
            '④': '(4)',
            '⑤': '(5)',
            '⑥': '(6)',
            '⑦': '(7)',
            '⑧': '(8)',
            '⑨': '(9)',
            '⑩': '(10)',
        }
        # Apply the replacements one by one.
        for old, new in replacements.items():
            text = text.replace(old, new)
        # Strip non-printable characters and emojis.
        # Keep only: printable ASCII, Latin-1 letters, and basic whitespace.
        cleaned = ""
        for char in text:
            code = ord(char)
            # Printable ASCII (32-126) and Latin-1 supplement (160-255).
            if (32 <= code <= 126) or (160 <= code <= 255):
                cleaned += char
            elif code == 9 or code == 10 or code == 13:  # Tab, LF, CR
                cleaned += char
            else:
                cleaned += "?"  # Replace unsupported characters
        return cleaned
| # ═════════════════════════════════════════════════════════════════════ | |
| # Configuration du PDF | |
| # ═════════════════════════════════════════════════════════════════════ | |
| pdf = FPDF() | |
| base = os.path.abspath(_SCRIPT_DIR) | |
| regular = os.path.abspath(os.path.join(base, "DejaVuSans.ttf")) | |
| bold = os.path.abspath(os.path.join(base, "DejaVuSans-Bold.ttf")) | |
| if not os.path.exists(regular) or not os.path.exists(bold): | |
| raise FileNotFoundError( | |
| f"Police introuvable. Attendu: '{regular}' et '{bold}'." | |
| ) | |
| import glob | |
| cache_dirs = {base, os.getcwd()} | |
| for d in cache_dirs: | |
| try: | |
| for cache_file in glob.glob(os.path.join(d, "DejaVu*.pkl")): | |
| try: | |
| os.remove(cache_file) | |
| except Exception: | |
| pass | |
| except Exception: | |
| pass | |
| FONT_FAMILY = "DejaVuLocal" | |
| pdf.add_font(FONT_FAMILY, "", regular, uni=True) | |
| pdf.add_font(FONT_FAMILY, "B", bold, uni=True) | |
| _orig_set_font = pdf.set_font | |
| def _set_font(_family, *args, **kwargs): | |
| return _orig_set_font(FONT_FAMILY, *args, **kwargs) | |
| pdf.set_font = _set_font | |
| pdf.add_page() | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # HELPER FUNCTIONS | |
| # ═════════════════════════════════════════════════════════════════════ | |
| def draw_colored_header(text, color_r, color_g, color_b): | |
| """Dessine un en-tête coloré.""" | |
| pdf.set_fill_color(color_r, color_g, color_b) | |
| pdf.set_text_color(255, 255, 255) | |
| pdf.set_font("DejaVu", "B", 14) | |
| pdf.cell(0, 10, clean_text(text), ln=True, align="C", fill=True) # ← NETTOYAGE | |
| pdf.set_text_color(0, 0, 0) | |
| pdf.ln(3) | |
| def draw_section_title(text, emoji=""): | |
| """Dessine un titre de section.""" | |
| pdf.set_font("DejaVu", "B", 11) | |
| pdf.set_fill_color(240, 240, 240) | |
| pdf.cell(0, 8, clean_text(text), ln=True, fill=True) # | |
| pdf.ln(2) | |
| def draw_progress_bar(label, value, max_value=100, width=140): | |
| """Dessine une barre de progression colorée.""" | |
| pdf.set_font("DejaVu", "", 9) | |
| # Label (nettoyé) | |
| pdf.cell(50, 6, clean_text(label), 0, 0) | |
| # Valeur | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.cell(15, 6, f"{value:.1f}%", 0, 0, "R") | |
| # Barre de fond (gris clair) | |
| x_start = pdf.get_x() + 2 | |
| y_start = pdf.get_y() | |
| pdf.set_fill_color(220, 220, 220) | |
| pdf.rect(x_start, y_start + 1, width, 4, "F") | |
| # Barre de progression (couleur selon valeur) | |
| if value >= 80: | |
| pdf.set_fill_color(76, 175, 80) # Vert | |
| elif value >= 60: | |
| pdf.set_fill_color(255, 193, 7) # Jaune | |
| else: | |
| pdf.set_fill_color(244, 67, 54) # Rouge | |
| progress_width = (value / max_value) * width | |
| pdf.rect(x_start, y_start + 1, progress_width, 4, "F") | |
| pdf.ln(7) | |
| def draw_metric_box(label, value, color_r, color_g, color_b): | |
| """Dessine une boîte de métrique colorée.""" | |
| pdf.set_fill_color(color_r, color_g, color_b) | |
| pdf.set_text_color(255, 255, 255) | |
| pdf.set_font("DejaVu", "B", 10) | |
| # Boîte colorée (nettoyé) | |
| pdf.cell(60, 8, clean_text(label), 1, 0, "C", fill=True) | |
| pdf.cell(30, 8, clean_text(str(value)), 1, 0, "C", fill=True) | |
| pdf.set_text_color(0, 0, 0) | |
| pdf.ln() | |
| def draw_table_header(headers): | |
| """Dessine l'en-tête d'un tableau.""" | |
| pdf.set_fill_color(63, 81, 181) # Bleu | |
| pdf.set_text_color(255, 255, 255) | |
| pdf.set_font("DejaVu", "B", 9) | |
| col_width = 190 / len(headers) | |
| for header in headers: | |
| pdf.cell(col_width, 7, clean_text(header), 1, 0, "C", fill=True) # ← NETTOYAGE | |
| pdf.ln() | |
| pdf.set_text_color(0, 0, 0) | |
| def draw_table_row(values, fill=False): | |
| """Dessine une ligne de tableau.""" | |
| pdf.set_font("DejaVu", "", 8) | |
| if fill: | |
| pdf.set_fill_color(245, 245, 245) | |
| col_width = 190 / len(values) | |
| for value in values: | |
| pdf.cell(col_width, 6, clean_text(str(value)), 1, 0, "C", fill) # ← NETTOYAGE | |
| pdf.ln() | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # EN-TÊTE PRINCIPAL DU RAPPORT | |
| # ═════════════════════════════════════════════════════════════════════ | |
| draw_colored_header(f"RAPPORT DE PERFORMANCE - {model_name}", 33, 150, 243) | |
| # date et heure | |
| from datetime import datetime | |
| now = datetime.now() | |
| date_str = now.strftime('%d/%m/%Y') | |
| heure_str = now.strftime('%H:%M:%S') | |
| pdf.set_font("DejaVu", "", 9) | |
| pdf.cell(0, 5, f"Date : {date_str}", ln=True, align="C") | |
| pdf.cell(0, 5, f"Heure : {heure_str}", ln=True, align="C") | |
| pdf.cell(0, 5, f"Modele : {model_name}", ln=True, align="C") | |
| pdf.cell(0, 5, f"Nombre de documents : {len(results)}", ln=True, align="C") | |
| pdf.ln(5) | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # BOUCLE SUR CHAQUE FICHIER | |
| # ═════════════════════════════════════════════════════════════════════ | |
| for idx, res in enumerate(results, 1): | |
| details = res["details"] | |
| if idx > 1: | |
| pdf.add_page() | |
| # Titre avec nom du document + modèle | |
| pdf.set_fill_color(96, 125, 139) | |
| pdf.set_text_color(255, 255, 255) | |
| pdf.set_font("DejaVu", "B", 12) | |
| pdf.cell(0, 10, clean_text(f"Document {idx}: {res['file']}"), ln=True, fill=True, align="C") | |
| # Ajout du modèle et de l'heure de traitement | |
| pdf.set_text_color(0, 0, 0) | |
| pdf.set_font("DejaVu", "", 8) | |
| pdf.cell(0, 5, f"Modele utilise : {model_name} | Traite le : {date_str} a {heure_str}", ln=True, align="C") | |
| pdf.ln(3) | |
| # ───────────────────────────────────────────────────────────────── | |
| # SECTION 1: Vue d'ensemble | |
| # ───────────────────────────────────────────────────────────────── | |
| draw_section_title("VUE D'ENSEMBLE") | |
| draw_progress_bar("Precision Globale", details.get('overall_precision', 0)) | |
| draw_progress_bar("Rappel Global", details.get('overall_recall', 0)) | |
| draw_progress_bar("F1-Score Global", details.get('overall_f1', 0)) | |
| pdf.ln(3) | |
| # ───────────────────────────────────────────────────────────────── | |
| # SECTION 2: Métriques des Nœuds | |
| # ───────────────────────────────────────────────────────────────── | |
| draw_section_title("ANALYSE DES NOEUDS") | |
| # Tableau des métriques | |
| draw_table_header(["Metrique", "Precision", "Rappel", "F1-Score"]) | |
| draw_table_row([ | |
| "Noeuds", | |
| f"{details.get('precision_nodes', 0)}%", | |
| f"{details.get('recall_nodes', 0)}%", | |
| f"{details.get('f1_nodes', 0)}%" | |
| ]) | |
| pdf.ln(3) | |
| # Statistiques de génération | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.cell(0, 6, "Statistiques de generation :", ln=True) | |
| pdf.set_font("DejaVu", "", 9) | |
| total_gen = details.get('total_generated', 0) | |
| num_correct = details.get('num_correct', 0) | |
| num_halluc = details.get('num_hallucinated', 0) | |
| total_gt = details.get('total_gt', 0) | |
| num_detected = details.get('num_detected', 0) | |
| num_missed = details.get('num_missed', 0) | |
| # Ligne 1 : Nœuds générés vs GT | |
| pdf.cell(95, 6, f" Noeuds generes : {total_gen}", 0, 0) | |
| pdf.cell(95, 6, f" Noeuds GT : {total_gt}", 0, 1) | |
| # Ligne 2 : Nœuds corrects vs détectés | |
| pdf.cell(95, 6, f" Noeuds corrects : {num_correct}", 0, 0) | |
| pdf.cell(95, 6, f" Noeuds detectes : {num_detected}", 0, 1) | |
| # Ligne 3 : Hallucinations vs manqués | |
| pdf.cell(95, 6, f" Noeuds hallucines : {num_halluc}", 0, 0) | |
| pdf.cell(95, 6, f" Noeuds GT manques : {num_missed}", 0, 1) | |
| # Ligne 4 : Taux (hallucination + détection) | |
| halluc_rate = details.get('hallucination_rate', 0) | |
| detect_rate = details.get('detection_rate', 0) | |
| pdf.cell(95, 6, f" Taux hallucination : {halluc_rate}%", 0, 0) | |
| pdf.cell(95, 6, f" Taux detection : {detect_rate}%", 0, 1) | |
| pdf.ln(3) | |
| # ═════════════════════════════════════════════════════════════════ | |
| # Détails des nœuds GT manqués | |
| # ═════════════════════════════════════════════════════════════════ | |
| missed_gt_nodes = details.get('missed_gt_nodes', []) | |
| if missed_gt_nodes: | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.set_fill_color(255, 243, 224) # Fond orange clair | |
| pdf.cell(0, 6, f"Noeuds GT non detectes ({len(missed_gt_nodes)}) :", ln=True, fill=True) | |
| pdf.set_font("DejaVu", "", 8) | |
| # Afficher les 5 premiers | |
| for i, node in enumerate(missed_gt_nodes[:5], 1): | |
| pdf.cell(5, 5, "", 0, 0) # Indentation | |
| pdf.cell(0, 5, clean_text(f"{i}. {node}"), ln=True) | |
| if len(missed_gt_nodes) > 5: | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, f"... et {len(missed_gt_nodes) - 5} autres", ln=True) | |
| pdf.ln(2) | |
| # ═════════════════════════════════════════════════════════════════ | |
| # Détails des nœuds hallucinés (si présents) | |
| # ═════════════════════════════════════════════════════════════════ | |
| hallucinated_nodes = details.get('hallucinated_nodes', []) | |
| if hallucinated_nodes: | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.set_fill_color(255, 235, 230) # Fond rouge clair | |
| pdf.cell(0, 6, f"Noeuds hallucines ({len(hallucinated_nodes)}) :", ln=True, fill=True) | |
| pdf.set_font("DejaVu", "", 8) | |
| # Afficher les 5 premiers | |
| for i, node in enumerate(hallucinated_nodes[:5], 1): | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, clean_text(f"{i}. {node}"), ln=True) | |
| if len(hallucinated_nodes) > 5: | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, f"... et {len(hallucinated_nodes) - 5} autres", ln=True) | |
| pdf.ln(2) | |
| pdf.ln(2) | |
| # ───────────────────────────────────────────────────────────────── | |
| # SECTION 3: Métriques des Arêtes | |
| # ───────────────────────────────────────────────────────────────── | |
| draw_section_title("ANALYSE DES ARETES") # ← Sans emoji | |
| # Tableau des métriques | |
| draw_table_header(["Metrique", "Precision", "Rappel", "F1-Score"]) | |
| draw_table_row([ | |
| "Aretes", | |
| f"{details.get('precision_edges', 0)}%", | |
| f"{details.get('recall_edges', 0)}%", | |
| f"{details.get('f1_edges', 0)}%" | |
| ]) | |
| pdf.ln(2) | |
| # Statistiques des arêtes | |
| tp_edges = details.get('tp_edges', []) | |
| fp_edges = details.get('fp_edges', []) | |
| fn_edges = details.get('fn_edges', []) | |
| inverted = details.get('inverted_edges', []) | |
| if not isinstance(tp_edges, list): | |
| tp_edges = list(tp_edges) if tp_edges else [] | |
| if not isinstance(fp_edges, list): | |
| fp_edges = list(fp_edges) if fp_edges else [] | |
| if not isinstance(fn_edges, list): | |
| fn_edges = list(fn_edges) if fn_edges else [] | |
| if not isinstance(inverted, list): | |
| inverted = list(inverted) if inverted else [] | |
| num_tp = len(tp_edges) | |
| num_fp = len(fp_edges) | |
| num_fn = len(fn_edges) | |
| num_inv = len(inverted) | |
| total_gt_edges = num_tp + num_fn | |
| total_test_edges = num_tp + num_fp + num_inv | |
| # Tableau récapitulatif | |
| draw_table_header(["Type", "GT", "Test", "Correctes", "Manquantes", "En trop", "Inversees"]) | |
| draw_table_row([ | |
| "Aretes", | |
| str(total_gt_edges), | |
| str(total_test_edges), | |
| str(num_tp), | |
| str(num_fn), | |
| str(num_fp), | |
| str(num_inv) | |
| ]) | |
| pdf.ln(3) | |
| # ───────────────────────────────────────────────────────────────── | |
| # SECTION 4: Hiérarchie | |
| # ───────────────────────────────────────────────────────────────── | |
| if 'gt_depth' in details: | |
| draw_section_title("STRUCTURE HIERARCHIQUE") # ← Sans emoji | |
| gt_depth = details.get('gt_depth', 0) | |
| test_depth = details.get('test_depth', 0) | |
| # Tableau hiérarchie | |
| draw_table_header(["Metrique", "Valeur"]) | |
| draw_table_row(["Profondeur GT", f"{gt_depth} niveaux"], fill=True) | |
| draw_table_row(["Profondeur Test", f"{test_depth} niveaux"]) | |
| draw_table_row(["Difference", f"{details.get('depth_difference', 0)} niveaux"], fill=True) | |
| draw_table_row(["Precision niveaux", f"{details.get('level_accuracy', 0)}%"]) | |
| draw_table_row(["Similarite structure", f"{details.get('structure_similarity', 0)}%"], fill=True) | |
| pdf.ln(2) | |
| # Distribution par niveau | |
| if details.get('gt_nodes_per_level') and details.get('test_nodes_per_level'): | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.cell(0, 6, "Distribution par niveau :", ln=True) | |
| draw_table_header(["Niveau", "Noeuds GT", "Noeuds Test", "Ecart"]) | |
| max_level = max( | |
| max(details['gt_nodes_per_level'].keys(), default=0), | |
| max(details['test_nodes_per_level'].keys(), default=0) | |
| ) | |
| for level in range(max_level + 1): | |
| gt_count = len(details['gt_nodes_per_level'].get(level, [])) | |
| test_count = len(details['test_nodes_per_level'].get(level, [])) | |
| ecart = abs(gt_count - test_count) | |
| draw_table_row([ | |
| f"Niveau {level}", | |
| str(gt_count), | |
| str(test_count), | |
| str(ecart) | |
| ], fill=(level % 2 == 0)) | |
| pdf.ln(3) | |
| # ───────────────────────────────────────────────────────────────── | |
| # SECTION : Graph Edit Distance (GED) | |
| # ───────────────────────────────────────────────────────────────── | |
| if 'ged' in details and details['ged'] is not None: | |
| draw_section_title("GRAPH EDIT DISTANCE (GED)") | |
| ged_value = details.get('ged', 0) | |
| normalized_ged = details.get('normalized_ged', 0) | |
| structural_sim = details.get('structural_similarity', 0) | |
| # Tableau principal GED | |
| draw_table_header(["Metrique", "Valeur"]) | |
| draw_table_row(["GED (operations)", f"{ged_value:.2f}"], fill=True) | |
| draw_table_row(["GED Normalise", f"{normalized_ged:.2f}%"]) | |
| draw_table_row(["Similarite Structurelle", f"{structural_sim:.2f}%"], fill=True) | |
| pdf.ln(2) | |
| # Détails des opérations | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.cell(0, 6, "Operations requises :", ln=True) | |
| node_ins = details.get('num_node_insertions', 0) | |
| node_del = details.get('num_node_deletions', 0) | |
| edge_ins = details.get('num_edge_insertions', 0) | |
| edge_del = details.get('num_edge_deletions', 0) | |
| # Tableau des opérations | |
| draw_table_header(["Type", "Insertions", "Suppressions", "Total"]) | |
| draw_table_row(["Noeuds", str(node_ins), str(node_del), str(node_ins + node_del)], fill=True) | |
| draw_table_row(["Aretes", str(edge_ins), str(edge_del), str(edge_ins + edge_del)]) | |
| pdf.ln(3) | |
| # ───────────────────────────────────────────────────────────────── | |
| # SECTION 5: Détails des erreurs | |
| # ───────────────────────────────────────────────────────────────── | |
| if pdf.get_y() < 200: # Augmenter la limite pour avoir plus d'espace | |
| draw_section_title("DETAILS DES ERREURS") | |
| pdf.set_font("DejaVu", "", 8) | |
| # ═════════════════════════════════════════════════════════════ | |
| # Nœuds hallucinés | |
| # ═════════════════════════════════════════════════════════════ | |
| hallucinated_nodes = details.get('hallucinated_nodes', []) | |
| if hallucinated_nodes: | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.set_fill_color(255, 235, 230) # Fond rouge clair | |
| pdf.cell(0, 6, f"Noeuds hallucines ({len(hallucinated_nodes)}) :", ln=True, fill=True) | |
| pdf.set_font("DejaVu", "", 8) | |
| for i, node in enumerate(hallucinated_nodes[:10], 1): # Top 10 | |
| pdf.cell(5, 5, "", 0, 0) # Indentation | |
| pdf.cell(0, 5, clean_text(f"{i}. {node}"), ln=True) | |
| if len(hallucinated_nodes) > 10: | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, f"... et {len(hallucinated_nodes) - 10} autres", ln=True) | |
| pdf.ln(2) | |
| # ═════════════════════════════════════════════════════════════ | |
| # Nœuds GT manqués | |
| # ═════════════════════════════════════════════════════════════ | |
| missed_gt_nodes = details.get('missed_gt_nodes', []) | |
| if missed_gt_nodes: | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.set_fill_color(255, 248, 225) # Fond orange clair | |
| pdf.cell(0, 6, f"Noeuds GT manques ({len(missed_gt_nodes)}) :", ln=True, fill=True) | |
| pdf.set_font("DejaVu", "", 8) | |
| for i, node in enumerate(missed_gt_nodes[:10], 1): # Top 10 | |
| pdf.cell(5, 5, "", 0, 0) # Indentation | |
| pdf.cell(0, 5, clean_text(f"{i}. {node}"), ln=True) | |
| if len(missed_gt_nodes) > 10: | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, f"... et {len(missed_gt_nodes) - 10} autres", ln=True) | |
| pdf.ln(2) | |
| # ═════════════════════════════════════════════════════════════ | |
| # Nœuds en trop (extra_nodes) | |
| # ═════════════════════════════════════════════════════════════ | |
| extra_nodes = details.get('extra_nodes', []) | |
| if extra_nodes: | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.set_fill_color(240, 240, 255) # Fond bleu clair | |
| pdf.cell(0, 6, f"Noeuds en trop ({len(extra_nodes)}) :", ln=True, fill=True) | |
| pdf.set_font("DejaVu", "", 8) | |
| for i, node in enumerate(extra_nodes[:10], 1): | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, clean_text(f"{i}. {node}"), ln=True) | |
| if len(extra_nodes) > 10: | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, f"... et {len(extra_nodes) - 10} autres", ln=True) | |
| pdf.ln(2) | |
| # ═════════════════════════════════════════════════════════════ | |
| # Arêtes manquantes | |
| # ═════════════════════════════════════════════════════════════ | |
| missing_edges = details.get('missing_edges', []) | |
| if missing_edges: | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.set_fill_color(255, 243, 224) # Fond jaune clair | |
| pdf.cell(0, 6, f"Aretes manquantes ({len(missing_edges)}) :", ln=True, fill=True) | |
| pdf.set_font("DejaVu", "", 8) | |
| for i, edge in enumerate(missing_edges[:10], 1): | |
| if isinstance(edge, tuple) and len(edge) == 2: | |
| src, tgt = edge | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, clean_text(f"{i}. {src} -> {tgt}"), ln=True) | |
| if len(missing_edges) > 10: | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, f"... et {len(missing_edges) - 10} autres", ln=True) | |
| pdf.ln(2) | |
| # ═════════════════════════════════════════════════════════════ | |
| # Arêtes en trop | |
| # ═════════════════════════════════════════════════════════════ | |
| extra_edges = details.get('extra_edges', []) | |
| if extra_edges: | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.set_fill_color(235, 245, 255) # Fond cyan clair | |
| pdf.cell(0, 6, f"Aretes en trop ({len(extra_edges)}) :", ln=True, fill=True) | |
| pdf.set_font("DejaVu", "", 8) | |
| for i, edge in enumerate(extra_edges[:10], 1): | |
| if isinstance(edge, tuple) and len(edge) == 2: | |
| src, tgt = edge | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, clean_text(f"{i}. {src} -> {tgt}"), ln=True) | |
| if len(extra_edges) > 10: | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, f"... et {len(extra_edges) - 10} autres", ln=True) | |
| pdf.ln(2) | |
| # ═════════════════════════════════════════════════════════════ | |
| # Arêtes inversées | |
| # ═════════════════════════════════════════════════════════════ | |
| inverted_edges = details.get('inverted_edges', []) | |
| if inverted_edges: | |
| pdf.set_font("DejaVu", "B", 9) | |
| pdf.set_fill_color(255, 240, 245) # Fond rose clair | |
| pdf.cell(0, 6, f"Aretes inversees ({len(inverted_edges)}) :", ln=True, fill=True) | |
| pdf.set_font("DejaVu", "", 8) | |
| if not isinstance(inverted_edges, list): | |
| inverted_edges = list(inverted_edges) if inverted_edges else [] | |
| for i, edge in enumerate(inverted_edges[:10], 1): | |
| if isinstance(edge, tuple) and len(edge) == 2: | |
| src, tgt = edge | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, clean_text(f"{i}. {src} -> {tgt} (devrait etre {tgt} -> {src})"), ln=True) | |
| if len(inverted_edges) > 10: | |
| pdf.cell(5, 5, "", 0, 0) | |
| pdf.cell(0, 5, f"... et {len(inverted_edges) - 10} autres", ln=True) | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # PAGE DE SYNTHÈSE FINALE | |
| # ═════════════════════════════════════════════════════════════════════ | |
| pdf.add_page() | |
| draw_colored_header("SYNTHESE GLOBALE", 76, 175, 80) # ← Sans emoji | |
| # Calcul des moyennes | |
| n = len(results) | |
| avg_prec = sum(r["details"].get("overall_precision", 0) for r in results) / n | |
| avg_rec = sum(r["details"].get("overall_recall", 0) for r in results) / n | |
| avg_f1 = sum(r["details"].get("overall_f1", 0) for r in results) / n | |
| draw_section_title("MOYENNES SUR TOUS LES DOCUMENTS") # | |
| draw_progress_bar("Precision Moyenne", avg_prec) | |
| draw_progress_bar("Rappel Moyen", avg_rec) | |
| draw_progress_bar("F1-Score Moyen", avg_f1) | |
| pdf.ln(5) | |
| # Tableau récapitulatif par document | |
| draw_section_title("RECAPITULATIF PAR DOCUMENT") | |
| draw_table_header(["Document", "Precision", "Rappel", "F1"]) | |
| for i, r in enumerate(results, 1): | |
| d = r["details"] | |
| draw_table_row([ | |
| r["file"][:25], # Tronquer si trop long | |
| f"{d.get('overall_precision', 0):.1f}%", | |
| f"{d.get('overall_recall', 0):.1f}%", | |
| f"{d.get('overall_f1', 0):.1f}%" | |
| ], fill=(i % 2 == 0)) | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # Sauvegarde | |
| # ═════════════════════════════════════════════════════════════════════ | |
| output_path = f"reports/{model_name}_report.pdf" | |
| pdf.output(output_path) | |
| logging.info(f"Rapport PDF moderne sauvegarde dans {output_path}") | |
def plot_all_boxplots():
    """
    Build three box-plots comparing precision, recall and F1-score for
    every model tested in the current session.

    Returns:
        tuple: up to 3 matplotlib figures, in the order
        (precision, recall, f1). May contain fewer figures (or be
        empty) if no results have been recorded yet.

    Note:
        Reads the module-level ``model_precision_records`` list, which
        accumulates one entry per model run during the session.

    Reading a box-plot:
        - Box: Q1 to Q3 (middle 50% of the values)
        - Center line: median
        - Whiskers: min/max excluding outliers
        - Dots: outliers
    """
    # ── Step 1: group the recorded metrics by model name ──────────────
    grouped = {}
    for entry in model_precision_records:
        grouped.setdefault(entry["model"], []).append(entry)
    model_names = list(grouped.keys())

    # One (record-key, y-label, title) spec per metric; the record key
    # is used to pull the per-document values out of each entry.
    metric_specs = (
        ("precision", "Précision (%)", "Distribution des Précisions"),
        ("recall", "Rappel (%)", "Distribution des Rappels"),
        ("f1", "F1-Score (%)", "Distribution des F1-Scores"),
    )

    # ── Step 2: draw one box-plot figure per metric ───────────────────
    figures = []
    for key, ylabel, title in metric_specs:
        # Per-model value lists, e.g. [[m1_doc1, m1_doc2, ...], [m2_doc1, ...], ...]
        series = [[rec[key] for rec in grouped[name]] for name in model_names]
        # Skip the figure entirely when there is nothing to plot.
        if not series or not model_names:
            continue
        fig, ax = plt.subplots()
        ax.boxplot(series, tick_labels=model_names)
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        # Slanted labels so long model names stay readable.
        plt.xticks(rotation=30, ha="right")
        plt.tight_layout()
        figures.append(fig)
    return tuple(figures)
def save_performance_plots():
    """
    Save the performance box-plots to temporary PNG files.

    Returns:
        list: Paths to the generated PNG files, in the order
        precision, recall, f1 (fewer if no data was recorded).

    Note:
        Files are created with ``delete=False`` in the system temp
        directory, so they persist until the caller (or the OS temp
        cleanup policy) removes them — Python does NOT delete them
        automatically.

    Generated files:
        - precision_XXXXX.png
        - recall_XXXXX.png
        - f1_XXXXX.png
    """
    # Generate the (up to) 3 figures from the session records.
    figs = plot_all_boxplots()
    tmp_paths = []
    # zip() naturally truncates if fewer than 3 figures were produced.
    for fig, prefix in zip(figs, ("precision", "recall", "f1")):
        # Reserve a unique file name, then close the handle IMMEDIATELY:
        # on Windows a NamedTemporaryFile cannot be reopened by name
        # while the original handle is still open, so savefig() on
        # tmp.name would fail if we kept it open.
        tmp = tempfile.NamedTemporaryFile(
            delete=False,               # keep the file after close
            suffix=".png",              # file extension
            prefix=f"{prefix}_",        # metric name prefix
            dir=tempfile.gettempdir()   # system temp directory
        )
        tmp.close()
        try:
            # Write the figure to the now-closed temp path.
            fig.savefig(tmp.name, format="png", bbox_inches="tight")
        finally:
            # Always release the figure memory, even if savefig fails.
            plt.close(fig)
        tmp_paths.append(tmp.name)
    return tmp_paths
| # ============================================ | |
| # SECTION 8 : FONCTION PRINCIPALE DE TRAITEMENT | |
| # ============================================ | |
| def process_files(files, model_choice, use_semantic=True, fuzzy_threshold=80, | |
| semantic_threshold=70, alpha=0.6, edge_mode="hybrid"): | |
| """ | |
| Fonction principale : traite un batch de fichiers et évalue les performances. | |
| Args: | |
| files (list): Liste de fichiers uploadés (objets Gradio) | |
| model_choice (str): Nom du modèle à utiliser | |
| use_semantic (bool): Activer le matching sémantique (vs fuzzy seul) | |
| fuzzy_threshold (int): Seuil fuzzy matching (0-100) | |
| semantic_threshold (int): Seuil semantic matching (0-100) | |
| alpha (float): Pondération fuzzy/sémantique (0-1) | |
| Returns: | |
| tuple: (images, mermaid_text, dropdown_update, iframe_dict, | |
| initial_graph, summary_html, report_msg) | |
| Pipeline complet: | |
| 1. Nettoyage de Neo4j | |
| 2. Pour chaque fichier: | |
| a. Conversion PDF → Image (si nécessaire) | |
| b. Encodage Base64 | |
| c. Génération Mermaid par le modèle d'IA | |
| d. Conversion Mermaid → JSON | |
| e. Comparaison avec Ground Truth | |
| f. Upload dans Neo4j | |
| g. Sauvegarde des différences | |
| 3. Calcul des métriques moyennes | |
| 4. Génération du rapport PDF | |
| 5. Construction des outputs pour Gradio | |
| Note: | |
| Cette fonction orchestre tout le workflow de l'application | |
| """ | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # ÉTAPE 0: Validation des entrées | |
| # ═════════════════════════════════════════════════════════════════════ | |
| if not files: | |
| return None, "", gr_update(choices=[], value=None), {}, "", "", "" | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # ÉTAPE 1: Initialisation | |
| # ═════════════════════════════════════════════════════════════════════ | |
| neo4j_ok = clear_neo4j() # Vider la base pour ce batch | |
| if not neo4j_ok: | |
| logging.error("Neo4j indisponible: la génération continue sans upload Neo4j.") | |
| # Initialisation des structures de données | |
| results = [] # Résultats de comparaison pour chaque fichier | |
| images = [] # Images pour la galerie Gradio | |
| mermaids = [] # Codes Mermaid générés | |
| docs = [] # Noms des documents | |
| iframe_dict = {} # Mapping doc_name → HTML iframe | |
| error_messages = [] | |
| model_choice_str = str(model_choice or "") | |
| model_key = "Gemini" | |
| if model_choice_str.lower().startswith("gemini"): | |
| model_key = "Gemini" | |
| elif model_choice_str.lower().startswith("gemma"): | |
| model_key = "GEMMA" | |
| elif model_choice_str.lower().startswith("llama"): | |
| model_key = "LLAMA" | |
| elif model_choice_str.lower().startswith("qwen"): | |
| model_key = "QWEN" | |
| elif model_choice_str.lower().startswith("nvidia"): | |
| model_key = "NVIDIA" | |
| elif model_choice_str.lower().startswith("internvl"): | |
| model_key = "InternVL" | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # ÉTAPE 2: Traitement de chaque fichier | |
| # ═════════════════════════════════════════════════════════════════════ | |
| for file in files: | |
| diff_path = None # Chemin du fichier de différences | |
| try: | |
| # ───────────────────────────────────────────────────────────── | |
| # 2.1: Préparation | |
| # ───────────────────────────────────────────────────────────── | |
| # Extraire le nom du fichier (sans extension) | |
| name = os.path.splitext(os.path.basename(file.name))[0] | |
| # Chemin vers le Ground Truth correspondant | |
| gt = os.path.join("GT", f"{name}.json") | |
| gt_exists = os.path.exists(gt) | |
| if not gt_exists: | |
| logging.error(f"Ground truth manquant pour {name}") | |
| # ───────────────────────────────────────────────────────────── | |
| # 2.2: Conversion en image (si PDF) | |
| # ───────────────────────────────────────────────────────────── | |
| if file.name.lower().endswith(".pdf"): | |
| img = convert_pdf_to_image(file.name) | |
| else: | |
| img = file.name # Déjà une image | |
| # ───────────────────────────────────────────────────────────── | |
| # 2.3: Encodage Base64 | |
| # ───────────────────────────────────────────────────────────── | |
| b64 = encode_image(img) | |
| # ───────────────────────────────────────────────────────────── | |
| # 2.4: Génération du code Mermaid par le modèle | |
| # ───────────────────────────────────────────────────────────── | |
| # Sélection de la fonction selon le modèle choisi | |
| mermaid_fn = { | |
| "Gemini": generate_mermaid_from_image_gemini, | |
| "LLAMA": generate_mermaid_from_llama, | |
| "GEMMA": generate_mermaid_from_gemma, | |
| "QWEN": generate_mermaid_from_qwen, | |
| "NVIDIA": generate_mermaid_from_nvidia, | |
| "InternVL": generate_mermaid_from_intern, | |
| }.get(model_key, generate_mermaid_from_image_gemini) # Gemini par défaut | |
| # Appel du modèle | |
| code = mermaid_fn(b64) | |
| # ───────────────────────────────────────────────────────────── | |
| # 2.5: Conversion Mermaid → JSON | |
| # ───────────────────────────────────────────────────────────── | |
| j = mermaid_to_json(code) | |
| if gt_exists: | |
| gt_n, gt_e = load_json(gt) | |
| # ───────────────────────────────────────────────────────────── | |
| # 2.6: Comparaison avec le Ground Truth | |
| # ───────────────────────────────────────────────────────────── | |
| # Choix de la fonction de comparaison selon le mode | |
| if gt_exists: | |
| if use_semantic: | |
| cmp = compare_graphs_with_semantic_fast( | |
| gt_n, gt_e, j["nodes"], j["edges"], | |
| fuzzy_threshold=fuzzy_threshold, | |
| semantic_threshold=semantic_threshold, | |
| alpha=alpha, | |
| edge_mode=edge_mode | |
| ) | |
| else: | |
| cmp = compare_graphs( | |
| gt_n, gt_e, j["nodes"], j["edges"], | |
| threshold=fuzzy_threshold | |
| ) | |
| else: | |
| cmp = { | |
| "overall_precision": 0, | |
| "overall_recall": 0, | |
| "overall_f1": 0, | |
| "precision_nodes": 0, | |
| "recall_nodes": 0, | |
| "f1_nodes": 0, | |
| "precision_edges": 0, | |
| "recall_edges": 0, | |
| "f1_edges": 0, | |
| "missing_nodes": [], | |
| "extra_nodes": [], | |
| "missing_edges": [], | |
| "extra_edges": [], | |
| "hallucination_rate": 0, | |
| "detection_rate": 0, | |
| "total_generated": len(j.get("nodes", [])), | |
| "num_hallucinated": 0, | |
| "num_correct": 0, | |
| "hallucinated_nodes": [], | |
| "correct_nodes": [], | |
| "total_gt": 0, | |
| "num_detected": 0, | |
| "num_missed": 0, | |
| "missed_gt_nodes": [] | |
| } | |
| # ───────────────────────────────────────────────────────────── | |
| # 2.7: Sauvegarde des différences dans un fichier JSON | |
| # ───────────────────────────────────────────────────────────── | |
| diff_path = f"graph_diff_{name}.json" | |
| diff_data = { | |
| "missing_nodes": cmp["missing_nodes"], | |
| "extra_nodes": cmp["extra_nodes"], | |
| "missing_edges": cmp["missing_edges"], | |
| "extra_edges": cmp["extra_edges"], | |
| } | |
| # Ajouter les détails de matching si disponibles | |
| if "matching_details" in cmp: | |
| diff_data["matching_details"] = cmp["matching_details"] | |
| # Écriture du fichier | |
| with open(diff_path, "w", encoding="utf-8") as f: | |
| json.dump(diff_data, f, ensure_ascii=False, indent=2) | |
| logging.info(f"Différences écrites pour {name} → {diff_path}") | |
| # ───────────────────────────────────────────────────────────── | |
| # 2.8: Upload dans Neo4j | |
| # ───────────────────────────────────────────────────────────── | |
| # Charger GT et Test | |
| if gt_exists: | |
| gt_nodes, gt_edges = load_json(gt) | |
| gt_json = {"nodes": [{"id": k, "label": v} for k, v in gt_nodes.items()], "edges": gt_edges} | |
| else: | |
| gt_json = {"nodes": [], "edges": []} | |
| # Upload simultané du GT et du Test | |
| if neo4j_ok: | |
| upload_gt_and_test_to_neo4j(gt_json, j, name) | |
| # ───────────────────────────────────────────────────────────── | |
| # 2.9: Enregistrement des résultats | |
| # ───────────────────────────────────────────────────────────── | |
| results.append({ | |
| "file": name, | |
| "precision": cmp["overall_precision"], | |
| "details": { | |
| **cmp, | |
| "test_nodes": j.get("nodes", []), | |
| "test_edges": j.get("edges", []) | |
| } | |
| }) | |
| images.append(Image.open(img)) | |
| mermaids.append(code) | |
| except Exception as e: | |
| logging.error(f"Erreur sur le fichier {file.name}: {e}") | |
| error_messages.append(f"{os.path.basename(file.name)}: {e}") | |
| continue | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # ÉTAPE 3: Gestion des erreurs globales | |
| # ═════════════════════════════════════════════════════════════════════ | |
| if not results: | |
| empty_msg = '<div style="text-align:center;padding:40px;color:#7f8c8d;background:#f8f9fa;border:2px dashed #e0e0e0;border-radius:8px;">Aucun graphe disponible</div>' | |
| if error_messages: | |
| details = "<br/>".join(error_messages[-8:]) | |
| empty_msg = ( | |
| '<div style="padding:16px;background:#fff3cd;border:1px solid #ffeeba;border-radius:8px;">' | |
| '<div style="font-weight:600;margin-bottom:8px;">Erreurs lors de la génération</div>' | |
| f'<div style="font-family:monospace;white-space:pre-wrap;">{details}</div>' | |
| '</div>' | |
| ) | |
| return ( | |
| None, # 1. images (gallery) | |
| "", # 2. mermaid (textbox) | |
| gr_update(choices=[], value=None), # 3. dropdown | |
| {}, # 4. iframe_dict (json) | |
| empty_msg, # 5. gt_graph_panel (html) ← AJOUTÉ | |
| empty_msg, # 6. test_graph_panel (html) ← AJOUTÉ | |
| "<div>Erreur sur tous les fichiers</div>", # 7. summary (html) | |
| "" # 8. report_status (textbox) | |
| ) | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # ÉTAPE 4: Calcul des métriques moyennes | |
| # ═════════════════════════════════════════════════════════════════════ | |
| n = len(results) # Nombre de fichiers traités avec succès | |
| # Métriques classiques | |
| avg_precision = round( | |
| sum(r["details"]["overall_precision"] for r in results) / n, 2 | |
| ) | |
| avg_recall = round( | |
| sum(r["details"]["overall_recall"] for r in results) / n, 2 | |
| ) | |
| avg_f1 = round( | |
| sum(r["details"]["overall_f1"] for r in results) / n, 2 | |
| ) | |
| # Métriques d'hallucination | |
| avg_hallucination = round( | |
| sum(r["details"].get("hallucination_rate", 0) for r in results) / n, 2 | |
| ) | |
| avg_detection = round( | |
| sum(r["details"].get("detection_rate", 0) for r in results) / n, 2 | |
| ) | |
| # Totaux cumulés | |
| total_hallucinated = sum(r["details"].get("num_hallucinated", 0) for r in results) | |
| total_generated = sum(r["details"].get("total_generated", 0) for r in results) | |
| total_detected = sum(r["details"].get("num_detected", 0) for r in results) | |
| total_gt = sum(r["details"].get("total_gt", 0) for r in results) | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # ÉTAPE 5: Enregistrement pour les graphiques de performance | |
| # ═════════════════════════════════════════════════════════════════════ | |
| model_precision_records.append({ | |
| "model": model_choice, | |
| "precision": avg_precision, | |
| "recall": avg_recall, | |
| "f1": avg_f1, | |
| "hallucination_rate": avg_hallucination | |
| }) | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # ÉTAPE 6: Génération du rapport PDF | |
| # ═════════════════════════════════════════════════════════════════════ | |
| try: | |
| save_results_to_pdf(results, model_choice) | |
| report_status = "Rapport PDF généré." | |
| except Exception as e: | |
| logging.error(f"⚠️ Génération PDF ignorée (l'application continue): {e}") | |
| report_status = f"Rapport PDF non généré: {e}" | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # ÉTAPE 7: Génération du HTML de résumé pour l'interface | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # Calculer les moyennes des métriques d'arêtes | |
| avg_precision_edges = round( | |
| sum(r["details"]["precision_edges"] for r in results) / n, 2 | |
| ) | |
| avg_recall_edges = round( | |
| sum(r["details"]["recall_edges"] for r in results) / n, 2 | |
| ) | |
| avg_f1_edges = round( | |
| sum(r["details"]["f1_edges"] for r in results) / n, 2 | |
| ) | |
| avg_inversion_rate = round( | |
| sum(r["details"].get("inversion_rate", 0) for r in results) / n, 2 | |
| ) | |
| # Calculer les moyennes des métriques hiérarchiques | |
| avg_gt_depth = round( | |
| sum(r["details"].get("gt_depth", 0) for r in results) / n, 1 | |
| ) | |
| avg_test_depth = round( | |
| sum(r["details"].get("test_depth", 0) for r in results) / n, 1 | |
| ) | |
| avg_level_accuracy = round( | |
| sum(r["details"].get("level_accuracy", 0) for r in results) / n, 2 | |
| ) | |
| avg_structure_similarity = round( | |
| sum(r["details"].get("structure_similarity", 0) for r in results) / n, 2 | |
| ) | |
| depth_matches = sum(1 for r in results if r["details"].get("depth_match", False)) | |
| depth_match_rate = round(depth_matches / n * 100, 2) | |
| # CALCUL DES MOYENNES GED | |
| avg_ged = round( | |
| sum(r["details"].get("ged", 0) for r in results if r["details"].get("ged") is not None) / n, 2 | |
| ) if n > 0 else 0 | |
| avg_normalized_ged = round( | |
| sum(r["details"].get("normalized_ged", 0) for r in results if | |
| r["details"].get("normalized_ged") is not None) / n, 2 | |
| ) if n > 0 else 0 | |
| avg_structural_sim_ged = round( | |
| sum(r["details"].get("structural_similarity", 0) for r in results if | |
| r["details"].get("structural_similarity") is not None) / n, 2 | |
| ) if n > 0 else 0 | |
| total_node_ins = sum(r["details"].get("num_node_insertions", 0) for r in results) | |
| total_node_del = sum(r["details"].get("num_node_deletions", 0) for r in results) | |
| total_edge_ins = sum(r["details"].get("num_edge_insertions", 0) for r in results) | |
| total_edge_del = sum(r["details"].get("num_edge_deletions", 0) for r in results) | |
| summary_html = f""" | |
| <div style='font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; background: #ffffff; padding: 24px; border-radius: 12px; border: 1px solid #e0e0e0; box-shadow: 0 1px 3px rgba(0,0,0,0.08);'> | |
| <!-- En-tête sobre --> | |
| <div style="border-bottom: 2px solid #f5f5f5; padding-bottom: 16px; margin-bottom: 24px;"> | |
| <h2 style='color: #2c3e50; margin: 0; font-size: 1.5em; font-weight: 600;'> | |
| Résumé des Performances | |
| </h2> | |
| <p style="color: #7f8c8d; margin: 8px 0 0 0; font-size: 0.9em;"> | |
| Analyse de {n} document(s) • Modèle : {model_choice} • Mode : {edge_mode} | |
| </p> | |
| </div> | |
| <!-- Métriques Principales --> | |
| <div style="margin-bottom: 32px;"> | |
| <h3 style="color: #34495e; font-size: 1em; font-weight: 600; margin: 0 0 16px 0;"> | |
| Métriques Globales | |
| </h3> | |
| <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 16px;"> | |
| <!-- Précision --> | |
| <div style="background: #f8f9fa; padding: 16px; border-radius: 8px; border-left: 3px solid #3498db;"> | |
| <div style="color: #7f8c8d; font-size: 0.75em; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Précision</div> | |
| <div style="color: #2c3e50; font-size: 2em; font-weight: 700; line-height: 1;">{avg_precision}%</div> | |
| </div> | |
| <!-- Rappel --> | |
| <div style="background: #f8f9fa; padding: 16px; border-radius: 8px; border-left: 3px solid #2ecc71;"> | |
| <div style="color: #7f8c8d; font-size: 0.75em; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Rappel</div> | |
| <div style="color: #2c3e50; font-size: 2em; font-weight: 700; line-height: 1;">{avg_recall}%</div> | |
| </div> | |
| <!-- F1-Score --> | |
| <div style="background: #f8f9fa; padding: 16px; border-radius: 8px; border-left: 3px solid #9b59b6;"> | |
| <div style="color: #7f8c8d; font-size: 0.75em; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">F1-Score</div> | |
| <div style="color: #2c3e50; font-size: 2em; font-weight: 700; line-height: 1;">{avg_f1}%</div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Analyse des Nœuds et Arêtes --> | |
| <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 24px; margin-bottom: 32px;"> | |
| <!-- Nœuds --> | |
| <div> | |
| <h3 style="color: #34495e; font-size: 1em; font-weight: 600; margin: 0 0 12px 0;"> | |
| Analyse des Nœuds | |
| </h3> | |
| <div style="background: #ffffff; border: 1px solid #e0e0e0; border-radius: 8px; overflow: hidden;"> | |
| <table style="width: 100%; border-collapse: collapse; font-size: 0.85em;"> | |
| <tr style="background: #f8f9fa;"> | |
| <th style="padding: 10px; text-align: left; color: #7f8c8d; font-weight: 600; font-size: 0.8em; text-transform: uppercase; letter-spacing: 0.5px;">Métrique</th> | |
| <th style="padding: 10px; text-align: right; color: #7f8c8d; font-weight: 600; font-size: 0.8em; text-transform: uppercase; letter-spacing: 0.5px;">Valeur</th> | |
| </tr> | |
| <tr> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">Précision</td> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{sum(r["details"]["precision_nodes"] for r in results) / n:.1f}%</td> | |
| </tr> | |
| <tr> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">Rappel</td> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{sum(r["details"]["recall_nodes"] for r in results) / n:.1f}%</td> | |
| </tr> | |
| <tr> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">F1-Score</td> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{sum(r["details"]["f1_nodes"] for r in results) / n:.1f}%</td> | |
| </tr> | |
| <tr style="background: #fef9e7; border-top: 2px solid #f39c12;"> | |
| <td style="padding: 10px; color: #e67e22; font-weight: 600;">Hallucination</td> | |
| <td style="padding: 10px; text-align: right; font-weight: 700; color: #e67e22;">{avg_hallucination}%</td> | |
| </tr> | |
| </table> | |
| </div> | |
| </div> | |
| <!-- Arêtes --> | |
| <div> | |
| <h3 style="color: #34495e; font-size: 1em; font-weight: 600; margin: 0 0 12px 0;"> | |
| Analyse des Arêtes | |
| </h3> | |
| <div style="background: #ffffff; border: 1px solid #e0e0e0; border-radius: 8px; overflow: hidden;"> | |
| <table style="width: 100%; border-collapse: collapse; font-size: 0.85em;"> | |
| <tr style="background: #f8f9fa;"> | |
| <th style="padding: 10px; text-align: left; color: #7f8c8d; font-weight: 600; font-size: 0.8em; text-transform: uppercase; letter-spacing: 0.5px;">Métrique</th> | |
| <th style="padding: 10px; text-align: right; color: #7f8c8d; font-weight: 600; font-size: 0.8em; text-transform: uppercase; letter-spacing: 0.5px;">Valeur</th> | |
| </tr> | |
| <tr> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">Précision</td> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{avg_precision_edges}%</td> | |
| </tr> | |
| <tr> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">Rappel</td> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{avg_recall_edges}%</td> | |
| </tr> | |
| <tr> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">F1-Score</td> | |
| <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{avg_f1_edges}%</td> | |
| </tr> | |
| <tr style="background: #fef5e7; border-top: 2px solid #e67e22;"> | |
| <td style="padding: 10px; color: #d35400; font-weight: 600;">Inversions</td> | |
| <td style="padding: 10px; text-align: right; font-weight: 700; color: #d35400;">{avg_inversion_rate}%</td> | |
| </tr> | |
| </table> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Hiérarchie --> | |
| <div style="margin-bottom: 32px;"> | |
| <h3 style="color: #34495e; font-size: 1em; font-weight: 600; margin: 0 0 12px 0;"> | |
| Structure Hiérarchique | |
| </h3> | |
| <div style="background: #ffffff; border: 1px solid #e0e0e0; border-radius: 8px; padding: 16px;"> | |
| <div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 12px;"> | |
| <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;"> | |
| <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">Profondeur GT</div> | |
| <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_gt_depth}</div> | |
| </div> | |
| <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;"> | |
| <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">Profondeur Test</div> | |
| <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_test_depth}</div> | |
| </div> | |
| <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;"> | |
| <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">Précision Niveaux</div> | |
| <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_level_accuracy}%</div> | |
| </div> | |
| <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;"> | |
| <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">Similarité</div> | |
| <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_structure_similarity}%</div> | |
| </div> | |
| </div> | |
| <div style="margin-top: 12px; padding-top: 12px; border-top: 1px solid #e0e0e0; color: #7f8c8d; font-size: 0.75em;"> | |
| {depth_matches} document(s) avec profondeur exacte sur {n} ({depth_match_rate}%) | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Graph Edit Distance --> | |
| <div style="margin-bottom: 32px;"> | |
| <h3 style="color: #34495e; font-size: 1em; font-weight: 600; margin: 0 0 12px 0;"> | |
| Graph Edit Distance (GED) | |
| </h3> | |
| <div style="background: #ffffff; border: 1px solid #e0e0e0; border-radius: 8px; padding: 16px;"> | |
| <!-- Métriques principales GED --> | |
| <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 12px; margin-bottom: 16px;"> | |
| <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;"> | |
| <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">GED Moyen</div> | |
| <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_ged}</div> | |
| </div> | |
| <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;"> | |
| <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">GED Normalisé</div> | |
| <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_normalized_ged}%</div> | |
| </div> | |
| <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;"> | |
| <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">Similarité</div> | |
| <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_structural_sim_ged}%</div> | |
| </div> | |
| </div> | |
| <!-- Détails des opérations --> | |
| <div style="padding-top: 12px; border-top: 1px solid #e0e0e0;"> | |
| <div style="color: #7f8c8d; font-size: 0.75em; margin-bottom: 8px; font-weight: 600;">Opérations totales requises :</div> | |
| <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 8px; font-size: 0.85em;"> | |
| <div style="background: #fef5e7; padding: 8px; border-radius: 4px;"> | |
| <span style="color: #7f8c8d;">Insertions nœuds :</span> | |
| <span style="color: #e67e22; font-weight: 600; margin-left: 8px;">{total_node_ins}</span> | |
| </div> | |
| <div style="background: #fef5e7; padding: 8px; border-radius: 4px;"> | |
| <span style="color: #7f8c8d;">Suppressions nœuds :</span> | |
| <span style="color: #e67e22; font-weight: 600; margin-left: 8px;">{total_node_del}</span> | |
| </div> | |
| <div style="background: #ebf5fb; padding: 8px; border-radius: 4px;"> | |
| <span style="color: #7f8c8d;">Insertions arêtes :</span> | |
| <span style="color: #3498db; font-weight: 600; margin-left: 8px;">{total_edge_ins}</span> | |
| </div> | |
| <div style="background: #ebf5fb; padding: 8px; border-radius: 4px;"> | |
| <span style="color: #7f8c8d;">Suppressions arêtes :</span> | |
| <span style="color: #3498db; font-weight: 600; margin-left: 8px;">{total_edge_del}</span> | |
| </div> | |
| </div> | |
| <div style="margin-top: 12px; padding: 8px; background: #f0f4f8; border-radius: 4px; font-size: 0.8em; color: #5a6c7d;"> | |
| 💡 Le GED mesure le nombre minimum d'opérations (insertions/suppressions) pour transformer le graphe test en graphe GT | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Statistiques Détaillées --> | |
| <div style="background: #f8f9fa; border-radius: 8px; padding: 16px;"> | |
| <h3 style="color: #34495e; font-size: 0.9em; font-weight: 600; margin: 0 0 12px 0;"> | |
| Statistiques de Génération | |
| </h3> | |
| <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 16px; font-size: 0.85em;"> | |
| <div> | |
| <span style="color: #7f8c8d;">Nœuds générés :</span> | |
| <span style="color: #2c3e50; font-weight: 600; margin-left: 8px;">{total_generated}</span> | |
| </div> | |
| <div> | |
| <span style="color: #7f8c8d;">Nœuds GT :</span> | |
| <span style="color: #2c3e50; font-weight: 600; margin-left: 8px;">{total_gt}</span> | |
| </div> | |
| <div> | |
| <span style="color: #7f8c8d;">Nœuds corrects :</span> | |
| <span style="color: #27ae60; font-weight: 600; margin-left: 8px;">{total_generated - total_hallucinated}</span> | |
| </div> | |
| <div> | |
| <span style="color: #7f8c8d;">Nœuds détectés :</span> | |
| <span style="color: #27ae60; font-weight: 600; margin-left: 8px;">{total_detected}</span> | |
| </div> | |
| <div> | |
| <span style="color: #7f8c8d;">Nœuds hallucinés :</span> | |
| <span style="color: #e74c3c; font-weight: 600; margin-left: 8px;">{total_hallucinated}</span> | |
| </div> | |
| <div> | |
| <span style="color: #7f8c8d;">Taux de détection :</span> | |
| <span style="color: #2c3e50; font-weight: 600; margin-left: 8px;">{avg_detection}%</span> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| """ | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # ÉTAPE 8: Construction des iframes pour la visualisation Neo4j | |
| # ═════════════════════════════════════════════════════════════════════ | |
| for r in results: | |
| doc = r["file"] | |
| docs.append(doc) | |
| # Rendu Mermaid directement (compatible HF, pas besoin de serveur local:8000) | |
| gt_path = os.path.join("GT", f"{doc}.json") | |
| if os.path.exists(gt_path): | |
| gt_nodes, gt_edges = load_json(gt_path) | |
| gt_json = {"nodes": [{"id": k, "label": v} for k, v in gt_nodes.items()], "edges": gt_edges} | |
| gt_mermaid = graph_json_to_mermaid(gt_json) | |
| else: | |
| gt_mermaid = "graph TD;" | |
| test_mermaid = graph_json_to_mermaid({"nodes": r["details"].get("test_nodes", []), "edges": r["details"].get("test_edges", [])}) | |
| gt_iframe_html = mermaid_to_html(gt_mermaid, f"mermaid_gt_{doc}") | |
| test_iframe_html = mermaid_to_html(test_mermaid, f"mermaid_test_{doc}") | |
| # Stocker les 2 iframes | |
| iframe_dict[doc] = { | |
| "gt": gt_iframe_html, | |
| "test": test_iframe_html | |
| } | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # ÉTAPE 9: Préparation des sorties pour Gradio | |
| # ═════════════════════════════════════════════════════════════════════ | |
| # Mise à jour du dropdown avec la liste des documents | |
| dropdown_update = gr_update(choices=docs, value=docs[0]) | |
| # Iframes initiaux (premier document) | |
| first_doc_iframes = iframe_dict[docs[0]] | |
| initial_gt_graph = first_doc_iframes["gt"] | |
| initial_test_graph = first_doc_iframes["test"] | |
| # Message de confirmation | |
| report_msg = f"Rapport sauvegardé : reports/{model_choice}_report.pdf" | |
| # ═════════════════════════════════════════════════════════════════ | |
| # RETOUR: Tuple de 8 éléments pour Gradio (1 de plus qu'avant) | |
| # ═════════════════════════════════════════════════════════════════ | |
| return ( | |
| images, # 1. Galerie d'images | |
| "\n\n".join(mermaids), # 2. Codes Mermaid | |
| dropdown_update, # 3. Dropdown | |
| iframe_dict, # 4. Dict des iframes | |
| initial_gt_graph, # 5. GT du premier doc | |
| initial_test_graph, # 6. Test du premier doc | |
| summary_html, # 7. Résumé | |
| report_msg # 8. Message statut | |
| ) | |
| # ============================================ | |
| # SECTION 9 : FONCTIONS AUXILIAIRES POUR L'INTERFACE | |
| # ============================================ | |
def select_graph(selected_doc, iframe_dict):
    """Return the GT and Test iframe HTML snippets for the chosen document.

    Args:
        selected_doc (str): Name of the selected document.
        iframe_dict (dict): Mapping doc_name -> {"gt": gt_iframe, "test": test_iframe}.

    Returns:
        tuple: (gt_iframe_html, test_iframe_html); a placeholder message pair
        when no document is selected or the document is unknown.
    """
    placeholder = '<div style="text-align:center;padding:40px;color:#7f8c8d;">Aucun graphe disponible</div>'
    if selected_doc and selected_doc in iframe_dict:
        entry = iframe_dict[selected_doc]
        return entry["gt"], entry["test"]
    return placeholder, placeholder
def aggregate_and_preview(paths, threshold, fuzzy_threshold=70, semantic_threshold=70):
    """
    Aggregate the exported per-document graphs and preview the result.

    Merges the JSON mindmaps listed in `paths` into "aggregate.json", builds
    an iframe preview of the merged graph, and computes comparison metrics
    against the original ground truth.

    Args:
        paths (list): Paths of the JSON graph files exported at step 5.1.
        threshold (float): Node frequency threshold, in percent (0-100);
            divided by 100 before being handed to aggregate_mindmaps().
        fuzzy_threshold (int): Fuzzy-matching threshold, in percent.
        semantic_threshold (int): Semantic-matching threshold, in percent.

    Returns:
        tuple: (aggregate_path_or_None, preview_or_error_html, metrics_html)
            — always 3 outputs, matching the Gradio wiring.
    """
    # Guard: nothing to aggregate — show a warning box instead of failing.
    if not paths or len(paths) == 0:
        error_html = """
        <div style="background:#fff3cd;border:1px solid #ffc107;border-radius:8px;padding:20px;text-align:center;">
            <h3 style="color:#856404;">⚠️ Aucun fichier JSON</h3>
            <p style="color:#856404;">Exportez d'abord les fichiers à l'étape 5.1</p>
        </div>
        """
        return None, error_html, ""  # ← 3 outputs maintenant
    logging.info(f"🔀 Début agrégation: {len(paths)} fichiers")
    try:
        # Merge all graphs; the frequency threshold is passed as a 0-1 ratio.
        agg = aggregate_mindmaps(
            paths,
            "aggregate.json",
            threshold / 100,
            fuzzy_threshold=fuzzy_threshold,
            semantic_threshold=semantic_threshold
        )
        # Empty result: every node fell below the frequency threshold.
        if len(agg['nodes']) == 0:
            warning_html = f"""
            <div style="background:#fff3cd;border:1px solid #ffc107;border-radius:8px;padding:20px;text-align:center;">
                <h3 style="color:#856404;">⚠️ Graphe vide</h3>
                <p style="color:#856404;">
                    Aucun nœud au-dessus du seuil de {threshold}%<br>
                    <strong>→ Diminuez le seuil (ex: 30%)</strong>
                </p>
            </div>
            """
            return "aggregate.json", warning_html, ""  # ← 3 outputs
    except Exception as e:
        # Aggregation failed: log the full traceback and return an error box.
        logging.error(f"❌ Erreur agrégation: {e}")
        import traceback
        logging.error(traceback.format_exc())
        error_html = f"""
        <div style="background:#f8d7da;border:1px solid #f5c6cb;border-radius:8px;padding:20px;text-align:center;">
            <h3 style="color:#721c24;">❌ Erreur</h3>
            <p style="color:#721c24;font-family:monospace;font-size:0.9em;">{str(e)}</p>
        </div>
        """
        return None, error_html, ""  # ← 3 outputs
    # Build the preview iframe; the timestamp query param defeats browser caching.
    # NOTE(review): this points at a local HTTP server on port 8000 — unlike
    # the per-document Mermaid preview, it is not Hugging-Face-compatible;
    # confirm that server is running when deployed.
    ts = int(time.time())
    src = f"http://127.0.0.1:8000/visual_aggregate.html?t={ts}"
    iframe_html = f"""
    <div style="border:1px solid #e0e0e0;border-radius:8px;overflow:hidden;background:white;">
        <div style="background:#f8f9fa;padding:12px;border-bottom:1px solid #e0e0e0;">
            <h4 style="margin:0;color:#2c3e50;">
                🔗 Graphe Agrégé
                <span style="color:#7f8c8d;font-size:0.85em;font-weight:normal;">
                    ({len(agg['nodes'])} nœuds, {len(agg['edges'])} arêtes)
                </span>
            </h4>
            <p style="margin:5px 0 0 0;font-size:0.75em;color:#7f8c8d;">
                Seuils: Fréquence {threshold}% | Fuzzy {fuzzy_threshold}% | Sémantique {semantic_threshold}%
            </p>
        </div>
        <iframe src="{src}"
                style="width:100%;height:700px;border:none;background:white;display:block;">
        </iframe>
    </div>
    """
    # Compare the (frequency-filtered) aggregate against the original ground
    # truth, propagating the same thresholds the user chose in the UI.
    metrics_html = compare_aggregated_with_gt(
        GT_ORIGINAL_PATH,
        "aggregate.json",
        fuzzy_threshold=fuzzy_threshold,
        semantic_threshold=semantic_threshold,
        frequency_threshold=threshold
    )
    logging.info(f"✅ Visualisation générée: {src}")
    return "aggregate.json", iframe_html, metrics_html  # ← 3 outputs
def export_mindmaps_to_json(files, model_choice="Gemini", output_dir=None):
    """
    Export mindmaps as JSON files for later aggregation.

    For each uploaded file (PDF or image): convert to an image if needed,
    encode it as Base64, generate a Mermaid graph with the selected model,
    convert it to the JSON graph format, and write it to `output_dir`.

    Args:
        files (list): Uploaded files (Gradio file objects or plain paths).
        model_choice (str): One of the keys of `model_functions` below;
            unknown values silently fall back to Gemini.
        output_dir (str | None): Target folder; defaults to OUTPUT_JSON_DIR.

    Returns:
        list[str]: Paths of the JSON files successfully written. A failure
        on one file is logged and skipped; it does not abort the batch.
    """
    # Resolve the output folder at call time (avoids a mutable/global default).
    if output_dir is None:
        output_dir = OUTPUT_JSON_DIR
    os.makedirs(output_dir, exist_ok=True)
    # Model name → Mermaid-generation callable (each takes a Base64 image).
    model_functions = {
        "Gemini": generate_mermaid_from_image_gemini,
        "LLAMA": generate_mermaid_from_llama,
        "GEMMA": generate_mermaid_from_gemma,
        "QWEN": generate_mermaid_from_qwen,
        "NVIDIA": generate_mermaid_from_nvidia,
        "InternVL": generate_mermaid_from_intern,
    }
    # Unknown model names fall back to Gemini.
    mermaid_fn = model_functions.get(model_choice, generate_mermaid_from_image_gemini)
    saved_paths = []
    logging.info("=" * 70)
    logging.info(f"📤 EXPORT JSON - Modèle: {model_choice}")
    logging.info(f"📁 Dossier: {output_dir}")
    logging.info(f"📄 Fichiers: {len(files) if files else 0}")
    logging.info("=" * 70)
    if not files:
        logging.warning("⚠️ Aucun fichier à exporter")
        return []
    # Process every file; one failure must not stop the whole batch.
    for idx, file in enumerate(files, 1):
        try:
            # Gradio file objects expose the path via .name; plain strings pass through.
            file_path = file.name if hasattr(file, 'name') else file
            logging.info(f"\n[{idx}/{len(files)}] Traitement: {os.path.basename(file_path)}")
            # PDFs are converted to an image first (the models consume images).
            img = file_path
            if file_path.lower().endswith(".pdf"):
                logging.info(" 📄 → 🖼️ Conversion PDF...")
                img = convert_pdf_to_image(file_path)
            # Base64 encoding expected by the model APIs.
            logging.info(" 🔐 Encodage...")
            b64 = encode_image(img)
            # Mermaid generation by the chosen model.
            logging.info(f" 🤖 Génération avec {model_choice}...")
            mermaid = mermaid_fn(b64)
            # Mermaid → JSON graph conversion.
            logging.info(" 🔄 Conversion JSON...")
            graph_json = mermaid_to_json(mermaid)
            # Save as <output_dir>/<original-basename>.json.
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            output_path = os.path.join(output_dir, f"{base_name}.json")
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(graph_json, f, ensure_ascii=False, indent=2)
            saved_paths.append(output_path)
            logging.info(f" ✅ Exporté: {output_path}")
            logging.info(f" • Nœuds: {len(graph_json['nodes'])}")
            logging.info(f" • Arêtes: {len(graph_json['edges'])}")
        except Exception as e:
            # Log the failure (with traceback) and move on to the next file.
            logging.error(f" ❌ Erreur: {e}")
            import traceback
            logging.error(traceback.format_exc())
            continue
    logging.info("=" * 70)
    logging.info(f"✅ EXPORT TERMINÉ: {len(saved_paths)}/{len(files)}")
    logging.info("=" * 70)
    return saved_paths
def compare_aggregated_with_gt(gt_path, agg_json_path, fuzzy_threshold=70, semantic_threshold=70,
                               frequency_threshold=40):
    """
    Compare the aggregated graph with the ground truth, after filtering the
    aggregated nodes on their 'freq' field (values already in percent).

    Nodes below `frequency_threshold` are dropped, labels are stripped of
    their frequency suffix, remaining labels are matched to GT labels via
    hybrid fuzzy/semantic matching, then precision/recall/F1 are computed
    for both nodes and (undirected, label-level) edges.

    Args:
        gt_path (str): Path to the ground-truth JSON graph.
        agg_json_path (str): Path to the aggregated JSON graph.
        fuzzy_threshold (int): Fuzzy-matching threshold, in percent.
        semantic_threshold (int): Semantic-matching threshold, in percent.
        frequency_threshold (float): Minimum node frequency (percent) kept.

    Returns:
        str: HTML fragment with the comparison metrics, or an HTML error box
        on any failure (file missing, bad JSON, matching error, ...).
    """
    try:
        # Load both graphs from disk.
        with open(gt_path, 'r', encoding='utf-8') as f:
            gt_data = json.load(f)
        with open(agg_json_path, 'r', encoding='utf-8') as f:
            agg_data = json.load(f)
        gt_nodes = gt_data.get('nodes', [])
        gt_edges = gt_data.get('edges', [])
        all_agg_nodes = agg_data.get('nodes', [])
        all_agg_edges = agg_data.get('edges', [])
        # Keep only nodes whose frequency (already in %) reaches the threshold.
        agg_nodes = [n for n in all_agg_nodes if n.get('freq', 0) >= frequency_threshold]
        # Strip the frequency suffix from labels (remove " (XX.XX%)").
        # NOTE(review): this truncates at the first '(' — a label that
        # legitimately contains parentheses would be shortened too.
        for node in agg_nodes:
            label = node.get('label', '')
            if '(' in label:
                # "OOP (33.33%)" → "OOP"
                node['label'] = label.split('(')[0].strip()
        # Diagnostic logs about the filtering step.
        logging.info("═" * 70)
        logging.info(f"🔍 FILTRAGE PAR FRÉQUENCE (seuil: {frequency_threshold}%)")
        logging.info("═" * 70)
        logging.info(f"📊 Avant filtrage : {len(all_agg_nodes)} nœuds, {len(all_agg_edges)} arêtes")
        logging.info(f"📊 Après filtrage : {len(agg_nodes)} nœuds")
        if all_agg_nodes:
            sample = all_agg_nodes[0]
            logging.info(f"📊 Exemple nœud brut : {sample}")
            logging.info(f"📊 Valeur 'freq' : {sample.get('freq', 'NON TROUVÉ')}")
        # Nothing survived the filter: report it instead of dividing by zero.
        if not agg_nodes:
            logging.error(f"❌ AUCUN NŒUD après filtrage à {frequency_threshold}%")
            logging.error(f" → Diminuez le seuil ou vérifiez aggregate.json")
            return '<div style="background:#f8d7da;padding:20px;border-radius:8px;">❌ Aucun nœud ne passe le seuil</div>'
        # Set of surviving node IDs, used to prune dangling edges.
        valid_node_ids = {n['id'] for n in agg_nodes}
        # Keep only edges whose both endpoints survived the filter.
        agg_edges = [
            e for e in all_agg_edges
            if e['source'] in valid_node_ids and e['target'] in valid_node_ids
        ]
        logging.info(f"📊 Arêtes conservées : {len(agg_edges)}")
        # Log the top surviving nodes, sorted by frequency.
        logging.info(f"\n📝 Nœuds conservés (≥{frequency_threshold}%) :")
        for i, n in enumerate(sorted(agg_nodes, key=lambda x: x.get('freq', 0), reverse=True)[:10], 1):
            logging.info(f" {i}. [{n.get('freq', 0):.1f}%] {n['label']}")
        if len(agg_nodes) > 10:
            logging.info(f" ... et {len(agg_nodes) - 10} autres")
        logging.info("═" * 70)
        # --- Node matching (hybrid fuzzy + semantic) -------------------------
        gt_labels = [n["label"] for n in gt_nodes]
        agg_labels = [n["label"] for n in agg_nodes]
        matches = hybrid_match_optimized(
            agg_labels, gt_labels,
            fuzzy_threshold=fuzzy_threshold,
            semantic_threshold=semantic_threshold,
            alpha=0.6
        )
        # Greedy 1-to-1 assignment: each GT label may be claimed only once.
        label_mapping = {}
        matched_gt_labels = set()
        for agg_lbl, (match, combined_score, fuzzy_score, sem_score) in zip(agg_labels, matches):
            if match and match not in matched_gt_labels:
                label_mapping[agg_lbl] = match
                matched_gt_labels.add(match)
        gt_labels_set = set(gt_labels)
        # Node-level counts and precision/recall/F1 (percentages).
        num_common_nodes = len(matched_gt_labels)
        num_missing_nodes = len(gt_labels_set) - num_common_nodes
        num_extra_nodes = len(agg_labels) - num_common_nodes
        node_precision = (num_common_nodes / len(agg_labels) * 100) if agg_labels else 0
        node_recall = (num_common_nodes / len(gt_labels) * 100) if gt_labels else 0
        node_f1 = (2 * node_precision * node_recall / (node_precision + node_recall)) if (
            node_precision + node_recall) > 0 else 0
        # --- Edge comparison (undirected, at label level) --------------------
        gt_id2lbl = {n["id"]: n["label"] for n in gt_nodes}
        agg_id2lbl = {n["id"]: n["label"] for n in agg_nodes}
        # GT edges as sorted label pairs (direction-insensitive).
        gt_edges_set = set()
        for e in gt_edges:
            src = gt_id2lbl.get(e["source"])
            tgt = gt_id2lbl.get(e["target"])
            if src and tgt:
                gt_edges_set.add(tuple(sorted([src, tgt])))
        # Aggregated edges, remapped through the node matching before comparing;
        # only edges whose both endpoints map into the GT label set count.
        agg_edges_set = set()
        for e in agg_edges:
            src = agg_id2lbl.get(e["source"])
            tgt = agg_id2lbl.get(e["target"])
            if src and tgt:
                src_mapped = label_mapping.get(src, src)
                tgt_mapped = label_mapping.get(tgt, tgt)
                if src_mapped in gt_labels_set and tgt_mapped in gt_labels_set:
                    agg_edges_set.add(tuple(sorted([src_mapped, tgt_mapped])))
        common_edges = gt_edges_set & agg_edges_set
        missing_edges = gt_edges_set - agg_edges_set
        extra_edges = agg_edges_set - gt_edges_set
        edge_precision = (len(common_edges) / len(agg_edges_set) * 100) if agg_edges_set else 0
        edge_recall = (len(common_edges) / len(gt_edges_set) * 100) if gt_edges_set else 0
        edge_f1 = (2 * edge_precision * edge_recall / (edge_precision + edge_recall)) if (
            edge_precision + edge_recall) > 0 else 0
        # Global score: unweighted mean of node F1 and edge F1.
        global_score = (node_f1 + edge_f1) / 2
        # Final summary logs.
        logging.info("═" * 70)
        logging.info("🔍 RÉSULTATS DE COMPARAISON")
        logging.info("═" * 70)
        logging.info(f"📊 Nœuds GT : {len(gt_labels)} | Agrégé : {len(agg_labels)}")
        logging.info(f"📊 Matchés : {num_common_nodes} | Manquants : {num_missing_nodes} | En trop : {num_extra_nodes}")
        logging.info(
            f"📊 Arêtes GT : {len(gt_edges_set)} | Agrégé : {len(agg_edges_set)} | Communes : {len(common_edges)}")
        logging.info(f"🎯 Score global : {global_score:.1f}%")
        logging.info(f" • F1 nœuds : {node_f1:.1f}%")
        logging.info(f" • F1 arêtes : {edge_f1:.1f}%")
        logging.info("═" * 70)
        # Color/icon/label bands for the score badge.
        if global_score >= 80:
            score_color = "#10b981"
            score_icon = "✅"
            score_label = "Excellent"
        elif global_score >= 60:
            score_color = "#3b82f6"
            score_icon = "👍"
            score_label = "Bon"
        elif global_score >= 40:
            score_color = "#f59e0b"
            score_icon = "⚠️"
            score_label = "Moyen"
        else:
            score_color = "#ef4444"
            score_icon = "❌"
            score_label = "Faible"
        # Render the metrics report as HTML (structure unchanged).
        metrics_html = f"""
        <div style="background: white; border: 2px solid #e0e0e0; border-radius: 12px; padding: 24px; margin: 20px 0; box-shadow: 0 4px 12px rgba(0,0,0,0.1);">
            <!-- Score Global -->
            <div style="text-align: center; margin-bottom: 24px; padding: 20px; background: linear-gradient(135deg, {score_color}15 0%, {score_color}05 100%); border-radius: 8px; border: 2px solid {score_color};">
                <div style="font-size: 3em; margin-bottom: 8px;">{score_icon}</div>
                <div style="font-size: 2.5em; font-weight: 700; color: {score_color}; margin-bottom: 4px;">{global_score:.1f}%</div>
                <div style="font-size: 1.1em; color: #555; font-weight: 600;">{score_label} - Correspondance avec le GT Original</div>
                <div style="font-size: 0.85em; color: #888; margin-top: 8px;">Marge d'amélioration : {100 - global_score:.1f}%</div>
                <div style="font-size: 0.75em; color: #999; margin-top: 4px; font-style: italic;">
                    Seuils: Fréquence {frequency_threshold}% | Fuzzy {fuzzy_threshold}% | Sémantique {semantic_threshold}%
                </div>
                <div style="font-size: 0.7em; color: #666; margin-top: 4px; padding: 6px; background: #f5f5f5; border-radius: 4px;">
                    ℹ️ Nœuds filtrés : {len(agg_nodes)}/{len(all_agg_nodes)} • Arêtes filtrées : {len(agg_edges)}/{len(all_agg_edges)}
                </div>
            </div>
            <!-- Métriques Détaillées -->
            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-bottom: 20px;">
                <!-- Nœuds -->
                <div style="background: #f8f9fa; padding: 16px; border-radius: 8px; border-left: 4px solid #667eea;">
                    <h4 style="margin: 0 0 12px 0; color: #667eea; font-size: 1em;">📊 Nœuds</h4>
                    <div style="margin-bottom: 8px;">
                        <span style="color: #888; font-size: 0.85em;">Précision</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {node_precision}%; height: 100%; background: #667eea;"></div>
                            </div>
                            <span style="font-weight: 700; color: #667eea; min-width: 50px;">{node_precision:.1f}%</span>
                        </div>
                    </div>
                    <div style="margin-bottom: 8px;">
                        <span style="color: #888; font-size: 0.85em;">Rappel</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {node_recall}%; height: 100%; background: #10b981;"></div>
                            </div>
                            <span style="font-weight: 700; color: #10b981; min-width: 50px;">{node_recall:.1f}%</span>
                        </div>
                    </div>
                    <div>
                        <span style="color: #888; font-size: 0.85em;">F1-Score</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {node_f1}%; height: 100%; background: #9b59b6;"></div>
                            </div>
                            <span style="font-weight: 700; color: #9b59b6; min-width: 50px;">{node_f1:.1f}%</span>
                        </div>
                    </div>
                </div>
                <!-- Arêtes -->
                <div style="background: #f8f9fa; padding: 16px; border-radius: 8px; border-left: 4px solid #f59e0b;">
                    <h4 style="margin: 0 0 12px 0; color: #f59e0b; font-size: 1em;">🔗 Arêtes</h4>
                    <div style="margin-bottom: 8px;">
                        <span style="color: #888; font-size: 0.85em;">Précision</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {edge_precision}%; height: 100%; background: #667eea;"></div>
                            </div>
                            <span style="font-weight: 700; color: #667eea; min-width: 50px;">{edge_precision:.1f}%</span>
                        </div>
                    </div>
                    <div style="margin-bottom: 8px;">
                        <span style="color: #888; font-size: 0.85em;">Rappel</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {edge_recall}%; height: 100%; background: #10b981;"></div>
                            </div>
                            <span style="font-weight: 700; color: #10b981; min-width: 50px;">{edge_recall:.1f}%</span>
                        </div>
                    </div>
                    <div>
                        <span style="color: #888; font-size: 0.85em;">F1-Score</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {edge_f1}%; height: 100%; background: #9b59b6;"></div>
                            </div>
                            <span style="font-weight: 700; color: #9b59b6; min-width: 50px;">{edge_f1:.1f}%</span>
                        </div>
                    </div>
                </div>
            </div>
            <!-- Statistiques -->
            <div style="background: #fafafa; padding: 16px; border-radius: 8px;">
                <h4 style="margin: 0 0 12px 0; color: #555; font-size: 0.95em;">📈 Statistiques Détaillées</h4>
                <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 12px; font-size: 0.85em;">
                    <div>
                        <div style="color: #888;">GT Original</div>
                        <div style="font-weight: 600; color: #2c3e50;">{len(gt_nodes)} nœuds, {len(gt_edges_set)} arêtes</div>
                    </div>
                    <div>
                        <div style="color: #888;">Agrégé (filtré)</div>
                        <div style="font-weight: 600; color: #2c3e50;">{len(agg_nodes)} nœuds, {len(agg_edges_set)} arêtes</div>
                    </div>
                    <div>
                        <div style="color: #888;">Communs</div>
                        <div style="font-weight: 600; color: #10b981;">{num_common_nodes} nœuds, {len(common_edges)} arêtes</div>
                    </div>
                    <div>
                        <div style="color: #888;">Manquants</div>
                        <div style="font-weight: 600; color: #ef4444;">{num_missing_nodes} nœuds, {len(missing_edges)} arêtes</div>
                    </div>
                    <div>
                        <div style="color: #888;">En trop</div>
                        <div style="font-weight: 600; color: #f59e0b;">{num_extra_nodes} nœuds, {len(extra_edges)} arêtes</div>
                    </div>
                    <div>
                        <div style="color: #888;">Couverture</div>
                        <div style="font-weight: 600; color: #3b82f6;">{node_recall:.1f}% du GT</div>
                    </div>
                </div>
            </div>
            <!-- Recommandations -->
            <div style="margin-top: 16px; padding: 12px; background: #eff6ff; border-left: 4px solid #3b82f6; border-radius: 4px;">
                <div style="font-weight: 600; color: #1e40af; margin-bottom: 4px;">💡 Recommandations</div>
                <div style="font-size: 0.85em; color: #3b82f6;">
                    {'✅ Excellent ! Le graphe agrégé est très fidèle au GT original.' if global_score >= 80 else
                    '👍 Bon résultat. Ajustez les seuils de clustering pour améliorer la couverture.' if global_score >= 60 else
                    f'⚠️ Résultat moyen. Diminuez le seuil de fréquence (actuellement {frequency_threshold}%) ou les seuils fuzzy/sémantique pour capturer plus de nœuds.' if global_score >= 40 else
                    f'❌ Faible correspondance. Diminuez drastiquement le seuil de fréquence (actuellement {frequency_threshold}%) et les seuils fuzzy/sémantique.'}
                </div>
            </div>
        </div>
        """
        return metrics_html
    except Exception as e:
        # Any failure (I/O, JSON, matching) is reported as an HTML error box.
        logging.error(f"❌ Erreur comparaison agrégé/GT : {e}")
        import traceback
        logging.error(traceback.format_exc())
        return f"""
        <div style="background:#f8d7da;border:1px solid #f5c6cb;border-radius:8px;padding:20px;text-align:center;">
            <h3 style="color:#721c24;">❌ Erreur de comparaison</h3>
            <p style="color:#721c24;font-family:monospace;font-size:0.9em;">{str(e)}</p>
        </div>
        """
| # ============================================ | |
| # SECTION 9 : FONCTIONS AUXILIAIRES POUR L'INTERFACE | |
| # ============================================ | |
def load_and_visualize_gt_original(gt_path=None):
    """Load the original ground-truth JSON and build its visualization iframe.

    Args:
        gt_path (str): Path to the GT JSON file; falls back to
            GT_ORIGINAL_PATH when empty or None.

    Returns:
        str: HTML iframe for the GT graph, or an HTML warning/error box
        when the file is missing or unreadable.
    """
    # Fall back to the configured default path when nothing is provided.
    gt_path = gt_path or GT_ORIGINAL_PATH
    if not os.path.exists(gt_path):
        return f"""
        <div style="background:#fff3cd;border:1px solid #ffc107;border-radius:8px;padding:20px;text-align:center;">
            <h3 style="color:#856404;">⚠️ Fichier GT introuvable</h3>
            <p style="color:#856404;">Chemin : {gt_path}</p>
            <p style="color:#856404;font-size:0.9em;">Vérifiez que le fichier existe ou modifiez GT_ORIGINAL_PATH dans le code</p>
        </div>
        """
    try:
        # Parse the ground-truth graph.
        with open(gt_path, 'r', encoding='utf-8') as fh:
            gt_data = json.load(fh)
        node_count = len(gt_data.get('nodes', []))
        edge_count = len(gt_data.get('edges', []))
        logging.info(f"✅ GT original chargé: {node_count} nœuds, {edge_count} arêtes")
        # Keep a temporary pretty-printed copy for the visualization page.
        with open("temp_gt_original.json", 'w', encoding='utf-8') as fh:
            json.dump(gt_data, fh, ensure_ascii=False, indent=2)
        # Cache-busting timestamp on the iframe URL.
        src = f"http://127.0.0.1:8000/visual_gt_original.html?t={int(time.time())}"
        return f"""
        <div style="border:1px solid #e0e0e0;border-radius:8px;overflow:hidden;background:white;">
            <div style="background:linear-gradient(135deg, #10b981 0%, #059669 100%);padding:12px;border-bottom:1px solid #e0e0e0;">
                <h4 style="margin:0;color:white;">
                    📘 Ground Truth Original (Professeur)
                    <span style="color:#d1fae5;font-size:0.85em;font-weight:normal;">
                        ({node_count} nœuds, {edge_count} arêtes)
                    </span>
                </h4>
                <p style="margin:5px 0 0 0;font-size:0.75em;color:#d1fae5;">
                    Référence : {os.path.basename(gt_path)}
                </p>
            </div>
            <iframe src="{src}"
                    style="width:100%;height:700px;border:none;background:white;display:block;">
            </iframe>
        </div>
        """
    except Exception as exc:
        logging.error(f"❌ Erreur lors du chargement du GT: {exc}")
        return f"""
        <div style="background:#f8d7da;border:1px solid #f5c6cb;border-radius:8px;padding:20px;text-align:center;">
            <h3 style="color:#721c24;">❌ Erreur de chargement</h3>
            <p style="color:#721c24;font-family:monospace;font-size:0.9em;">{str(exc)}</p>
        </div>
        """
def download_aggregated_graph():
    """Return the path of the aggregated-graph JSON file for download.

    Checks the legacy "aggregated_graph.json" name first, then falls back
    to "aggregate.json" — the file actually written by
    aggregate_and_preview()/aggregate_mindmaps() in this module.

    Returns:
        str | None: Path of the first existing candidate, or None when no
        aggregate has been produced yet (or on unexpected errors).
    """
    try:
        # BUG FIX: the original only looked for "aggregated_graph.json",
        # which this module never writes (the aggregation step saves
        # "aggregate.json"), so the download always returned None. The
        # legacy name is kept first for backward compatibility in case it
        # is produced elsewhere.
        for candidate in ("aggregated_graph.json", "aggregate.json"):
            if os.path.exists(candidate):
                return candidate
        return None
    except Exception as e:
        logging.error(f"Erreur lors du téléchargement du graphe agrégé: {e}")
        return None
def generate_pdf_report():
    """Return the most recent PDF report, generating a placeholder if none exists.

    Looks in the "reports" folder for existing PDFs and returns the most
    recently modified one; otherwise writes a minimal empty report there.

    Returns:
        str | None: Path to a PDF file, or None on error.
    """
    try:
        # Reuse an existing report when available.
        if os.path.exists("reports"):
            report_files = glob.glob("reports/*.pdf")
            if report_files:
                # BUG FIX: glob's ordering is OS/filesystem-dependent, so
                # report_files[-1] was not "the last report generated".
                # Pick the most recently modified file instead.
                return max(report_files, key=os.path.getmtime)
        # No report yet: produce a minimal placeholder PDF.
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=16)
        pdf.cell(200, 10, txt="Rapport Doc2GL", ln=True, align='C')
        pdf.set_font("Arial", size=12)
        pdf.cell(200, 10, txt="Aucune donnée à afficher", ln=True, align='C')
        os.makedirs("reports", exist_ok=True)
        report_path = "reports/rapport_vide.pdf"
        pdf.output(report_path)
        return report_path
    except Exception as e:
        logging.error(f"Erreur lors de la génération du rapport PDF: {e}")
        return None
| # ============================================ | |
| # SECTION 10 : INTERFACE GRADIO | |
| # ============================================ | |
def create_auth_interface():
    """Build a simple standalone login screen.

    When the APP_PASSWORD environment variable is unset or empty, every
    login is accepted; otherwise only admin/APP_PASSWORD succeeds.

    Returns:
        gr.Blocks: The authentication demo (not launched here).
    """
    app_password = os.environ.get("APP_PASSWORD", "")

    def check_login(username, password):
        # BUG FIX: this callback feeds a single gr.HTML output component,
        # so it must return exactly one value. The original returned a
        # (bool, message) tuple, which Gradio rejects for a single output
        # ("too many output values").
        if not app_password:
            return "Accès autorisé"
        if username == "admin" and password == app_password:
            return "Accès autorisé"
        return "Identifiants incorrects"

    with gr.Blocks(
        title="Doc2GL - Connexion",
        css=""".auth-container {
        max-width: 400px;
        margin: 100px auto;
        padding: 40px;
        background: #18181b;
        border: 1px solid #27272a;
        border-radius: 16px;
        text-align: center;
    }
    .auth-title {
        font-size: 2.5em;
        font-weight: bold;
        margin-bottom: 10px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
    }
    .auth-subtitle {
        color: #a1a1aa;
        margin-bottom: 30px;
    }
    """,
        theme=gr.themes.Soft()
    ) as auth_demo:
        with gr.Column(elem_classes=["auth-container"]):
            gr.HTML('<h1 class="auth-title">🧠 Doc2GL</h1>')
            gr.HTML('<p class="auth-subtitle">Document to Graph Learning</p>')
            with gr.Column():
                # Username is pre-filled; only "admin" is accepted.
                username = gr.Textbox(
                    label="Nom d'utilisateur",
                    placeholder="admin",
                    value="admin"
                )
                password = gr.Textbox(
                    label="Mot de passe",
                    type="password",
                    placeholder="AZERTY123"
                )
                login_btn = gr.Button(
                    "🔐 Se connecter",
                    variant="primary",
                    size="lg"
                )
                # Feedback area updated by the click handler below.
                result = gr.HTML("")
            login_btn.click(
                fn=check_login,
                inputs=[username, password],
                outputs=result
            )
    return auth_demo
def gradio_interface():
    """Build and launch the full Doc2GL Gradio UI (main entry point).

    Layout: an optional login gate (shown only when the APP_PASSWORD
    environment variable is set), the upload + model-selection form,
    advanced matching parameters, and the results area (page gallery,
    Mermaid code, GT/Test graph panels, aggregated view, performance tab).

    Blocking call: returns only when the Gradio server stops
    (``demo.launch()`` at the end).
    """
    # A non-empty APP_PASSWORD enables the in-app login gate below.
    app_password = os.environ.get("APP_PASSWORD", "")
    # ═════════════════════════════════════════════════════════════════════
    # CUSTOM CSS (CSS variables in :root make the theme easy to tweak)
    # ═════════════════════════════════════════════════════════════════════
    custom_css = r"""
    @import url('https://fonts.googleapis.com/css2?family=Inter:ital,opsz,wght@0,14..32,100..900;1,14..32,100..900&display=swap');
    /* ===== GLOBAL RESET ===== */
    *, *::before, *::after { box-sizing: border-box; margin: 0; }
    :root {
    --bg: #09090b;
    --surface: #18181b;
    --surface-2: #1f1f23;
    --surface-3: #27272a;
    --border: #27272a;
    --border-light: #3f3f46;
    --text: #fafafa;
    --text-2: #a1a1aa;
    --text-3: #71717a;
    --text-4: #52525b;
    --violet-400: #a78bfa;
    --violet-500: #8b5cf6;
    --violet-600: #7c3aed;
    --violet-900: rgba(139,92,246,0.12);
    --emerald-400: #34d399;
    --emerald-500: #10b981;
    --emerald-600: #059669;
    --emerald-900: rgba(52,211,153,0.12);
    --amber-900: rgba(245,158,11,0.15);
    --amber-400: #fbbf24;
    --font: 'Inter', system-ui, -apple-system, sans-serif;
    }
    /* ===== AUTH CONTAINER ===== */
    .auth-container {
    max-width: 400px;
    margin: 100px auto;
    padding: 40px;
    background: var(--surface);
    border: 1px solid var(--border);
    border-radius: 16px;
    text-align: center;
    }
    .auth-title {
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 10px;
    background: linear-gradient(135deg, var(--violet-400), var(--emerald-400));
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    }
    .auth-subtitle {
    color: var(--text-3);
    margin-bottom: 30px;
    }
    /* ===== BASE ===== */
    body {
    background: var(--bg) !important;
    color: var(--text) !important;
    font-family: var(--font) !important;
    -webkit-font-smoothing: antialiased;
    }
    .gradio-container {
    background: var(--bg) !important;
    max-width: 1100px !important;
    margin: 0 auto !important;
    padding: 0 32px 60px !important;
    font-family: var(--font) !important;
    color: var(--text) !important;
    }
    /* ===== HIDE GRADIO CHROME ===== */
    footer, .built-with, .show-api, .svelte-1ed2p3z,
    .gradio-container > .prose,
    #component-0 > .prose { display: none !important; }
    .gradio-container .gap { gap: 16px !important; }
    .block { border: none !important; box-shadow: none !important; background: transparent !important; padding: 0 !important; }
    .form { background: transparent !important; border: none !important; gap: 20px !important; }
    .panel { background: transparent !important; border: none !important; }
    /* ===== HERO ===== */
    .hero {
    position: relative;
    background: var(--surface);
    border: 1px solid var(--border);
    border-radius: 28px;
    padding: 72px 48px 64px;
    margin: 16px 0 40px;
    text-align: center;
    overflow: hidden;
    }
    .hero::before {
    content: '';
    position: absolute; inset: 0;
    background:
    radial-gradient(ellipse 80% 60% at 50% -10%, rgba(139,92,246,0.25), transparent),
    radial-gradient(ellipse 60% 50% at 80% 110%, rgba(52,211,153,0.1), transparent);
    pointer-events: none;
    }
    .hero-badge {
    display: inline-flex; align-items: center; gap: 8px;
    background: rgba(255,255,255,0.05);
    border: 1px solid var(--border-light);
    padding: 7px 18px;
    border-radius: 100px;
    font-size: 0.8rem; font-weight: 500;
    color: var(--text-2);
    margin-bottom: 24px;
    }
    .hero-badge .live {
    width: 8px; height: 8px;
    background: var(--emerald-400);
    border-radius: 50%;
    box-shadow: 0 0 8px var(--emerald-400);
    animation: blink 2.5s ease-in-out infinite;
    }
    @keyframes blink {
    0%,100% { opacity:1; } 50% { opacity:0.3; }
    }
    .hero h1 {
    color: var(--text) !important;
    font-size: 3rem;
    font-weight: 800;
    letter-spacing: -0.04em;
    line-height: 1;
    margin: 0 0 12px;
    }
    .hero h1 span {
    background: linear-gradient(135deg, var(--violet-400), var(--emerald-400));
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    }
    .hero p {
    color: var(--text-3) !important;
    font-size: 1.1rem;
    font-weight: 400;
    margin: 0;
    line-height: 1.6;
    }
    /* ===== CARDS ===== */
    .card-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 14px; margin-bottom: 24px; }
    .card {
    background: var(--surface);
    border: 1px solid var(--border);
    border-radius: 16px;
    padding: 24px;
    transition: all 0.15s ease;
    }
    .card:hover { border-color: var(--border-light); box-shadow: 0 4px 20px rgba(0,0,0,0.3); }
    .card-icon {
    width: 40px; height: 40px;
    border-radius: 12px;
    display: flex; align-items: center; justify-content: center;
    font-size: 1.1rem;
    margin-bottom: 14px;
    }
    .card-icon.violet { background: var(--violet-900); }
    .card-icon.emerald { background: var(--emerald-900); }
    .card h3 { font-size: 0.92rem; font-weight: 700; color: var(--text); margin: 0 0 6px; }
    .card p { font-size: 0.82rem; color: var(--text-3); line-height: 1.55; margin: 0; }
    /* ===== SECTION ===== */
    .section-wrap {
    background: var(--surface);
    border: 1px solid var(--border);
    border-radius: 20px;
    padding: 28px;
    margin: 24px 0;
    }
    .section-label {
    display: inline-block;
    font-size: 0.68rem; font-weight: 700;
    text-transform: uppercase; letter-spacing: 0.08em;
    padding: 4px 12px;
    border-radius: 100px;
    margin-bottom: 16px;
    }
    .section-label.optional { background: var(--amber-900); color: var(--amber-400); }
    .section-label.results { background: var(--violet-900); color: var(--violet-400); }
    .section-title {
    font-size: 1.05rem; font-weight: 700; color: var(--text);
    margin: 0 0 4px;
    }
    .section-desc {
    font-size: 0.84rem; color: var(--text-3); margin: 0 0 20px; line-height: 1.55;
    }
    /* ===== BUTTONS ===== */
    button, .gr-button { font-family: var(--font) !important; cursor: pointer; }
    button.primary-button, .primary-button > button {
    background: var(--text) !important;
    color: var(--bg) !important;
    font-size: 0.92rem !important;
    padding: 13px 28px !important;
    font-weight: 600 !important;
    border-radius: 12px !important;
    border: none !important;
    box-shadow: 0 0 0 1px rgba(255,255,255,0.1) !important;
    transition: all 0.15s ease !important;
    letter-spacing: -0.01em !important;
    }
    button.primary-button:hover, .primary-button > button:hover {
    background: #e4e4e7 !important;
    box-shadow: 0 4px 16px rgba(255,255,255,0.06) !important;
    transform: translateY(-1px) !important;
    }
    button.primary-button:active, .primary-button > button:active {
    transform: translateY(0) !important;
    }
    button.secondary-button, .secondary-button > button {
    background: var(--surface-2) !important;
    color: var(--text-2) !important;
    font-weight: 600 !important;
    border-radius: 12px !important;
    border: 1px solid var(--border) !important;
    box-shadow: none !important;
    transition: all 0.15s ease !important;
    font-size: 0.88rem !important;
    }
    button.secondary-button:hover, .secondary-button > button:hover {
    background: var(--surface-3) !important;
    color: var(--text) !important;
    border-color: #52525b !important;
    }
    /* ===== TABS ===== */
    .tabs { margin-top: 0 !important; }
    .tabs > .tab-nav {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: 14px !important;
    padding: 5px !important;
    gap: 4px !important;
    margin-bottom: 16px !important;
    display: inline-flex !important;
    }
    .tabs > .tab-nav > button {
    background: transparent !important;
    color: var(--text-3) !important;
    border: none !important;
    border-radius: 10px !important;
    padding: 10px 20px !important;
    font-weight: 600 !important;
    font-size: 0.84rem !important;
    transition: all 0.12s ease !important;
    }
    .tabs > .tab-nav > button.selected {
    background: var(--surface-2) !important;
    color: var(--text) !important;
    }
    .tabitem {
    background: var(--surface) !important;
    border-radius: 20px !important;
    padding: 32px !important;
    border: 1px solid var(--border) !important;
    }
    /* ===== FORM ELEMENTS ===== */
    input, textarea, select {
    background: var(--surface-2) !important;
    color: var(--text) !important;
    border: 1px solid var(--border) !important;
    border-radius: 10px !important;
    font-family: var(--font) !important;
    font-size: 0.9rem !important;
    padding: 10px 14px !important;
    transition: all 0.12s ease !important;
    }
    input:focus, textarea:focus, select:focus {
    border-color: var(--violet-500) !important;
    box-shadow: 0 0 0 2px rgba(139,92,246,0.1) !important;
    outline: none !important;
    }
    .wrap { background: var(--surface-2) !important; border: 1px solid var(--border) !important; border-radius: 10px !important; }
    label, .label-wrap > span {
    color: var(--text-2) !important;
    font-weight: 600 !important;
    font-size: 0.82rem !important;
    }
    .gr-check-radio { accent-color: var(--violet-500) !important; }
    .gradio-slider input[type="range"] { accent-color: var(--violet-500) !important; }
    .info { color: var(--text-4) !important; font-size: 0.78rem !important; }
    /* ===== ACCORDION ===== */
    .gradio-accordion {
    background: var(--surface) !important;
    border: 1px solid var(--border) !important;
    border-radius: 14px !important;
    margin: 16px 0 !important;
    overflow: hidden;
    }
    .gradio-accordion > .label-wrap {
    padding: 16px 20px !important;
    font-weight: 600 !important;
    background: var(--surface-2) !important;
    }
    /* ===== HELP NOTES ===== */
    .help-note { display: none; }
    /* ===== GRAPH PANELS ===== */
    .graph-panel-header {
    padding: 14px 20px;
    border-radius: 14px 14px 0 0;
    text-align: center;
    font-weight: 700;
    font-size: 0.88rem;
    letter-spacing: -0.01em;
    }
    .graph-panel-header.gt {
    background: linear-gradient(135deg, var(--emerald-500), var(--emerald-600));
    color: white;
    }
    .graph-panel-header.test {
    background: linear-gradient(135deg, var(--violet-500), var(--violet-600));
    color: white;
    }
    .graph-panel-header.agg {
    background: linear-gradient(135deg, #f59e0b, #d97706);
    color: white;
    }
    .graph-placeholder {
    background: var(--surface-2);
    border: 2px dashed var(--border);
    border-radius: 14px;
    padding: 48px;
    text-align: center;
    color: var(--text-4);
    display: flex;
    flex-direction: column;
    align-items: center; justify-content: center;
    gap: 16px;
    font-size: 0.88rem; font-weight: 500;
    }
    .graph-placeholder svg {
    width: 40px; height: 40px;
    stroke: var(--text-4);
    }
    /* ===== FILE UPLOAD ===== */
    .gradio-file, .gradio-files { border-radius: 14px !important; }
    /* ===== GALLERY & PLOTS ===== */
    .gradio-gallery { border-radius: 14px !important; overflow: hidden; border: 1px solid var(--border) !important; }
    .gradio-plot { background: var(--surface) !important; border-radius: 14px !important; border: 1px solid var(--border) !important; }
    /* ===== SCROLLBAR ===== */
    ::-webkit-scrollbar { width: 5px; height: 5px; }
    ::-webkit-scrollbar-track { background: transparent; }
    ::-webkit-scrollbar-thumb { background: var(--surface-3); border-radius: 10px; }
    /* ===== RESPONSIVE ===== */
    @media (max-width: 768px) {
    .card-grid { grid-template-columns: 1fr; }
    .hero h1 { font-size: 2rem; }
    .hero { padding: 48px 24px 40px; border-radius: 20px; }
    .gradio-container { padding: 0 16px 40px !important; }
    }
    """
    # ═════════════════════════════════════════════════════════════════════
    # INTERFACE CONSTRUCTION (WITH OR WITHOUT AUTH)
    # ═════════════════════════════════════════════════════════════════════
    with gr.Blocks(title="Doc2GL", css=custom_css, theme=gr.themes.Base()) as demo:
        # Authentication state (True once check_login succeeds).
        authenticated = gr.State(False)
        # ═════════════════════════════════════════════════════════════════════
        # AUTHENTICATION CONTAINER (visible only while not authenticated)
        # ═════════════════════════════════════════════════════════════════════
        with gr.Column(visible=bool(app_password), elem_id="login_container") as login_container:
            with gr.Column(elem_classes=["auth-container"]):
                gr.HTML('<h1 class="auth-title">🧠 Doc2GL</h1>')
                gr.HTML('<p class="auth-subtitle">Document to Graph Learning</p>')
                with gr.Column():
                    username = gr.Textbox(
                        label="Nom d'utilisateur",
                        placeholder="admin",
                        value="admin"
                    )
                    password = gr.Textbox(
                        label="Mot de passe",
                        type="password",
                        placeholder="Entrez votre mot de passe"
                    )
                    login_btn = gr.Button(
                        "🔐 Se connecter",
                        variant="primary",
                        size="lg"
                    )
                    auth_result = gr.HTML("")
        # ═════════════════════════════════════════════════════════════════════
        # MAIN CONTAINER (visible when authenticated, or when no password set)
        # ═════════════════════════════════════════════════════════════════════
        with gr.Column(visible=not bool(app_password), elem_id="app_container") as app_container:
            # State variable (unused in this code but kept available).
            redirect_state = gr.State("")
            # ─────────────────────────────────────────────────────────────────
            # HEADER
            # ─────────────────────────────────────────────────────────────────
            gr.HTML('''
            <div class="hero">
            <div class="hero-badge"><span class="live"></span> Document Intelligence</div>
            <h1>Doc<span>2</span>GL</h1>
            <p>Transformez vos documents en graphes de connaissances intelligents</p>
            </div>
            ''')
            # ─────────────────────────────────────────────────────────────────
            # STEPS 1-2: Upload and Model selection
            # ─────────────────────────────────────────────────────────────────
            gr.HTML('''
            <div class="card-grid">
            <div class="card">
            <div class="card-icon violet">📄</div>
            <h3>Importez vos documents</h3>
            <p>Glissez vos PDF ou images ci-dessous pour commencer l'analyse.</p>
            </div>
            <div class="card">
            <div class="card-icon emerald">🤖</div>
            <h3>Choisissez votre modèle</h3>
            <p>Sélectionnez le modèle d'IA le plus adapté à vos documents.</p>
            </div>
            </div>
            ''')
            # Upload zone and model selection (side by side).
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.Files(
                        label="📁 Documents (PDF/Images)",
                        file_types=[".pdf", ".jpg", ".jpeg", ".png"]
                    )
                with gr.Column(scale=1):
                    model_selector = gr.Dropdown(
                        label="🤖 Modèle d'IA",
                        choices=[
                            "Gemini 1.5 Flash",
                            "Gemma 2 9B",
                            "LLaMA 3 8B",
                            "Qwen 2 7B",
                            "NVIDIA Nemotron Nano 12B",
                            "InternVL 2 8B"
                        ],
                        value="Gemini 1.5 Flash",
                        info="Choisissez le modèle pour générer les graphes"
                    )
            # Processing options.
            with gr.Row():
                use_semantic_checkbox = gr.Checkbox(
                    label="🧠 Utiliser la similarité sémantique",
                    value=True,
                    info="Combine fuzzy matching et embeddings pour une meilleure précision"
                )
            with gr.Accordion("⚙️ Paramètres avancés", open=False):
                with gr.Row():
                    fuzzy_threshold_slider = gr.Slider(
                        label="🎯 Seuil fuzzy (%)",
                        minimum=60,
                        maximum=100,
                        value=80,
                        step=5,
                        info="Seuil de similarité textuelle (fuzzy matching)"
                    )
                    semantic_threshold_slider = gr.Slider(
                        label="🔍 Seuil sémantique (%)",
                        minimum=50,
                        maximum=90,
                        value=70,
                        step=5,
                        info="Seuil de similarité sémantique (embeddings)"
                    )
                with gr.Row():
                    alpha_slider = gr.Slider(
                        label="⚖️ Alpha (poids sémantique)",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.6,
                        step=0.1,
                        info="Poids du matching sémantique vs fuzzy (0=fuzzy seul, 1=sémantique seul)"
                    )
                    edge_mode_radio = gr.Radio(
                        label="🔗 Mode de détection des arêtes",
                        choices=["hybrid", "co-occurrence", "semantic"],
                        value="hybrid",
                        info="Stratégie pour détecter les relations entre entités"
                    )
            # Generation button.
            generate_btn = gr.Button(
                "🚀 Générer les graphes",
                variant="primary",
                size="lg",
                elem_classes=["primary-button"]
            )
            # ─────────────────────────────────────────────────────────────────
            # RESULTS (Gallery + Mermaid)
            # ─────────────────────────────────────────────────────────────────
            gr.HTML('''
            <div class="section-wrap">
            <span class="section-label results">📊 Résultats</span>
            <h3 class="section-title">Graphes générés</h3>
            <p class="section-desc">Visualisez les graphes extraits de vos documents</p>
            </div>
            ''')
            image_preview = gr.Gallery(label="📷 Pages analysées", show_label=True, elem_id="gallery", columns=2, height="auto", allow_preview=True)
            mermaid_output = gr.Textbox(label="📝 Code Mermaid", lines=10, interactive=True, placeholder="Le code Mermaid apparaîtra ici après génération...")
            # Dropdown to pick one processed document.
            docs_dropdown = gr.Dropdown(
                label="📄 Sélectionner un document",
                choices=[],
                info="Choisissez un document pour visualiser son graphe détaillé"
            )
            # Internal storage for per-document iframe HTML (keyed by doc name
            # — presumably filled by process_files; verify against its return).
            hidden_iframes = gr.State({})
            # Panels for the Ground-Truth and Test graphs.
            with gr.Row():
                with gr.Column():
                    gt_graph_panel = gr.HTML(
                        value='<div class="graph-placeholder">Sélectionnez un document pour voir le graphe GT</div>',
                        label="📊 Graphe Ground Truth"
                    )
                with gr.Column():
                    test_graph_panel = gr.HTML(
                        value='<div class="graph-placeholder">Sélectionnez un document pour voir le graphe Test</div>',
                        label="🔬 Graphe Généré"
                    )
            # Performance summary.
            comparison_result_html = gr.HTML(
                value='<div style="text-align:center;padding:24px;color:var(--text-4);font-size:0.86rem;font-weight:500;">Les métriques apparaîtront ici après génération</div>',
                label="📈 Résumé"
            )
            # Report status (hidden until a report is produced).
            report_status = gr.HTML(
                value="",
                visible=False
            )
            # ─────────────────────────────────────────────────────────────────
            # TABS: Aggregated view vs Original GT + Performance
            # ─────────────────────────────────────────────────────────────────
            with gr.Tabs(elem_classes=["tabs"]):
                with gr.TabItem("📊 Vue agrégée"):
                    gr.HTML('''
                    <div class="section-wrap">
                    <span class="section-label optional">🔗 Vue agrégée</span>
                    <h3 class="section-title">Graphe consolidé</h3>
                    <p class="section-desc">Vue d'ensemble de tous les graphes combinés</p>
                    </div>
                    ''')
                    aggregated_graph = gr.HTML(
                        value='<div class="graph-placeholder">Les graphes agrégés apparaîtront ici après traitement</div>',
                        label="🔗 Graphe agrégé"
                    )
                    with gr.Row():
                        aggregate_btn = gr.Button(
                            "🔗 Agréger les graphes",
                            variant="secondary",
                            elem_classes=["secondary-button"]
                        )
                        download_agg_btn = gr.Button(
                            "📥 Télécharger (JSON)",
                            variant="secondary",
                            elem_classes=["secondary-button"]
                        )
                with gr.TabItem("📈 Performances"):
                    gr.HTML('''
                    <div class="section-wrap">
                    <span class="section-label results">📈 Performances</span>
                    <h3 class="section-title">Métriques détaillées</h3>
                    <p class="section-desc">Analyse quantitative de la qualité des graphes générés</p>
                    </div>
                    ''')
                    performance_plot = gr.Plot(
                        label="📊 Boxplots des métriques",
                        visible=False
                    )
                    download_report_btn = gr.Button(
                        "📄 Générer le rapport PDF",
                        variant="primary",
                        elem_classes=["primary-button"]
                    )
        # ─────────────────────────────────────────────────────────────────
        # EVENT WIRING
        # ─────────────────────────────────────────────────────────────────
        # Authentication: 3 return values map onto the 3 outputs
        # [authenticated, login_container, app_container].
        def check_login(username, password):
            if not app_password:
                return True, gr.update(visible=False), gr.update(visible=True)
            if username == "admin" and password == app_password:
                return True, gr.update(visible=False), gr.update(visible=True)
            return False, gr.update(visible=True), gr.update(visible=False)
        def show_auth_error(success):
            # Reads the `authenticated` state set by check_login above.
            if success:
                return ""
            return '<div style="color: #ef4444; text-align: center; margin-top: 10px;">❌ Identifiants incorrects</div>'
        login_btn.click(
            fn=check_login,
            inputs=[username, password],
            outputs=[authenticated, login_container, app_container]
        ).then(
            fn=show_auth_error,
            inputs=[authenticated],
            outputs=[auth_result]
        )
        # Graph generation (process_files is defined elsewhere in this file).
        generate_btn.click(
            fn=process_files,
            inputs=[
                file_input,
                model_selector,
                use_semantic_checkbox,
                fuzzy_threshold_slider,
                semantic_threshold_slider,
                alpha_slider,
                edge_mode_radio
            ],
            outputs=[
                image_preview,
                mermaid_output,
                docs_dropdown,
                hidden_iframes,
                gt_graph_panel,
                test_graph_panel,
                comparison_result_html,
                report_status
            ]
        )
        # Document selection -> refresh the GT/Test graph panels.
        docs_dropdown.change(
            fn=select_graph,
            inputs=[docs_dropdown, hidden_iframes],
            outputs=[gt_graph_panel, test_graph_panel]
        )
        # Comparison with the original GT.
        # NOTE(review): these two components are created hidden and no event
        # handler is wired to them in this view — presumably placeholders for
        # a future feature; confirm before removing.
        with gr.Row():
            gt_file_input = gr.File(
                label="📁 Ground Truth original (JSON)",
                file_types=[".json"],
                visible=False
            )
            compare_with_gt_btn = gr.Button(
                "🔍 Comparer avec GT original",
                variant="secondary",
                elem_classes=["secondary-button"],
                visible=False
            )
        # Downloads.
        # NOTE(review): gr.File() instantiated inline in `outputs` is never
        # rendered in the layout, so the produced file may not be shown to
        # the user — verify in the running app.
        download_agg_btn.click(
            fn=download_aggregated_graph,
            inputs=[],
            outputs=[gr.File()]
        )
        download_report_btn.click(
            fn=generate_pdf_report,
            inputs=[],
            outputs=[gr.File()]
        )
    # ═════════════════════════════════════════════════════════════════════
    # LAUNCH (blocking until the server stops)
    # ═════════════════════════════════════════════════════════════════════
    demo.launch(show_api=False)
| # ============================================ | |
| # POINT D'ENTRÉE DU PROGRAMME | |
| # ============================================ | |
def start_http_server(port=8000):
    """Start a background HTTP server serving the current working directory.

    It serves the static assets (visual.html, neovis.js, graph_diff JSON,
    etc.) that the graph iframes load. The server runs in a daemon thread,
    so it terminates with the main process.

    Args:
        port: TCP port to bind (0 lets the OS pick a free ephemeral port).

    Returns:
        The running socketserver.TCPServer instance, or None if binding
        failed (existing callers ignore the return value, so this is
        backward compatible).
    """
    import http.server
    import socketserver
    import threading

    handler = http.server.SimpleHTTPRequestHandler
    try:
        # allow_reuse_address avoids "Address already in use" errors when
        # restarting quickly while the old socket lingers in TIME_WAIT.
        socketserver.TCPServer.allow_reuse_address = True
        httpd = socketserver.TCPServer(("", port), handler)
    except Exception as e:
        logging.warning(f"Impossible de démarrer le serveur HTTP sur le port {port}: {e}")
        return None
    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    thread.start()
    logging.info(f"Serveur HTTP démarré sur le port {port}")
    return httpd
| if __name__ == "__main__": | |
| # Vérifier l'authentification si APP_PASSWORD est configuré | |
| app_password = os.environ.get("APP_PASSWORD", "") | |
| if app_password: | |
| # Lancer l'interface avec authentification | |
| start_http_server(8000) | |
| gradio_interface() | |
| else: | |
| # Pas de mot de passe, lancer directement l'interface principale | |
| start_http_server(8000) | |
| gradio_interface() | |