# doc2gl / demo_user.py
# Doc2GL Deploy
# Gemini: add model fallback list and GEMINI_MODEL override to avoid 404 NOT_FOUND
# 7a2d1b0
"""
╔══════════════════════════════════════════════════════════════════════════════╗
║ DOC2GL v2.0 ║
║ ║
║ Description : Système de conversion de documents (PDF/Images) en graphes ║
║ de connaissances avec évaluation de la qualité par ║
║ comparaison fuzzy et sémantique ║
║ ║
║ Auteur : YOUMBI CHATUE DANIELE ║
║ Date : [10/09/2025] ║
║ Version : 2.0 ║
╚══════════════════════════════════════════════════════════════════════════════╝
"""
# ============================================
# IMPORTS DES BIBLIOTHÈQUES
# ============================================
# Interface utilisateur
import gradio as gr
gr_update = gr.update
# Manipulation de fichiers et système
import glob
import os
import io
import shutil
import tempfile
import time
import json
import base64
import logging
# Traitement de documents
import fitz # PyMuPDF pour la conversion PDF → Image
from PIL import Image # Manipulation d'images
# Base de données de graphes
from neo4j import GraphDatabase # Stockage des graphes de connaissances
# Modèles d'IA pour génération de graphes
import google.genai as genai
from google.genai import types
from gemma import generate_mermaid_from_gemma
from llama import generate_mermaid_from_llama
from Qwen import generate_mermaid_from_qwen
from nvidia_nemotron import generate_mermaid_from_nvidia
from intern_vl import generate_mermaid_from_intern
# Génération de rapports
from fpdf import FPDF
import matplotlib.pyplot as plt
# Comparaison et matching
from rapidfuzz import process, fuzz # Fuzzy matching (similarité textuelle)
from sentence_transformers import SentenceTransformer, util # Matching sémantique
import torch
import numpy as np
from dotenv import load_dotenv
# Agrégation de mindmaps
from aggregation import aggregate_mindmaps
# En haut de app.py, après les imports
# Public API of this module (names exported via `from module import *`).
__all__ = [
    'convert_pdf_to_image',
    'encode_image',
    'generate_mermaid_from_image_gemini',
    'generate_mermaid_from_llama',
    'generate_mermaid_from_gemma',
    'generate_mermaid_from_qwen',
    'generate_mermaid_from_nvidia',
    'generate_mermaid_from_intern',
    'mermaid_to_json',
    'load_json',
    'compare_graphs_with_semantic_fast'
]
# Load environment variables (API keys, Neo4j credentials) from a .env file.
load_dotenv()
# Anchor all relative paths to the script's own directory.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# NOTE(review): chdir at import time changes the CWD for the whole process —
# confirm downstream code depends on this.
os.chdir(_SCRIPT_DIR)
# ============================================
# CONFIGURATION GLOBALE
# ============================================
# Configure process-wide logging so every operation is traceable.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
# ─────────────────────────────────────────────────────────────────────────────
# Configuration Neo4j (Base de données de graphes)
# ─────────────────────────────────────────────────────────────────────────────
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")  # Neo4j server address
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")  # Username
# NOTE(review): weak fallback password shipped in source — prefer requiring the env var.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "123456789")  # Password
TEMP_FOLDER = "temp_images"  # Temporary folder for converted page images
OUTPUT_JSON_DIR = os.path.join(_SCRIPT_DIR, "json_files")
GT_ORIGINAL_PATH = os.path.join(_SCRIPT_DIR, "GT", "OOP_MindMAP_GroundTruth_00.json")
# ─────────────────────────────────────────────────────────────────────────────
# Global performance-tracking state
# ─────────────────────────────────────────────────────────────────────────────
model_precision_records = []  # Metrics collected for each model tested
# ─────────────────────────────────────────────────────────────────────────────
# Semantic-matching model configuration (lazy loading)
# ─────────────────────────────────────────────────────────────────────────────
# Multilingual model optimized for speed (12 layers instead of 24).
_semantic_model = None  # Populated on first call to get_semantic_model()
def get_semantic_model():
    """Return the shared sentence-embedding model, loading it on first use."""
    global _semantic_model
    if _semantic_model is not None:
        return _semantic_model
    # First call: load the multilingual MiniLM encoder and freeze it.
    model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
    model.eval()
    _semantic_model = model
    return _semantic_model
# ============================================
# SECTION 1 : FONCTIONS DE BASE (UTILITAIRES)
# ============================================
def convert_pdf_to_image(pdf_path, output_folder=TEMP_FOLDER):
    """Render the first page of a PDF to a PNG file and return its path."""
    try:
        os.makedirs(output_folder, exist_ok=True)
        base_name = os.path.splitext(os.path.basename(pdf_path))[0]
        image_path = os.path.join(output_folder, f"{base_name}_page1.png")
        document = fitz.open(pdf_path)
        # Only page 1 is rendered; mindmap documents are single-page here.
        document[0].get_pixmap().save(image_path)
        document.close()
        logging.info(f"PDF {pdf_path} converti en image {image_path} avec succès.")
        return image_path
    except Exception as e:
        logging.error(f"Erreur lors de la conversion du PDF : {e}")
        raise
def encode_image(img_path):
    """Read an image file and return its contents as a Base64 string."""
    try:
        with open(img_path, "rb") as image_file:
            raw_bytes = image_file.read()
        return base64.b64encode(raw_bytes).decode('utf-8')
    except Exception as e:
        logging.error(f"Erreur lors de l'encodage de l'image : {e}")
        raise
def escape_string(value):
    """Escape characters that are problematic in Cypher string values."""
    # NOTE(review): callers also pass these values as query parameters, where
    # escaping is normally unnecessary — confirm the double-escaping is intended.
    replacements = (("'", "''"), ("#", "\\#"), ('"', '\\"'))
    for old, new in replacements:
        value = value.replace(old, new)
    return value
def load_json(file_path):
    """Load a graph JSON file and return its (nodes, edges) pair."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        graph = json.load(handle)
    return graph["nodes"], graph["edges"]
# ============================================
# SECTION 2 : GÉNÉRATION DE GRAPHES PAR LES MODÈLES D'IA
# ============================================
def generate_mermaid_from_image_gemini(base64_image):
    """Generate a Mermaid knowledge-graph diagram from an image with Gemini.

    Tries each candidate model name in order (a GEMINI_MODEL environment
    override first, then known public models) so that a model-specific
    404 NOT_FOUND falls through to the next candidate instead of failing
    the whole call.

    Args:
        base64_image: Base64-encoded PNG image content.

    Returns:
        str: Mermaid code; a "graph TD;" header is prepended if missing.

    Raises:
        EnvironmentError: if GEMINI_API_KEY is not set.
        Exception: the last provider error when every candidate model fails.
    """
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise EnvironmentError("La clé API GEMINI_API_KEY n'est pas définie.")
    # Optional model override from the environment, tried before the defaults.
    model_from_env = os.environ.get("GEMINI_MODEL", "").strip()
    model_candidates = [
        model_from_env,
        "gemini-2.0-flash",
        "gemini-2.0-flash-lite",
        "gemini-1.5-flash-latest",
        "gemini-1.5-pro-latest",
    ]
    # Drop the empty string left behind when GEMINI_MODEL is unset.
    model_candidates = [m for m in model_candidates if m]
    try:
        client = genai.Client(api_key=api_key)
        prompt = """
Analyse cette image et extrais les entités et leurs relations pour créer un graphe de connaissances.
Génère UNIQUEMENT un diagramme Mermaid valide avec:
- Les nœuds principaux (personnes, organisations, concepts, etc.)
- Les relations entre eux
- Format: graph TD; A[Entité1] --> B[Entité2];
Important:
- Ne génère QUE le code Mermaid, sans explications
- Utilise des noms courts et clairs pour les entités
- Inclus les relations les plus importantes seulement
"""
        last_error = None
        response = None
        # First model that answers wins; each failure is remembered so the
        # last error can be re-raised if every candidate fails.
        for model_name in model_candidates:
            try:
                response = client.models.generate_content(
                    model=model_name,
                    contents=[
                        types.Content(
                            role="user",
                            parts=[
                                types.Part.from_text(text=prompt),
                                types.Part.from_bytes(
                                    data=base64.b64decode(base64_image),
                                    mime_type="image/png",
                                ),
                            ],
                        )
                    ],
                )
                break
            except Exception as e:
                last_error = e
                continue
        if response is None:
            raise last_error if last_error is not None else RuntimeError("Échec Gemini: aucune réponse")
        mermaid_code = (getattr(response, "text", None) or "").strip()
        if not mermaid_code:
            # Fallback: dig the text out of the raw candidate parts.
            try:
                mermaid_code = (
                    response.candidates[0].content.parts[0].text
                ).strip()
            except Exception:
                mermaid_code = ""
        # Ensure the output is valid Mermaid: force a "graph TD;" header.
        if not mermaid_code.startswith("graph"):
            mermaid_code = f"graph TD;\n{mermaid_code}"
        return mermaid_code
    except Exception as e:
        logging.error(f"Erreur lors de la génération Mermaid avec Gemini: {e}")
        raise
def mermaid_to_json(mermaid_code):
    """Convert Mermaid edge lines into a graph JSON structure.

    Parses every line containing "-->" and extracts endpoints of the form
    ``ID[Label]`` (a bare ``ID`` serves as its own label). Fixes over the
    previous version:

    * Trailing Mermaid statement terminators (";") are stripped, so the
      prompt's own example format ``A[X] --> B[Y];`` no longer produces
      ids/labels polluted with a trailing semicolon.
    * Chained edges (``A --> B --> C``) yield one edge per arrow instead of
      raising ValueError on unpacking.

    Args:
        mermaid_code: Mermaid "graph TD" source text.

    Returns:
        dict: {"nodes": [{"id", "label"}, ...],
               "edges": [{"source", "target"}, ...]}
        The first label seen for a node id wins.

    Raises:
        Exception: re-raised after logging if parsing fails unexpectedly.
    """
    def _parse_endpoint(token):
        # "B[Some Label];" -> ("B", "Some Label"); bare "B;" -> ("B", "B").
        token = token.strip().rstrip(";").strip()
        parts = token.split("[")
        node_id = parts[0].strip()
        label = parts[1].replace("]", "").strip() if len(parts) > 1 else node_id
        return node_id, label

    try:
        nodes = {}
        edges = []
        for line in mermaid_code.split("\n"):
            if "-->" not in line:
                continue
            endpoints = [_parse_endpoint(seg) for seg in line.strip().split("-->")]
            # Chained edges: each consecutive pair of endpoints is one edge.
            for (src_id, src_label), (tgt_id, tgt_label) in zip(endpoints, endpoints[1:]):
                if not src_id or not tgt_id:
                    continue  # skip degenerate fragments like "--> B"
                nodes.setdefault(src_id, src_label)
                nodes.setdefault(tgt_id, tgt_label)
                edges.append({"source": src_id, "target": tgt_id})
        return {
            "nodes": [
                {"id": node_id, "label": label}
                for node_id, label in nodes.items()
            ],
            "edges": edges
        }
    except Exception as e:
        logging.error(f"Erreur lors de la conversion Mermaid → JSON : {e}")
        raise
def graph_json_to_mermaid(graph_json):
    """Render a {nodes:[{id,label}], edges:[{source,target}]} dict as Mermaid."""
    if not graph_json:
        return "graph TD;"

    def _mermaid_id(raw):
        # Mermaid node ids must avoid spaces/special chars: keep [A-Za-z0-9_].
        cleaned = "".join(c if c.isalnum() or c == "_" else "_" for c in str(raw))
        return "N_" + cleaned

    out = ["graph TD;"]
    known_ids = {}
    for node in graph_json.get("nodes") or []:
        original_id = node.get("id")
        mermaid_id = _mermaid_id(original_id)
        known_ids[str(original_id)] = mermaid_id
        # Double quotes would break the bracketed label syntax.
        text = str(node.get("label") or original_id).replace("\"", "'")
        out.append(f" {mermaid_id}[\"{text}\"]; ")
    for edge in graph_json.get("edges") or []:
        source = known_ids.get(str(edge.get("source")), _mermaid_id(edge.get("source")))
        target = known_ids.get(str(edge.get("target")), _mermaid_id(edge.get("target")))
        out.append(f" {source} --> {target};")
    return "\n".join(out)
def mermaid_to_html(mermaid_code, elem_id):
    """Wrap Mermaid code into HTML that renders inside Gradio.

    Args:
        mermaid_code: Mermaid source to render (falls back to "graph TD;").
        elem_id: Unique DOM id for the diagram container so several diagrams
            can coexist on one page.

    Returns:
        str: HTML fragment that lazy-loads mermaid.js from the jsDelivr CDN
        (only once, guarded by the 'mermaidjs_loader' script id) and then
        renders this diagram.
    """
    # Break any literal "</script>" in the diagram so it cannot close our tag.
    safe = (mermaid_code or "graph TD;").replace("</script>", "</scr" + "ipt>")
    return f"""
<div style=\"width:100%;min-height:520px;border:1px solid #e0e0e0;border-radius:8px;background:#fff;overflow:auto;\">
<div id=\"{elem_id}\" class=\"mermaid\" style=\"padding:16px;\">{safe}</div>
</div>
<script>
(function() {{
function render() {{
if (!window.mermaid) return;
try {{
window.mermaid.initialize({{ startOnLoad: false, securityLevel: 'loose' }});
window.mermaid.run({{ nodes: [document.getElementById('{elem_id}')] }});
}} catch (e) {{ console.error(e); }}
}}
if (!document.getElementById('mermaidjs_loader')) {{
var s = document.createElement('script');
s.id = 'mermaidjs_loader';
s.src = 'https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js';
s.onload = render;
document.head.appendChild(s);
}} else {{
render();
}}
}})();
</script>
"""
# ============================================
# SECTION 3 : GESTION DE LA BASE NEO4J
# ============================================
def clear_neo4j():
    """Delete every node and relationship in the Neo4j database.

    Returns:
        bool: True on success, False when Neo4j is unreachable or
        misconfigured (the error is logged, not raised).
    """
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
        with driver.session() as session:
            session.run("MATCH (n) DETACH DELETE n")
        driver.close()
        logging.info("Base de données Neo4j vidée avec succès.")
        return True
    except Exception as e:
        logging.error(f"Neo4j indisponible ou mal configuré (clear_neo4j): {e}")
        return False
def upload_gt_and_test_to_neo4j(gt_json, test_json, doc_name):
    """Upload the GT and Test graphs into Neo4j, deduplicating nodes and edges.

    GT entities get ids prefixed "GT_" and doc tag "<doc_name>_GT"; test
    entities get ids prefixed "TEST_" and doc tag `doc_name`, so both graphs
    can coexist in the same database without colliding.

    Args:
        gt_json: Ground-truth graph {"nodes": [...], "edges": [...]}.
        test_json: Model-generated graph in the same format.
        doc_name: Document identifier used as the Neo4j `doc` tag.

    Returns:
        bool: True on success, False when Neo4j is unreachable or the upload
        fails (errors are logged, not raised).
    """
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
    except Exception as e:
        logging.error(f"Neo4j indisponible ou mal configuré (driver): {e}")
        return False
    try:
        with driver.session() as session:
            gt_tag = f"{doc_name}_GT"
            # ---- Upload GT ----
            # Deduplicate GT nodes by id (first occurrence wins).
            unique_gt_nodes = {}
            for node in gt_json["nodes"]:
                node_id = node["id"]
                if node_id not in unique_gt_nodes:
                    unique_gt_nodes[node_id] = node
            for node_id, node in unique_gt_nodes.items():
                prefixed_id = f"GT_{node_id}"
                session.run(
                    """
                    MERGE (e:Entity {id:$id, doc:$doc})
                    ON CREATE SET e.name = $name
                    ON MATCH SET e.name = $name
                    """,
                    {
                        "id": prefixed_id,
                        "name": escape_string(node["label"]),
                        "doc": gt_tag,
                    },
                )
            # Deduplicate GT edges by (source, target) pair.
            unique_gt_edges = {}
            for edge in gt_json["edges"]:
                edge_key = (edge["source"], edge["target"])
                if edge_key not in unique_gt_edges:
                    unique_gt_edges[edge_key] = edge
            for (src, tgt), edge in unique_gt_edges.items():
                src_id = f"GT_{src}"
                tgt_id = f"GT_{tgt}"
                session.run(
                    """
                    MATCH (a:Entity {id:$src, doc:$doc})
                    MATCH (b:Entity {id:$tgt, doc:$doc})
                    MERGE (a)-[r:RELATED_TO {doc:$doc}]->(b)
                    """,
                    {
                        "src": src_id,
                        "tgt": tgt_id,
                        "doc": gt_tag,
                    },
                )
            # ---- Upload Test ----
            # Same dedup scheme for the model-generated graph.
            unique_test_nodes = {}
            for node in test_json["nodes"]:
                node_id = node["id"]
                if node_id not in unique_test_nodes:
                    unique_test_nodes[node_id] = node
            for node_id, node in unique_test_nodes.items():
                prefixed_id = f"TEST_{node_id}"
                session.run(
                    """
                    MERGE (e:Entity {id:$id, doc:$doc})
                    ON CREATE SET e.name = $name
                    ON MATCH SET e.name = $name
                    """,
                    {
                        "id": prefixed_id,
                        "name": escape_string(node["label"]),
                        "doc": doc_name,
                    },
                )
            unique_test_edges = {}
            for edge in test_json["edges"]:
                edge_key = (edge["source"], edge["target"])
                if edge_key not in unique_test_edges:
                    unique_test_edges[edge_key] = edge
            for (src, tgt), edge in unique_test_edges.items():
                src_id = f"TEST_{src}"
                tgt_id = f"TEST_{tgt}"
                session.run(
                    """
                    MATCH (a:Entity {id:$src, doc:$doc})
                    MATCH (b:Entity {id:$tgt, doc:$doc})
                    MERGE (a)-[r:RELATED_TO {doc:$doc}]->(b)
                    """,
                    {
                        "src": src_id,
                        "tgt": tgt_id,
                        "doc": doc_name,
                    },
                )
    except Exception as e:
        logging.error(f"Neo4j indisponible ou mal configuré (upload): {e}")
        return False
    finally:
        # Always release the driver, even on failure.
        try:
            driver.close()
        except Exception:
            pass
    logging.info(
        f"GT uploadé : {len(unique_gt_nodes)} nœuds, {len(unique_gt_edges)} arêtes"
    )
    logging.info(
        f"Test uploadé : {len(unique_test_nodes)} nœuds, {len(unique_test_edges)} arêtes"
    )
    return True
# ============================================
# SECTION 4 : MATCHING HYBRIDE (FUZZY + SÉMANTIQUE)
# ============================================
def compute_semantic_similarity_batch(test_labels, gt_labels):
    """Compute the (test x gt) semantic-similarity matrix, scaled to 0-100."""
    model = get_semantic_model()

    def _embed(labels):
        # Batched, silent encoding to tensors for cosine similarity.
        return model.encode(
            labels,
            convert_to_tensor=True,
            batch_size=32,
            show_progress_bar=False,
        )

    # Inference only: no gradients needed.
    with torch.no_grad():
        similarity = util.cos_sim(_embed(test_labels), _embed(gt_labels))
    return (similarity * 100).cpu().numpy()
def hybrid_match_optimized(test_labels, gt_labels, fuzzy_threshold=80,
                           semantic_threshold=70, alpha=0.6):
    """Match each test label to its best GT label via fuzzy + semantic scores.

    For every test label, only the top-5 semantically closest GT candidates
    are rescored with fuzzy string similarity. The combined score is
    ``alpha * fuzzy + (1 - alpha) * semantic``; a candidate is accepted when
    EITHER the fuzzy or the semantic score passes its threshold, and the
    highest combined score wins.

    Args:
        test_labels: Labels produced by the model.
        gt_labels: Ground-truth labels.
        fuzzy_threshold: Minimum fuzzy (rapidfuzz ratio) score, 0-100.
        semantic_threshold: Minimum semantic (cosine) score, 0-100.
        alpha: Weight of the fuzzy score in the combined score.

    Returns:
        list[tuple]: One (best_match_or_None, combined, fuzzy, semantic)
        tuple per test label, in input order.
    """
    if not gt_labels or not test_labels:
        return [(None, 0, 0, 0) for _ in test_labels]
    results = []
    semantic_matrix = compute_semantic_similarity_batch(test_labels, gt_labels)
    for i, test_lbl in enumerate(test_labels):
        best_match = None
        best_combined_score = 0
        best_fuzzy = 0
        best_semantic = 0
        semantic_scores = semantic_matrix[i]
        # Only rescore the k (≤ 5) semantically closest candidates for speed.
        top_k = min(5, len(gt_labels))
        top_indices = semantic_scores.argsort()[-top_k:][::-1]
        for idx in top_indices:
            candidate = gt_labels[idx]
            semantic_score = float(semantic_scores[idx])
            # Loose pre-filter: skip candidates far below the semantic threshold.
            if semantic_score >= semantic_threshold - 20:
                fuzzy_match, fuzzy_score, _ = process.extractOne(
                    test_lbl,
                    [candidate],
                    scorer=fuzz.ratio
                )
                combined_score = (alpha * fuzzy_score) + ((1 - alpha) * semantic_score)
                if (fuzzy_score >= fuzzy_threshold or
                    semantic_score >= semantic_threshold):
                    if combined_score > best_combined_score:
                        best_combined_score = combined_score
                        best_match = candidate
                        best_fuzzy = fuzzy_score
                        best_semantic = semantic_score
        results.append((best_match, best_combined_score, best_fuzzy, best_semantic))
    return results
def compute_edge_metrics_undirected(gt_edges, test_edges, gt_id2lbl, test_id2lbl, label_mapping):
    """Edge precision/recall/F1 treating every edge as UNDIRECTED.

    Edge endpoints are compared by label; test labels are first mapped onto
    GT labels via `label_mapping`. Each edge is normalized to a sorted label
    pair, so direction (and inversions) never count as errors.

    Returns:
        dict with precision/recall/F1 percentages plus the raw TP/FP/FN
        undirected edge lists.
    """
    def _pair(a, b):
        # Canonical undirected representation: sorted label tuple.
        return tuple(sorted([a, b]))

    gt_pairs = {
        _pair(gt_id2lbl[e["source"]], gt_id2lbl[e["target"]]) for e in gt_edges
    }

    test_pairs = set()
    for edge in test_edges:
        src_label = test_id2lbl[edge["source"]]
        tgt_label = test_id2lbl[edge["target"]]
        # Translate test labels into GT vocabulary; unmapped labels stay as-is.
        mapped_src = label_mapping.get(src_label, src_label)
        mapped_tgt = label_mapping.get(tgt_label, tgt_label)
        if mapped_src and mapped_tgt:
            test_pairs.add(_pair(mapped_src, mapped_tgt))

    true_pos = gt_pairs & test_pairs
    false_pos = test_pairs - gt_pairs
    false_neg = gt_pairs - test_pairs

    prec = len(true_pos) / len(test_pairs) * 100 if test_pairs else 100
    rec = len(true_pos) / len(gt_pairs) * 100 if gt_pairs else 100
    f1 = 2 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0
    return {
        "precision_edges": round(prec, 2),
        "recall_edges": round(rec, 2),
        "f1_edges": round(f1, 2),
        "tp_edges": list(true_pos),
        "fp_edges": list(false_pos),
        "fn_edges": list(false_neg),
    }
def compute_graph_depth(nodes, edges):
    """BFS the graph from its root nodes and return depth/level statistics.

    Roots are nodes with no incoming edge; if none exist (cyclic graph),
    every node is used as a BFS start. Unreached nodes land at level 0.
    """
    from collections import defaultdict, deque
    node_ids = {node["id"] for node in nodes}
    successors = defaultdict(list)
    has_parent = set()
    for edge in edges:
        src, tgt = edge["source"], edge["target"]
        # Ignore edges that reference unknown node ids.
        if src in node_ids and tgt in node_ids:
            successors[src].append(tgt)
            has_parent.add(tgt)
    roots = [nid for nid in node_ids if nid not in has_parent]
    if not roots:
        # No parentless node: cyclic graph, start everywhere.
        roots = list(node_ids)
    node_levels = {}
    nodes_per_level = defaultdict(list)
    frontier = deque((root, 0) for root in roots)
    seen = set()
    while frontier:
        current, depth = frontier.popleft()
        if current in seen:
            continue
        seen.add(current)
        node_levels[current] = depth
        nodes_per_level[depth].append(current)
        for nxt in successors[current]:
            if nxt not in seen:
                frontier.append((nxt, depth + 1))
    # Disconnected/unreached nodes are assigned to level 0.
    for orphan in node_ids:
        if orphan not in node_levels:
            node_levels[orphan] = 0
            nodes_per_level[0].append(orphan)
    max_depth = max(node_levels.values()) if node_levels else 0
    return {
        "max_depth": max_depth,
        "node_levels": node_levels,
        "nodes_per_level": dict(nodes_per_level)
    }
def compute_hierarchical_metrics(gt_nodes, gt_edges, test_nodes, test_edges, label_mapping):
    """Compare the hierarchical structure (depths and levels) of GT vs test.

    Uses `compute_graph_depth` on both graphs, then measures how many matched
    test nodes sit at the same depth as their GT counterpart, and how similar
    the per-level node counts are.
    """
    gt_info = compute_graph_depth(gt_nodes, gt_edges)
    test_info = compute_graph_depth(test_nodes, test_edges)
    gt_depth = gt_info["max_depth"]
    test_depth = test_info["max_depth"]
    depth_diff = abs(gt_depth - test_depth)

    gt_labels_by_id = {n["id"]: n["label"] for n in gt_nodes}
    test_labels_by_id = {n["id"]: n["label"] for n in test_nodes}

    # GT depth indexed by label (only non-empty labels).
    gt_level_of_label = {}
    for nid, lvl in gt_info["node_levels"].items():
        lbl = gt_labels_by_id.get(nid)
        if lbl:
            gt_level_of_label[lbl] = lvl

    # Count matched test nodes placed at the same depth as their GT twin.
    correct_levels = 0
    total_matched = 0
    for nid, lvl in test_info["node_levels"].items():
        test_lbl = test_labels_by_id.get(nid)
        if not test_lbl:
            continue
        gt_lbl = label_mapping.get(test_lbl)
        if not gt_lbl:
            continue
        total_matched += 1
        if gt_level_of_label.get(gt_lbl) == lvl:
            correct_levels += 1
    level_accuracy = (correct_levels / total_matched * 100) if total_matched > 0 else 0

    # Per-level node-count similarity, averaged over the deeper graph's levels.
    deepest = max(gt_depth, test_depth)
    per_level = []
    for lvl in range(deepest + 1):
        n_gt = len(gt_info["nodes_per_level"].get(lvl, []))
        n_test = len(test_info["nodes_per_level"].get(lvl, []))
        per_level.append(1 - abs(n_gt - n_test) / max(n_gt, n_test, 1))
    structure_similarity = (sum(per_level) / len(per_level) * 100) if per_level else 0

    return {
        "gt_depth": gt_depth,
        "test_depth": test_depth,
        "depth_difference": depth_diff,
        "depth_match": depth_diff == 0,
        "level_accuracy": round(level_accuracy, 2),
        "structure_similarity": round(structure_similarity, 2),
        "gt_nodes_per_level": gt_info["nodes_per_level"],
        "test_nodes_per_level": test_info["nodes_per_level"],
        "correct_level_count": correct_levels,
        "total_matched_nodes": total_matched
    }
def compute_ged_coherent(gt_nodes, gt_edges, test_nodes, test_edges,
                         hallucination_metrics, edge_metrics, label_mapping=None):
    """Graph Edit Distance (test → GT) derived from the node and edge metrics.

    Instead of solving GED directly, this reuses the already-computed
    hallucination metrics (node errors) and undirected edge metrics (edge
    errors) so the GED stays coherent with those numbers:
    hallucinated test nodes = deletions, missed GT nodes = insertions,
    FP edges = deletions, FN edges = insertions. Substitutions are always 0.

    Args:
        hallucination_metrics: Output of `calculate_hallucination_metrics`.
        edge_metrics: Output of `compute_edge_metrics_undirected`.
        label_mapping: Unused here; kept for interface compatibility.

    Returns:
        dict: GED, normalized GED (%), structural similarity (%), operation
        counts, and truncated (first 10) operation lists. On failure returns
        a dict with None metrics and an "error" key instead of raising.
    """
    try:
        # ─────────────────────────────────────────────────────────────────
        # STEP 1: Node operations (from hallucination_metrics)
        # ─────────────────────────────────────────────────────────────────
        hallucinated_list = hallucination_metrics["hallucinated_nodes"]
        missed_gt_list = hallucination_metrics["missed_gt_nodes"]
        num_node_deletions = len(hallucinated_list)  # remove from test
        num_node_insertions = len(missed_gt_list)  # add to test
        num_node_substitutions = 0
        # ─────────────────────────────────────────────────────────────────
        # STEP 2: Edge operations (from edge_metrics)
        # ─────────────────────────────────────────────────────────────────
        fp_edges = edge_metrics["fp_edges"]  # extra edges
        fn_edges = edge_metrics["fn_edges"]  # missing edges
        num_edge_deletions = len(fp_edges)  # remove from test
        num_edge_insertions = len(fn_edges)  # add to test
        num_edge_substitutions = 0
        # ─────────────────────────────────────────────────────────────────
        # STEP 3: Total GED
        # ─────────────────────────────────────────────────────────────────
        ged = (num_node_deletions + num_node_insertions + num_node_substitutions +
               num_edge_deletions + num_edge_insertions + num_edge_substitutions)
        # Normalize by the combined size of both graphs.
        max_ops = (len(gt_nodes) + len(gt_edges) + len(test_nodes) + len(test_edges))
        normalized_ged = (ged / max_ops * 100) if max_ops > 0 else 0
        similarity = 100 - normalized_ged
        # ─────────────────────────────────────────────────────────────────
        # Verification logs
        # ─────────────────────────────────────────────────────────────────
        logging.info("=" * 60)
        logging.info("GRAPH EDIT DISTANCE (test → GT)")
        logging.info("=" * 60)
        logging.info(f" GED total : {ged:.2f} opérations")
        logging.info(f" • GED normalisé : {normalized_ged:.2f}%")
        logging.info(f" • Similarité : {similarity:.2f}%")
        logging.info("")
        logging.info(f" Opérations sur NŒUDS :")
        logging.info(f" • Suppressions : {num_node_deletions} (= hallucinés)")
        logging.info(f" • Insertions : {num_node_insertions} (= manqués GT)")
        logging.info("")
        logging.info(f" Opérations sur ARÊTES :")
        logging.info(f" • Suppressions : {num_edge_deletions} (= arêtes en trop)")
        logging.info(f" • Insertions : {num_edge_insertions} (= arêtes manquantes)")
        logging.info("")
        logging.info(f"🔹 COHÉRENCE AVEC MÉTRIQUES :")
        logging.info(f" ✓ fp_edges = {len(fp_edges)} → Suppressions = {num_edge_deletions}")
        logging.info(f" ✓ fn_edges = {len(fn_edges)} → Insertions = {num_edge_insertions}")
        logging.info("=" * 60)
        return {
            "ged": round(ged, 2),
            "normalized_ged": round(normalized_ged, 2),
            "structural_similarity": round(similarity, 2),
            "gt_nodes_count": len(gt_nodes),
            "gt_edges_count": len(gt_edges),
            "test_nodes_count": len(test_nodes),
            "test_edges_count": len(test_edges),
            # Node operations
            "num_node_deletions": num_node_deletions,
            "num_node_insertions": num_node_insertions,
            "num_node_substitutions": num_node_substitutions,
            # Edge operations
            "num_edge_deletions": num_edge_deletions,
            "num_edge_insertions": num_edge_insertions,
            "num_edge_substitutions": num_edge_substitutions,
            # Detailed lists (truncated to 10 entries each)
            "node_deletions": hallucinated_list[:10],
            "node_insertions": missed_gt_list[:10],
            "edge_deletions": [f"{a} - {b}" for a, b in list(fp_edges)[:10]],
            "edge_insertions": [f"{a} - {b}" for a, b in list(fn_edges)[:10]]
        }
    except Exception as e:
        logging.error(f"⚠️ Erreur lors du calcul du GED : {e}")
        return {
            "ged": None,
            "normalized_ged": None,
            "structural_similarity": None,
            "num_node_deletions": 0,
            "num_node_insertions": 0,
            "num_node_substitutions": 0,
            "num_edge_deletions": 0,
            "num_edge_insertions": 0,
            "num_edge_substitutions": 0,
            "error": str(e)
        }
def calculate_hallucination_metrics(gt_nodes, test_nodes, mapped_test_labels):
    """Compute hallucination/detection statistics for generated nodes.

    A test node is "correct" when its mapped label matches a GT label not
    already claimed by an earlier test node; otherwise (no match, or a
    duplicate match on the same GT label) it counts as hallucinated.

    NOTE(review): this function is re-defined later in the module; the later
    definition shadows this one at import time — confirm which is intended.
    """
    gt_label_set = {n["label"] for n in gt_nodes}
    generated_labels = [n["label"] for n in test_nodes]

    hallucinated = []
    correct = []
    claimed = {}  # gt_label -> first test label that matched it

    for generated, mapped in zip(generated_labels, mapped_test_labels):
        if mapped is not None and mapped in gt_label_set and mapped not in claimed:
            # First match on this GT label → correct detection.
            correct.append(generated)
            claimed[mapped] = generated
        else:
            # No match, label outside GT, or duplicate match → hallucination.
            hallucinated.append(generated)

    detected = set(claimed)
    n_generated = len(generated_labels)
    n_gt = len(gt_label_set)
    hallucination_rate = (len(hallucinated) / n_generated * 100) if n_generated > 0 else 0
    detection_rate = (len(detected) / n_gt * 100) if n_gt > 0 else 0
    return {
        "hallucination_rate": round(hallucination_rate, 2),
        "detection_rate": round(detection_rate, 2),
        "total_generated": n_generated,
        "num_hallucinated": len(hallucinated),
        "num_correct": len(correct),
        "hallucinated_nodes": hallucinated,
        "correct_nodes": correct,
        "total_gt": n_gt,
        "num_detected": len(detected),
        "num_missed": n_gt - len(detected),
        "missed_gt_nodes": list(gt_label_set - detected)
    }
def compare_graphs_with_semantic_fast(gt_nodes, gt_edges, test_nodes, test_edges,
                                      fuzzy_threshold=80, semantic_threshold=70,
                                      alpha=0.6, edge_mode="hybrid"):
    """Full GT-vs-test graph comparison: nodes, edges, hierarchy and GED.

    Pipeline: hybrid (fuzzy + semantic) node matching → hallucination
    metrics → undirected edge metrics → hierarchical metrics → coherent GED.
    Edge direction is ignored, so there are no "inversion" metrics.

    Args:
        gt_nodes/gt_edges: Ground-truth graph.
        test_nodes/test_edges: Model-generated graph.
        fuzzy_threshold, semantic_threshold, alpha: Forwarded to
            `hybrid_match_optimized`.
        edge_mode: Unused here; kept for interface compatibility —
            TODO(review) confirm callers rely on it.

    Returns:
        dict: Flat report with node/edge precision-recall-F1 (percentages),
        hallucination and detection stats, hierarchy comparison, and GED.
    """
    gt_labels = [n["label"] for n in gt_nodes]
    test_labels = [n["label"] for n in test_nodes]
    # One (match, combined, fuzzy, semantic) tuple per test label.
    matches = hybrid_match_optimized(
        test_labels, gt_labels, fuzzy_threshold, semantic_threshold, alpha
    )
    mapped_test_labels = []
    matching_details = []
    for test_lbl, (match, combined_score, fuzzy_score, sem_score) in zip(test_labels, matches):
        if match:
            mapped_test_labels.append(match)
            matching_details.append({
                "test": test_lbl,
                "matched": match,
                "combined": round(combined_score, 2),
                "fuzzy": round(fuzzy_score, 2),
                "semantic": round(sem_score, 2)
            })
        else:
            mapped_test_labels.append(None)
            matching_details.append({
                "test": test_lbl,
                "matched": "NO_MATCH",
                "combined": 0,
                "fuzzy": 0,
                "semantic": 0
            })
    hallucination_metrics = calculate_hallucination_metrics(
        gt_nodes, test_nodes, mapped_test_labels
    )
    # test label -> GT label, for matched test labels only.
    label_mapping = {
        test_lbl: match
        for test_lbl, (match, _, _, _) in zip(test_labels, matches)
        if match
    }
    hallucinated_nodes_list = hallucination_metrics["hallucinated_nodes"]
    # Node-level confusion sets, in GT label space.
    gt_set = set(gt_labels)
    test_set = set([lbl for lbl in mapped_test_labels if lbl is not None])
    tp_nodes = gt_set & test_set
    extra_nodes = set(hallucinated_nodes_list)
    fn_nodes = gt_set - test_set
    gt_id2lbl = {n["id"]: n["label"] for n in gt_nodes}
    test_id2lbl = {n["id"]: n["label"] for n in test_nodes}
    # Edges compared undirected (direction never penalized).
    edge_metrics = compute_edge_metrics_undirected(
        gt_edges, test_edges, gt_id2lbl, test_id2lbl, label_mapping
    )
    prec_edges = edge_metrics["precision_edges"] / 100
    rec_edges = edge_metrics["recall_edges"] / 100
    f1_edges = edge_metrics["f1_edges"] / 100
    tp_edges = set(edge_metrics["tp_edges"])
    fp_edges = set(edge_metrics["fp_edges"])
    fn_edges = set(edge_metrics["fn_edges"])
    prec_nodes = (
        len(tp_nodes) / (len(tp_nodes) + len(extra_nodes))
        if tp_nodes or extra_nodes
        else 1.0
    )
    rec_nodes = (
        len(tp_nodes) / (len(tp_nodes) + len(fn_nodes))
        if tp_nodes or fn_nodes
        else 1.0
    )
    f1_nodes = (
        2 * prec_nodes * rec_nodes / (prec_nodes + rec_nodes)
        if (prec_nodes + rec_nodes)
        else 0.0
    )
    hierarchical_metrics = compute_hierarchical_metrics(
        gt_nodes, gt_edges, test_nodes, test_edges, label_mapping
    )
    # GED reuses the hallucination + edge metrics so all numbers stay coherent.
    ged_metrics = compute_ged_coherent(
        gt_nodes, gt_edges,
        test_nodes, test_edges,
        hallucination_metrics,
        edge_metrics,
        label_mapping
    )
    return {
        "precision_nodes": round(prec_nodes * 100, 2),
        "recall_nodes": round(rec_nodes * 100, 2),
        "f1_nodes": round(f1_nodes * 100, 2),
        "precision_edges": round(prec_edges * 100, 2),
        "recall_edges": round(rec_edges * 100, 2),
        "f1_edges": round(f1_edges * 100, 2),
        # "Overall" scores are the plain mean of node and edge scores.
        "overall_precision": round(((prec_nodes + prec_edges) / 2) * 100, 2),
        "overall_recall": round(((rec_nodes + rec_edges) / 2) * 100, 2),
        "overall_f1": round(((f1_nodes + f1_edges) / 2) * 100, 2),
        "missing_nodes": list(fn_nodes),
        "extra_nodes": list(extra_nodes),
        "missing_edges": list(fn_edges),
        "extra_edges": list(fp_edges),
        "matching_details": matching_details,
        "tp_edges": list(tp_edges),
        "fp_edges": list(fp_edges),
        "fn_edges": list(fn_edges),
        "hallucination_rate": hallucination_metrics["hallucination_rate"],
        "detection_rate": hallucination_metrics["detection_rate"],
        "total_generated": hallucination_metrics["total_generated"],
        "num_hallucinated": hallucination_metrics["num_hallucinated"],
        "num_correct": hallucination_metrics["num_correct"],
        "hallucinated_nodes": hallucination_metrics["hallucinated_nodes"],
        "total_gt": hallucination_metrics["total_gt"],
        "num_detected": hallucination_metrics["num_detected"],
        "num_missed": hallucination_metrics["num_missed"],
        "missed_gt_nodes": hallucination_metrics["missed_gt_nodes"],
        "gt_depth": hierarchical_metrics["gt_depth"],
        "test_depth": hierarchical_metrics["test_depth"],
        "depth_difference": hierarchical_metrics["depth_difference"],
        "depth_match": hierarchical_metrics["depth_match"],
        "level_accuracy": hierarchical_metrics["level_accuracy"],
        "structure_similarity": hierarchical_metrics["structure_similarity"],
        "gt_nodes_per_level": hierarchical_metrics["gt_nodes_per_level"],
        "test_nodes_per_level": hierarchical_metrics["test_nodes_per_level"],
        "ged": ged_metrics.get("ged"),
        "normalized_ged": ged_metrics.get("normalized_ged"),
        "structural_similarity": ged_metrics.get("structural_similarity"),
        "num_node_insertions": ged_metrics.get("num_node_insertions", 0),
        "num_node_deletions": ged_metrics.get("num_node_deletions", 0),
        "num_edge_insertions": ged_metrics.get("num_edge_insertions", 0),
        "num_edge_deletions": ged_metrics.get("num_edge_deletions", 0),
    }
# ============================================
# SECTION 5 : MÉTRIQUES D'HALLUCINATION
# ============================================
def calculate_hallucination_metrics(gt_nodes, test_nodes, mapped_test_labels):
    """
    Compute hallucination/detection statistics for generated graph nodes.

    Args:
        gt_nodes (list[dict]): Ground-truth nodes, each with a "label" key.
        test_nodes (list[dict]): Generated nodes, each with a "label" key.
        mapped_test_labels (list): For each test node, the GT label it matched,
            or None when no match was found.

    Returns:
        dict: Rates (percent, rounded to 2 decimals), counts, and the lists of
        hallucinated / correct / missed node labels, for both the model view
        (generated nodes) and the ground-truth view.

    STRICT RULES:
      - num_correct      = test nodes that matched a GT node (first match only)
      - num_hallucinated = test nodes that did not match, or that matched a GT
                           node already claimed by an earlier test node
      - num_detected     = GT nodes found by at least one test node
      - num_missed       = GT nodes never found

    INVARIANTS (enforced by assertions below):
      - num_correct + num_hallucinated == total_generated
      - num_detected + num_missed == total_gt
      - num_correct == num_detected (duplicate matches are reclassified as
        hallucinations, so each correct test node claims exactly one GT node)
    """
    # ═════════════════════════════════════════════════════════════════════
    # STEP 1: label extraction
    # ═════════════════════════════════════════════════════════════════════
    gt_labels = {n["label"] for n in gt_nodes}
    test_labels = [n["label"] for n in test_nodes]
    # ═════════════════════════════════════════════════════════════════════
    # STEP 2: classification, preventing double-counting of GT matches
    # ═════════════════════════════════════════════════════════════════════
    hallucinated_nodes = []
    correct_nodes = []
    matched_gt_nodes = set()
    already_matched_gt = {}  # {gt_label: test_label} to trace duplicates
    for test_label, matched_label in zip(test_labels, mapped_test_labels):
        if matched_label is None or matched_label not in gt_labels:
            hallucinated_nodes.append(test_label)
        elif matched_label in already_matched_gt:
            # Duplicate match on an already-claimed GT node → hallucination
            logging.warning(
                f"⚠️ Doublon : '{test_label}' matche '{matched_label}' "
                f"déjà matché par '{already_matched_gt[matched_label]}'"
            )
            hallucinated_nodes.append(test_label)
        else:
            # First match on this GT node → correct
            correct_nodes.append(test_label)
            matched_gt_nodes.add(matched_label)
            already_matched_gt[matched_label] = test_label
    # ═════════════════════════════════════════════════════════════════════
    # STEP 3: counts
    # ═════════════════════════════════════════════════════════════════════
    total_generated = len(test_labels)
    num_hallucinated = len(hallucinated_nodes)
    num_correct = len(correct_nodes)
    total_gt = len(gt_labels)
    num_detected = len(matched_gt_nodes)
    num_missed = total_gt - num_detected
    # ═════════════════════════════════════════════════════════════════════
    # STEP 4: consistency checks
    # BUGFIX: the original messages concatenated the operands with no
    # comparison operator (f"{a} + {b}{c}"), producing unreadable output.
    # ═════════════════════════════════════════════════════════════════════
    assert num_correct + num_hallucinated == total_generated, \
        f"❌ {num_correct} + {num_hallucinated} != {total_generated}"
    assert num_detected + num_missed == total_gt, \
        f"❌ {num_detected} + {num_missed} != {total_gt}"
    assert num_correct == num_detected, \
        f"❌ CRITIQUE : {num_correct} corrects ≠ {num_detected} détectés"
    # ═════════════════════════════════════════════════════════════════════
    # STEP 5: rates (guard against division by zero on empty inputs)
    # ═════════════════════════════════════════════════════════════════════
    hallucination_rate = (num_hallucinated / total_generated * 100) if total_generated > 0 else 0
    detection_rate = (num_detected / total_gt * 100) if total_gt > 0 else 0
    # ═════════════════════════════════════════════════════════════════════
    # STEP 6: detailed logs
    # ═════════════════════════════════════════════════════════════════════
    logging.info("=" * 60)
    logging.info("📊 MÉTRIQUES D'HALLUCINATION")
    logging.info("=" * 60)
    logging.info(f"🔹 Vue MODÈLE (test) :")
    logging.info(f" • Total généré : {total_generated}")
    logging.info(f" • Corrects : {num_correct}")
    logging.info(f" • Hallucinés : {num_hallucinated}")
    logging.info(f" ✓ {num_correct} + {num_hallucinated} = {total_generated}")
    logging.info("")
    logging.info(f"🔹 Vue GROUND TRUTH :")
    logging.info(f" • Total GT : {total_gt}")
    logging.info(f" • Détectés : {num_detected}")
    logging.info(f" • Manqués : {num_missed}")
    logging.info(f" ✓ {num_detected} + {num_missed} = {total_gt}")
    logging.info("")
    logging.info(f"🔹 COHÉRENCE :")
    logging.info(f" ✓ Corrects = Détectés : {num_correct} = {num_detected}")
    logging.info("=" * 60)
    return {
        "hallucination_rate": round(hallucination_rate, 2),
        "detection_rate": round(detection_rate, 2),
        "total_generated": total_generated,
        "num_hallucinated": num_hallucinated,
        "num_correct": num_correct,
        "hallucinated_nodes": hallucinated_nodes,
        "correct_nodes": correct_nodes,
        "total_gt": total_gt,
        "num_detected": num_detected,
        "num_missed": num_missed,
        "missed_gt_nodes": list(gt_labels - matched_gt_nodes)
    }
# ============================================
# SECTION 6 : COMPARAISON AVEC FUZZY UNIQUEMENT
# ============================================
def compare_graphs(gt_nodes, gt_edges, test_nodes, test_edges, threshold=100):
    """
    Compare two graphs using ONLY fuzzy (Levenshtein) matching.

    Args:
        gt_nodes (list): Ground-truth nodes ({"id", "label"} dicts)
        gt_edges (list): Ground-truth edges ({"source", "target"} dicts)
        test_nodes (list): Generated nodes
        test_edges (list): Generated edges
        threshold (int): Fuzzy similarity threshold (0-100)

    Returns:
        dict: Same node/edge precision/recall/F1 and hallucination metrics
        as the semantic variant (percentages rounded to 2 decimals).

    Note:
        Simplified version without semantic matching (used when
        use_semantic=False in the UI). Faster, but weaker on synonyms:
        no batch processing, no Top-K filtering, Levenshtein distance only.

    Robustness fix:
        `process.extractOne` returns None when the choices list is empty;
        the original unpacked that result unconditionally and crashed with a
        TypeError whenever the ground truth had no nodes. An empty GT is now
        handled explicitly (every test label is treated as unmatched).
    """
    # ═════════════════════════════════════════════════════════════════════
    # STEP 1: label extraction
    # ═════════════════════════════════════════════════════════════════════
    gt_labels = [n["label"] for n in gt_nodes]
    test_labels = [n["label"] for n in test_nodes]

    def _fuzzy_best(label):
        """Best GT label for `label` (normalized Levenshtein >= threshold), else None."""
        if not gt_labels:
            return None  # extractOne would return None → avoid unpack crash
        match, score, _ = process.extractOne(label, gt_labels, scorer=fuzz.ratio)
        return match if score >= threshold else None

    # ═════════════════════════════════════════════════════════════════════
    # STEP 2: fuzzy-map every generated label onto the GT vocabulary
    # ═════════════════════════════════════════════════════════════════════
    mapped_test_labels = [_fuzzy_best(lbl) for lbl in test_labels]
    # ═════════════════════════════════════════════════════════════════════
    # STEP 3: hallucination metrics
    # ═════════════════════════════════════════════════════════════════════
    hallucination_metrics = calculate_hallucination_metrics(
        gt_nodes, test_nodes, mapped_test_labels
    )
    # ═════════════════════════════════════════════════════════════════════
    # STEP 4: node and edge set metrics (mirrors the semantic variant)
    # ═════════════════════════════════════════════════════════════════════
    gt_set = set(gt_labels)
    test_set = {lbl for lbl in mapped_test_labels if lbl is not None}
    tp_nodes = gt_set & test_set
    fp_nodes = test_set - gt_set
    fn_nodes = gt_set - test_set

    def remap_edges(edges, id_to_label):
        """Remap edge endpoints onto GT labels via fuzzy matching.

        Unlike node mapping, an endpoint below the threshold keeps its
        original label instead of becoming None (original behavior).
        Raises KeyError if an edge references an unknown node id.
        """
        remapped = set()
        for e in edges:
            a = id_to_label[e["source"]]
            b = id_to_label[e["target"]]
            a_match = _fuzzy_best(a)
            b_match = _fuzzy_best(b)
            if a_match is not None:
                a = a_match
            if b_match is not None:
                b = b_match
            remapped.add((a, b))
        return remapped

    gt_id2lbl = {n["id"]: n["label"] for n in gt_nodes}
    test_id2lbl = {n["id"]: n["label"] for n in test_nodes}
    gt_edges_set = remap_edges(gt_edges, gt_id2lbl)
    test_edges_set = remap_edges(test_edges, test_id2lbl)
    tp_edges = gt_edges_set & test_edges_set
    fp_edges = test_edges_set - gt_edges_set
    fn_edges = gt_edges_set - test_edges_set

    def _prf(tp, fp, fn):
        """Precision/recall/F1 with the original's empty-set conventions
        (precision/recall default to 1.0 when their denominator is empty)."""
        prec = len(tp) / (len(tp) + len(fp)) if tp or fp else 1.0
        rec = len(tp) / (len(tp) + len(fn)) if tp or fn else 1.0
        f1 = (2 * prec * rec / (prec + rec)) if (prec + rec) else 0.0
        return prec, rec, f1

    prec_nodes, rec_nodes, f1_nodes = _prf(tp_nodes, fp_nodes, fn_nodes)
    prec_edges, rec_edges, f1_edges = _prf(tp_edges, fp_edges, fn_edges)
    return {
        "precision_nodes": round(prec_nodes * 100, 2),
        "recall_nodes": round(rec_nodes * 100, 2),
        "f1_nodes": round(f1_nodes * 100, 2),
        "precision_edges": round(prec_edges * 100, 2),
        "recall_edges": round(rec_edges * 100, 2),
        "f1_edges": round(f1_edges * 100, 2),
        "overall_precision": round(((prec_nodes + prec_edges) / 2) * 100, 2),
        "overall_recall": round(((rec_nodes + rec_edges) / 2) * 100, 2),
        "overall_f1": round(((f1_nodes + f1_edges) / 2) * 100, 2),
        "missing_nodes": list(fn_nodes),
        "extra_nodes": list(fp_nodes),
        "missing_edges": list(fn_edges),
        "extra_edges": list(fp_edges),
        # Hallucination metrics (same structure as the semantic variant)
        "hallucination_rate": hallucination_metrics["hallucination_rate"],
        "detection_rate": hallucination_metrics["detection_rate"],
        "total_generated": hallucination_metrics["total_generated"],
        "num_hallucinated": hallucination_metrics["num_hallucinated"],
        "num_correct": hallucination_metrics["num_correct"],
        "hallucinated_nodes": hallucination_metrics["hallucinated_nodes"],
        "total_gt": hallucination_metrics["total_gt"],
        "num_detected": hallucination_metrics["num_detected"],
        "num_missed": hallucination_metrics["num_missed"],
        "missed_gt_nodes": hallucination_metrics["missed_gt_nodes"]
    }
# ============================================
# SECTION 7 : GÉNÉRATION DE RAPPORTS
# ============================================
def save_results_to_pdf(results, model_name):
    """
    Generate a modern, visually formatted PDF performance report.

    Args:
        results (list[dict]): One entry per processed document, each with a
            "file" key (document name) and a "details" dict of metrics
            (precision/recall/F1, hallucination counts, hierarchy, GED, ...).
        model_name (str): Name of the evaluated model; used in headings and
            in the output filename.

    Side effects:
        Writes ``reports/<model_name>_report.pdf`` (creating the ``reports``
        directory if needed) and logs the output path.

    Raises:
        FileNotFoundError: if the DejaVuSans TTF fonts are not found next to
            the script (required for Unicode text with FPDF).

    NOTE(review): assumes `results` is non-empty — the synthesis page divides
    by len(results); confirm callers never pass an empty list.
    """
    os.makedirs("reports", exist_ok=True)
    # ═════════════════════════════════════════════════════════════════════
    # HELPER: sanitize problematic characters
    # ═════════════════════════════════════════════════════════════════════
    def clean_text(text):
        """
        Sanitize text to avoid Unicode errors with FPDF.

        Replaces common special characters with ASCII equivalents, then
        drops anything outside printable ASCII / Latin-1 (emojis, symbols),
        substituting '?' so cell rendering stays predictable.
        """
        if not isinstance(text, str):
            text = str(text)
        # Replacement table: arrows, typographic punctuation, math symbols,
        # logic operators, circled digits → ASCII equivalents
        replacements = {
            '→': '->',
            '←': '<-',
            '↔': '<->',
            '…': '...',
            '–': '-',
            '—': '-',
            '"': '"',
            '"': '"',
            ''': "'",
            ''': "'",
            '«': '"',
            '»': '"',
            '×': 'x',
            '÷': '/',
            '≤': '<=',
            '≥': '>=',
            '≠': '!=',
            '≈': '~=',
            '∞': 'inf',
            '√': 'sqrt',
            '∑': 'sum',
            '∏': 'prod',
            '∫': 'int',
            '∂': 'd',
            '∆': 'delta',
            '∇': 'nabla',
            '∈': 'in',
            '∉': 'not in',
            '⊂': 'subset',
            '⊃': 'superset',
            '∩': 'intersect',
            '∪': 'union',
            '∧': 'and',
            '∨': 'or',
            '¬': 'not',
            '⊕': 'xor',
            '⊗': 'otimes',
            '①': '(1)',
            '②': '(2)',
            '③': '(3)',
            '④': '(4)',
            '⑤': '(5)',
            '⑥': '(6)',
            '⑦': '(7)',
            '⑧': '(8)',
            '⑨': '(9)',
            '⑩': '(10)',
        }
        # Apply the replacements
        for old, new in replacements.items():
            text = text.replace(old, new)
        # Drop non-printable characters and emojis.
        # Keep only: letters, digits, basic punctuation, whitespace.
        cleaned = ""
        for char in text:
            code = ord(char)
            # Keep printable ASCII + Latin-1 supplement characters
            if (32 <= code <= 126) or (160 <= code <= 255):
                cleaned += char
            elif code == 9 or code == 10 or code == 13:  # Tab, LF, CR
                cleaned += char
            else:
                cleaned += "?"  # Replace unsupported characters
        return cleaned
    # ═════════════════════════════════════════════════════════════════════
    # PDF configuration
    # ═════════════════════════════════════════════════════════════════════
    pdf = FPDF()
    # NOTE(review): _SCRIPT_DIR is a module-level constant defined elsewhere
    # in this file — presumably the directory containing this script.
    base = os.path.abspath(_SCRIPT_DIR)
    regular = os.path.abspath(os.path.join(base, "DejaVuSans.ttf"))
    bold = os.path.abspath(os.path.join(base, "DejaVuSans-Bold.ttf"))
    if not os.path.exists(regular) or not os.path.exists(bold):
        raise FileNotFoundError(
            f"Police introuvable. Attendu: '{regular}' et '{bold}'."
        )
    import glob
    # Remove stale FPDF font cache files (*.pkl) so the freshly registered
    # fonts are always re-parsed (best-effort: failures are non-fatal).
    cache_dirs = {base, os.getcwd()}
    for d in cache_dirs:
        try:
            for cache_file in glob.glob(os.path.join(d, "DejaVu*.pkl")):
                try:
                    os.remove(cache_file)
                except Exception:
                    pass  # locked or already-removed cache file: ignore
        except Exception:
            pass  # unreadable directory: ignore
    FONT_FAMILY = "DejaVuLocal"
    pdf.add_font(FONT_FAMILY, "", regular, uni=True)
    pdf.add_font(FONT_FAMILY, "B", bold, uni=True)
    # Monkey-patch set_font so every call below resolves to the local DejaVu
    # family regardless of the family name passed (helpers pass "DejaVu").
    _orig_set_font = pdf.set_font
    def _set_font(_family, *args, **kwargs):
        # Ignore the requested family; always use the registered local font.
        return _orig_set_font(FONT_FAMILY, *args, **kwargs)
    pdf.set_font = _set_font
    pdf.add_page()
    # ═════════════════════════════════════════════════════════════════════
    # HELPER FUNCTIONS (drawing primitives; all text goes through clean_text)
    # ═════════════════════════════════════════════════════════════════════
    def draw_colored_header(text, color_r, color_g, color_b):
        """Draw a full-width colored banner with white centered text."""
        pdf.set_fill_color(color_r, color_g, color_b)
        pdf.set_text_color(255, 255, 255)
        pdf.set_font("DejaVu", "B", 14)
        pdf.cell(0, 10, clean_text(text), ln=True, align="C", fill=True)  # ← sanitized
        pdf.set_text_color(0, 0, 0)
        pdf.ln(3)
    def draw_section_title(text, emoji=""):
        """Draw a section title on a light grey band (emoji arg is unused)."""
        pdf.set_font("DejaVu", "B", 11)
        pdf.set_fill_color(240, 240, 240)
        pdf.cell(0, 8, clean_text(text), ln=True, fill=True)  # ← sanitized
        pdf.ln(2)
    def draw_progress_bar(label, value, max_value=100, width=140):
        """Draw a colored progress bar: green >= 80, yellow >= 60, else red."""
        pdf.set_font("DejaVu", "", 9)
        # Label (sanitized)
        pdf.cell(50, 6, clean_text(label), 0, 0)
        # Value
        pdf.set_font("DejaVu", "B", 9)
        pdf.cell(15, 6, f"{value:.1f}%", 0, 0, "R")
        # Background bar (light grey)
        x_start = pdf.get_x() + 2
        y_start = pdf.get_y()
        pdf.set_fill_color(220, 220, 220)
        pdf.rect(x_start, y_start + 1, width, 4, "F")
        # Progress bar (color depends on value)
        if value >= 80:
            pdf.set_fill_color(76, 175, 80)  # Green
        elif value >= 60:
            pdf.set_fill_color(255, 193, 7)  # Yellow
        else:
            pdf.set_fill_color(244, 67, 54)  # Red
        progress_width = (value / max_value) * width
        pdf.rect(x_start, y_start + 1, progress_width, 4, "F")
        pdf.ln(7)
    def draw_metric_box(label, value, color_r, color_g, color_b):
        """Draw a colored metric box: a label cell plus a value cell."""
        pdf.set_fill_color(color_r, color_g, color_b)
        pdf.set_text_color(255, 255, 255)
        pdf.set_font("DejaVu", "B", 10)
        # Colored box (sanitized)
        pdf.cell(60, 8, clean_text(label), 1, 0, "C", fill=True)
        pdf.cell(30, 8, clean_text(str(value)), 1, 0, "C", fill=True)
        pdf.set_text_color(0, 0, 0)
        pdf.ln()
    def draw_table_header(headers):
        """Draw a table header row (white on blue, equal column widths)."""
        pdf.set_fill_color(63, 81, 181)  # Blue
        pdf.set_text_color(255, 255, 255)
        pdf.set_font("DejaVu", "B", 9)
        col_width = 190 / len(headers)
        for header in headers:
            pdf.cell(col_width, 7, clean_text(header), 1, 0, "C", fill=True)  # ← sanitized
        pdf.ln()
        pdf.set_text_color(0, 0, 0)
    def draw_table_row(values, fill=False):
        """Draw one table row; fill=True gives a light grey background."""
        pdf.set_font("DejaVu", "", 8)
        if fill:
            pdf.set_fill_color(245, 245, 245)
        col_width = 190 / len(values)
        for value in values:
            pdf.cell(col_width, 6, clean_text(str(value)), 1, 0, "C", fill)  # ← sanitized
        pdf.ln()
    # ═════════════════════════════════════════════════════════════════════
    # MAIN REPORT HEADER
    # ═════════════════════════════════════════════════════════════════════
    draw_colored_header(f"RAPPORT DE PERFORMANCE - {model_name}", 33, 150, 243)
    # Date and time of generation
    from datetime import datetime
    now = datetime.now()
    date_str = now.strftime('%d/%m/%Y')
    heure_str = now.strftime('%H:%M:%S')
    pdf.set_font("DejaVu", "", 9)
    pdf.cell(0, 5, f"Date : {date_str}", ln=True, align="C")
    pdf.cell(0, 5, f"Heure : {heure_str}", ln=True, align="C")
    pdf.cell(0, 5, f"Modele : {model_name}", ln=True, align="C")
    pdf.cell(0, 5, f"Nombre de documents : {len(results)}", ln=True, align="C")
    pdf.ln(5)
    # ═════════════════════════════════════════════════════════════════════
    # LOOP OVER EACH FILE (one page per document)
    # ═════════════════════════════════════════════════════════════════════
    for idx, res in enumerate(results, 1):
        details = res["details"]
        if idx > 1:
            pdf.add_page()
        # Title with document name + model
        pdf.set_fill_color(96, 125, 139)
        pdf.set_text_color(255, 255, 255)
        pdf.set_font("DejaVu", "B", 12)
        pdf.cell(0, 10, clean_text(f"Document {idx}: {res['file']}"), ln=True, fill=True, align="C")
        # Model and processing time subtitle
        pdf.set_text_color(0, 0, 0)
        pdf.set_font("DejaVu", "", 8)
        pdf.cell(0, 5, f"Modele utilise : {model_name} | Traite le : {date_str} a {heure_str}", ln=True, align="C")
        pdf.ln(3)
        # ─────────────────────────────────────────────────────────────────
        # SECTION 1: Overview (overall precision / recall / F1 bars)
        # ─────────────────────────────────────────────────────────────────
        draw_section_title("VUE D'ENSEMBLE")
        draw_progress_bar("Precision Globale", details.get('overall_precision', 0))
        draw_progress_bar("Rappel Global", details.get('overall_recall', 0))
        draw_progress_bar("F1-Score Global", details.get('overall_f1', 0))
        pdf.ln(3)
        # ─────────────────────────────────────────────────────────────────
        # SECTION 2: Node metrics
        # ─────────────────────────────────────────────────────────────────
        draw_section_title("ANALYSE DES NOEUDS")
        # Metrics table
        draw_table_header(["Metrique", "Precision", "Rappel", "F1-Score"])
        draw_table_row([
            "Noeuds",
            f"{details.get('precision_nodes', 0)}%",
            f"{details.get('recall_nodes', 0)}%",
            f"{details.get('f1_nodes', 0)}%"
        ])
        pdf.ln(3)
        # Generation statistics
        pdf.set_font("DejaVu", "B", 9)
        pdf.cell(0, 6, "Statistiques de generation :", ln=True)
        pdf.set_font("DejaVu", "", 9)
        total_gen = details.get('total_generated', 0)
        num_correct = details.get('num_correct', 0)
        num_halluc = details.get('num_hallucinated', 0)
        total_gt = details.get('total_gt', 0)
        num_detected = details.get('num_detected', 0)
        num_missed = details.get('num_missed', 0)
        # Row 1: generated nodes vs GT nodes
        pdf.cell(95, 6, f" Noeuds generes : {total_gen}", 0, 0)
        pdf.cell(95, 6, f" Noeuds GT : {total_gt}", 0, 1)
        # Row 2: correct nodes vs detected nodes
        pdf.cell(95, 6, f" Noeuds corrects : {num_correct}", 0, 0)
        pdf.cell(95, 6, f" Noeuds detectes : {num_detected}", 0, 1)
        # Row 3: hallucinations vs missed
        pdf.cell(95, 6, f" Noeuds hallucines : {num_halluc}", 0, 0)
        pdf.cell(95, 6, f" Noeuds GT manques : {num_missed}", 0, 1)
        # Row 4: hallucination + detection rates
        halluc_rate = details.get('hallucination_rate', 0)
        detect_rate = details.get('detection_rate', 0)
        pdf.cell(95, 6, f" Taux hallucination : {halluc_rate}%", 0, 0)
        pdf.cell(95, 6, f" Taux detection : {detect_rate}%", 0, 1)
        pdf.ln(3)
        # ═════════════════════════════════════════════════════════════════
        # Missed GT nodes (first 5 only)
        # ═════════════════════════════════════════════════════════════════
        missed_gt_nodes = details.get('missed_gt_nodes', [])
        if missed_gt_nodes:
            pdf.set_font("DejaVu", "B", 9)
            pdf.set_fill_color(255, 243, 224)  # Light orange background
            pdf.cell(0, 6, f"Noeuds GT non detectes ({len(missed_gt_nodes)}) :", ln=True, fill=True)
            pdf.set_font("DejaVu", "", 8)
            # Show the first 5
            for i, node in enumerate(missed_gt_nodes[:5], 1):
                pdf.cell(5, 5, "", 0, 0)  # Indentation
                pdf.cell(0, 5, clean_text(f"{i}. {node}"), ln=True)
            if len(missed_gt_nodes) > 5:
                pdf.cell(5, 5, "", 0, 0)
                pdf.cell(0, 5, f"... et {len(missed_gt_nodes) - 5} autres", ln=True)
            pdf.ln(2)
        # ═════════════════════════════════════════════════════════════════
        # Hallucinated nodes (first 5 only, if any)
        # ═════════════════════════════════════════════════════════════════
        hallucinated_nodes = details.get('hallucinated_nodes', [])
        if hallucinated_nodes:
            pdf.set_font("DejaVu", "B", 9)
            pdf.set_fill_color(255, 235, 230)  # Light red background
            pdf.cell(0, 6, f"Noeuds hallucines ({len(hallucinated_nodes)}) :", ln=True, fill=True)
            pdf.set_font("DejaVu", "", 8)
            # Show the first 5
            for i, node in enumerate(hallucinated_nodes[:5], 1):
                pdf.cell(5, 5, "", 0, 0)
                pdf.cell(0, 5, clean_text(f"{i}. {node}"), ln=True)
            if len(hallucinated_nodes) > 5:
                pdf.cell(5, 5, "", 0, 0)
                pdf.cell(0, 5, f"... et {len(hallucinated_nodes) - 5} autres", ln=True)
            pdf.ln(2)
        pdf.ln(2)
        # ─────────────────────────────────────────────────────────────────
        # SECTION 3: Edge metrics
        # ─────────────────────────────────────────────────────────────────
        draw_section_title("ANALYSE DES ARETES")  # ← no emoji (FPDF-safe)
        # Metrics table
        draw_table_header(["Metrique", "Precision", "Rappel", "F1-Score"])
        draw_table_row([
            "Aretes",
            f"{details.get('precision_edges', 0)}%",
            f"{details.get('recall_edges', 0)}%",
            f"{details.get('f1_edges', 0)}%"
        ])
        pdf.ln(2)
        # Edge statistics — the metrics may arrive as sets; coerce to lists
        tp_edges = details.get('tp_edges', [])
        fp_edges = details.get('fp_edges', [])
        fn_edges = details.get('fn_edges', [])
        inverted = details.get('inverted_edges', [])
        if not isinstance(tp_edges, list):
            tp_edges = list(tp_edges) if tp_edges else []
        if not isinstance(fp_edges, list):
            fp_edges = list(fp_edges) if fp_edges else []
        if not isinstance(fn_edges, list):
            fn_edges = list(fn_edges) if fn_edges else []
        if not isinstance(inverted, list):
            inverted = list(inverted) if inverted else []
        num_tp = len(tp_edges)
        num_fp = len(fp_edges)
        num_fn = len(fn_edges)
        num_inv = len(inverted)
        total_gt_edges = num_tp + num_fn
        total_test_edges = num_tp + num_fp + num_inv
        # Summary table
        draw_table_header(["Type", "GT", "Test", "Correctes", "Manquantes", "En trop", "Inversees"])
        draw_table_row([
            "Aretes",
            str(total_gt_edges),
            str(total_test_edges),
            str(num_tp),
            str(num_fn),
            str(num_fp),
            str(num_inv)
        ])
        pdf.ln(3)
        # ─────────────────────────────────────────────────────────────────
        # SECTION 4: Hierarchy (only when depth metrics are available)
        # ─────────────────────────────────────────────────────────────────
        if 'gt_depth' in details:
            draw_section_title("STRUCTURE HIERARCHIQUE")  # ← no emoji
            gt_depth = details.get('gt_depth', 0)
            test_depth = details.get('test_depth', 0)
            # Hierarchy table
            draw_table_header(["Metrique", "Valeur"])
            draw_table_row(["Profondeur GT", f"{gt_depth} niveaux"], fill=True)
            draw_table_row(["Profondeur Test", f"{test_depth} niveaux"])
            draw_table_row(["Difference", f"{details.get('depth_difference', 0)} niveaux"], fill=True)
            draw_table_row(["Precision niveaux", f"{details.get('level_accuracy', 0)}%"])
            draw_table_row(["Similarite structure", f"{details.get('structure_similarity', 0)}%"], fill=True)
            pdf.ln(2)
            # Per-level node distribution
            if details.get('gt_nodes_per_level') and details.get('test_nodes_per_level'):
                pdf.set_font("DejaVu", "B", 9)
                pdf.cell(0, 6, "Distribution par niveau :", ln=True)
                draw_table_header(["Niveau", "Noeuds GT", "Noeuds Test", "Ecart"])
                max_level = max(
                    max(details['gt_nodes_per_level'].keys(), default=0),
                    max(details['test_nodes_per_level'].keys(), default=0)
                )
                for level in range(max_level + 1):
                    gt_count = len(details['gt_nodes_per_level'].get(level, []))
                    test_count = len(details['test_nodes_per_level'].get(level, []))
                    ecart = abs(gt_count - test_count)
                    draw_table_row([
                        f"Niveau {level}",
                        str(gt_count),
                        str(test_count),
                        str(ecart)
                    ], fill=(level % 2 == 0))
                pdf.ln(3)
        # ─────────────────────────────────────────────────────────────────
        # SECTION: Graph Edit Distance (GED), when computed
        # ─────────────────────────────────────────────────────────────────
        if 'ged' in details and details['ged'] is not None:
            draw_section_title("GRAPH EDIT DISTANCE (GED)")
            ged_value = details.get('ged', 0)
            normalized_ged = details.get('normalized_ged', 0)
            structural_sim = details.get('structural_similarity', 0)
            # Main GED table
            draw_table_header(["Metrique", "Valeur"])
            draw_table_row(["GED (operations)", f"{ged_value:.2f}"], fill=True)
            draw_table_row(["GED Normalise", f"{normalized_ged:.2f}%"])
            draw_table_row(["Similarite Structurelle", f"{structural_sim:.2f}%"], fill=True)
            pdf.ln(2)
            # Operation breakdown
            pdf.set_font("DejaVu", "B", 9)
            pdf.cell(0, 6, "Operations requises :", ln=True)
            node_ins = details.get('num_node_insertions', 0)
            node_del = details.get('num_node_deletions', 0)
            edge_ins = details.get('num_edge_insertions', 0)
            edge_del = details.get('num_edge_deletions', 0)
            # Operations table
            draw_table_header(["Type", "Insertions", "Suppressions", "Total"])
            draw_table_row(["Noeuds", str(node_ins), str(node_del), str(node_ins + node_del)], fill=True)
            draw_table_row(["Aretes", str(edge_ins), str(edge_del), str(edge_ins + edge_del)])
            pdf.ln(3)
        # ─────────────────────────────────────────────────────────────────
        # SECTION 5: Error details — only when enough vertical space remains
        # ─────────────────────────────────────────────────────────────────
        if pdf.get_y() < 200:  # Threshold raised so this section has room
            draw_section_title("DETAILS DES ERREURS")
            pdf.set_font("DejaVu", "", 8)
            # ═════════════════════════════════════════════════════════════
            # Hallucinated nodes (top 10)
            # ═════════════════════════════════════════════════════════════
            hallucinated_nodes = details.get('hallucinated_nodes', [])
            if hallucinated_nodes:
                pdf.set_font("DejaVu", "B", 9)
                pdf.set_fill_color(255, 235, 230)  # Light red background
                pdf.cell(0, 6, f"Noeuds hallucines ({len(hallucinated_nodes)}) :", ln=True, fill=True)
                pdf.set_font("DejaVu", "", 8)
                for i, node in enumerate(hallucinated_nodes[:10], 1):  # Top 10
                    pdf.cell(5, 5, "", 0, 0)  # Indentation
                    pdf.cell(0, 5, clean_text(f"{i}. {node}"), ln=True)
                if len(hallucinated_nodes) > 10:
                    pdf.cell(5, 5, "", 0, 0)
                    pdf.cell(0, 5, f"... et {len(hallucinated_nodes) - 10} autres", ln=True)
                pdf.ln(2)
            # ═════════════════════════════════════════════════════════════
            # Missed GT nodes (top 10)
            # ═════════════════════════════════════════════════════════════
            missed_gt_nodes = details.get('missed_gt_nodes', [])
            if missed_gt_nodes:
                pdf.set_font("DejaVu", "B", 9)
                pdf.set_fill_color(255, 248, 225)  # Light orange background
                pdf.cell(0, 6, f"Noeuds GT manques ({len(missed_gt_nodes)}) :", ln=True, fill=True)
                pdf.set_font("DejaVu", "", 8)
                for i, node in enumerate(missed_gt_nodes[:10], 1):  # Top 10
                    pdf.cell(5, 5, "", 0, 0)  # Indentation
                    pdf.cell(0, 5, clean_text(f"{i}. {node}"), ln=True)
                if len(missed_gt_nodes) > 10:
                    pdf.cell(5, 5, "", 0, 0)
                    pdf.cell(0, 5, f"... et {len(missed_gt_nodes) - 10} autres", ln=True)
                pdf.ln(2)
            # ═════════════════════════════════════════════════════════════
            # Extra nodes (top 10)
            # ═════════════════════════════════════════════════════════════
            extra_nodes = details.get('extra_nodes', [])
            if extra_nodes:
                pdf.set_font("DejaVu", "B", 9)
                pdf.set_fill_color(240, 240, 255)  # Light blue background
                pdf.cell(0, 6, f"Noeuds en trop ({len(extra_nodes)}) :", ln=True, fill=True)
                pdf.set_font("DejaVu", "", 8)
                for i, node in enumerate(extra_nodes[:10], 1):
                    pdf.cell(5, 5, "", 0, 0)
                    pdf.cell(0, 5, clean_text(f"{i}. {node}"), ln=True)
                if len(extra_nodes) > 10:
                    pdf.cell(5, 5, "", 0, 0)
                    pdf.cell(0, 5, f"... et {len(extra_nodes) - 10} autres", ln=True)
                pdf.ln(2)
            # ═════════════════════════════════════════════════════════════
            # Missing edges (top 10; tuples only)
            # ═════════════════════════════════════════════════════════════
            missing_edges = details.get('missing_edges', [])
            if missing_edges:
                pdf.set_font("DejaVu", "B", 9)
                pdf.set_fill_color(255, 243, 224)  # Light yellow background
                pdf.cell(0, 6, f"Aretes manquantes ({len(missing_edges)}) :", ln=True, fill=True)
                pdf.set_font("DejaVu", "", 8)
                for i, edge in enumerate(missing_edges[:10], 1):
                    # Only (source, target) tuples are printable here
                    if isinstance(edge, tuple) and len(edge) == 2:
                        src, tgt = edge
                        pdf.cell(5, 5, "", 0, 0)
                        pdf.cell(0, 5, clean_text(f"{i}. {src} -> {tgt}"), ln=True)
                if len(missing_edges) > 10:
                    pdf.cell(5, 5, "", 0, 0)
                    pdf.cell(0, 5, f"... et {len(missing_edges) - 10} autres", ln=True)
                pdf.ln(2)
            # ═════════════════════════════════════════════════════════════
            # Extra edges (top 10; tuples only)
            # ═════════════════════════════════════════════════════════════
            extra_edges = details.get('extra_edges', [])
            if extra_edges:
                pdf.set_font("DejaVu", "B", 9)
                pdf.set_fill_color(235, 245, 255)  # Light cyan background
                pdf.cell(0, 6, f"Aretes en trop ({len(extra_edges)}) :", ln=True, fill=True)
                pdf.set_font("DejaVu", "", 8)
                for i, edge in enumerate(extra_edges[:10], 1):
                    if isinstance(edge, tuple) and len(edge) == 2:
                        src, tgt = edge
                        pdf.cell(5, 5, "", 0, 0)
                        pdf.cell(0, 5, clean_text(f"{i}. {src} -> {tgt}"), ln=True)
                if len(extra_edges) > 10:
                    pdf.cell(5, 5, "", 0, 0)
                    pdf.cell(0, 5, f"... et {len(extra_edges) - 10} autres", ln=True)
                pdf.ln(2)
            # ═════════════════════════════════════════════════════════════
            # Inverted edges (top 10; tuples only)
            # ═════════════════════════════════════════════════════════════
            inverted_edges = details.get('inverted_edges', [])
            if inverted_edges:
                pdf.set_font("DejaVu", "B", 9)
                pdf.set_fill_color(255, 240, 245)  # Light pink background
                pdf.cell(0, 6, f"Aretes inversees ({len(inverted_edges)}) :", ln=True, fill=True)
                pdf.set_font("DejaVu", "", 8)
                if not isinstance(inverted_edges, list):
                    inverted_edges = list(inverted_edges) if inverted_edges else []
                for i, edge in enumerate(inverted_edges[:10], 1):
                    if isinstance(edge, tuple) and len(edge) == 2:
                        src, tgt = edge
                        pdf.cell(5, 5, "", 0, 0)
                        pdf.cell(0, 5, clean_text(f"{i}. {src} -> {tgt} (devrait etre {tgt} -> {src})"), ln=True)
                if len(inverted_edges) > 10:
                    pdf.cell(5, 5, "", 0, 0)
                    pdf.cell(0, 5, f"... et {len(inverted_edges) - 10} autres", ln=True)
    # ═════════════════════════════════════════════════════════════════════
    # FINAL SUMMARY PAGE (averages + per-document recap)
    # ═════════════════════════════════════════════════════════════════════
    pdf.add_page()
    draw_colored_header("SYNTHESE GLOBALE", 76, 175, 80)  # ← no emoji
    # Averages across all documents
    # NOTE(review): raises ZeroDivisionError when results is empty — confirm
    # callers guarantee at least one result.
    n = len(results)
    avg_prec = sum(r["details"].get("overall_precision", 0) for r in results) / n
    avg_rec = sum(r["details"].get("overall_recall", 0) for r in results) / n
    avg_f1 = sum(r["details"].get("overall_f1", 0) for r in results) / n
    draw_section_title("MOYENNES SUR TOUS LES DOCUMENTS")
    draw_progress_bar("Precision Moyenne", avg_prec)
    draw_progress_bar("Rappel Moyen", avg_rec)
    draw_progress_bar("F1-Score Moyen", avg_f1)
    pdf.ln(5)
    # Per-document recap table
    draw_section_title("RECAPITULATIF PAR DOCUMENT")
    draw_table_header(["Document", "Precision", "Rappel", "F1"])
    for i, r in enumerate(results, 1):
        d = r["details"]
        draw_table_row([
            r["file"][:25],  # Truncate long filenames
            f"{d.get('overall_precision', 0):.1f}%",
            f"{d.get('overall_recall', 0):.1f}%",
            f"{d.get('overall_f1', 0):.1f}%"
        ], fill=(i % 2 == 0))
    # ═════════════════════════════════════════════════════════════════════
    # Save the report to disk
    # ═════════════════════════════════════════════════════════════════════
    output_path = f"reports/{model_name}_report.pdf"
    pdf.output(output_path)
    logging.info(f"Rapport PDF moderne sauvegarde dans {output_path}")
def plot_all_boxplots():
    """
    Build three box-plots comparing the performance of every tested model.

    Returns:
        tuple: Up to three matplotlib figures (precision, recall, F1
        distributions); empty when no model has been recorded yet.

    Note:
        Reads the module-level list ``model_precision_records``, which
        accumulates one record per (model, document) pair during the session.

    Reading the plots:
        - Box: Q1 to Q3 (middle 50% of values)
        - Center line: median
        - Whiskers: min/max (excluding outliers)
        - Dots: outliers
    """
    # Group the accumulated records by model name.
    grouped = {}
    for record in model_precision_records:
        grouped.setdefault(record["model"], []).append(record)
    model_names = list(grouped)

    # One spec per metric: (record key, y-axis label, figure title).
    metric_specs = (
        ("precision", "Précision (%)", "Distribution des Précisions"),
        ("recall", "Rappel (%)", "Distribution des Rappels"),
        ("f1", "F1-Score (%)", "Distribution des F1-Scores"),
    )

    figures = []
    for record_key, axis_label, plot_title in metric_specs:
        # One series per model, in model_names order.
        series = [[entry[record_key] for entry in grouped[name]] for name in model_names]
        # Nothing recorded → nothing to draw for this metric.
        if not series or not model_names:
            continue
        fig, axis = plt.subplots()
        axis.boxplot(series, tick_labels=model_names)
        axis.set_ylabel(axis_label)
        axis.set_title(plot_title)
        # Slant the model names so long labels stay readable.
        plt.xticks(rotation=30, ha="right")
        plt.tight_layout()
        figures.append(fig)
    return tuple(figures)
def save_performance_plots():
    """
    Persist the performance box-plots as temporary PNG files.

    Returns:
        list: Paths of the PNG files written (one per available figure).

    Note:
        Files are created in the system temp directory with the prefixes
        "precision_", "recall_" and "f1_" and are left for the operating
        system to clean up.
    """
    saved_paths = []
    # Pair each generated figure with its filename prefix; zip stops at
    # the shorter sequence, so fewer than 3 figures is handled naturally.
    for figure, metric in zip(plot_all_boxplots(), ("precision", "recall", "f1")):
        handle = tempfile.NamedTemporaryFile(
            delete=False,                 # keep the file after close
            suffix=".png",
            prefix=f"{metric}_",
            dir=tempfile.gettempdir(),    # system temp folder
        )
        figure.savefig(handle.name, format="png", bbox_inches="tight")
        plt.close(figure)                 # release the figure's memory
        handle.close()
        saved_paths.append(handle.name)
    return saved_paths
# ============================================
# SECTION 8 : FONCTION PRINCIPALE DE TRAITEMENT
# ============================================
def process_files(files, model_choice, use_semantic=True, fuzzy_threshold=80,
                  semantic_threshold=70, alpha=0.6, edge_mode="hybrid"):
    """
    Main function: process a batch of files and evaluate model performance.

    Args:
        files (list): Uploaded files (Gradio file objects).
        model_choice (str): Display name of the model to use.
        use_semantic (bool): Enable hybrid fuzzy+semantic matching
            (otherwise fuzzy matching only).
        fuzzy_threshold (int): Fuzzy-matching threshold (0-100).
        semantic_threshold (int): Semantic-matching threshold (0-100).
        alpha (float): Fuzzy/semantic weighting (0-1).
        edge_mode (str): Edge-comparison mode forwarded to the comparator.

    Returns:
        tuple: 8 elements, matching the Gradio outputs wiring:
            (gallery_images, mermaid_text, dropdown_update, iframe_dict,
             gt_graph_html, test_graph_html, summary_html, report_msg)

    Pipeline:
        1. Clear Neo4j
        2. Per file: PDF -> image, Base64 encoding, Mermaid generation,
           Mermaid -> JSON, Ground-Truth comparison, Neo4j upload,
           diff persistence
        3. Average-metric computation
        4. PDF report generation
        5. Building the Gradio outputs
    """
    # ═════════════════════════════════════════════════════════════════════
    # STEP 0: Input validation
    # ═════════════════════════════════════════════════════════════════════
    if not files:
        # BUGFIX: this early return previously had only 7 elements while the
        # interface is wired for 8 outputs (the GT/Test graph panels were
        # added later) — pad with the two empty panel strings.
        return None, "", gr_update(choices=[], value=None), {}, "", "", "", ""
    # ═════════════════════════════════════════════════════════════════════
    # STEP 1: Initialisation
    # ═════════════════════════════════════════════════════════════════════
    neo4j_ok = clear_neo4j()  # empty the database for this batch
    if not neo4j_ok:
        logging.error("Neo4j indisponible: la génération continue sans upload Neo4j.")
    # Working structures
    results = []          # per-file comparison results
    images = []           # images for the Gradio gallery
    mermaids = []         # generated Mermaid sources
    docs = []             # document names
    iframe_dict = {}      # doc_name -> HTML iframes
    error_messages = []   # per-file error summaries for the UI
    # Map the display name to an internal model key (default: Gemini).
    # Prefixes are checked in the same order as the original if/elif chain.
    model_choice_str = str(model_choice or "")
    lowered = model_choice_str.lower()
    model_key = "Gemini"
    for prefix, key in (
        ("gemini", "Gemini"),
        ("gemma", "GEMMA"),
        ("llama", "LLAMA"),
        ("qwen", "QWEN"),
        ("nvidia", "NVIDIA"),
        ("internvl", "InternVL"),
    ):
        if lowered.startswith(prefix):
            model_key = key
            break
    # The generation function is the same for every file: hoist the
    # dispatch out of the loop (it was rebuilt on each iteration).
    mermaid_fn = {
        "Gemini": generate_mermaid_from_image_gemini,
        "LLAMA": generate_mermaid_from_llama,
        "GEMMA": generate_mermaid_from_gemma,
        "QWEN": generate_mermaid_from_qwen,
        "NVIDIA": generate_mermaid_from_nvidia,
        "InternVL": generate_mermaid_from_intern,
    }.get(model_key, generate_mermaid_from_image_gemini)  # Gemini by default
    # ═════════════════════════════════════════════════════════════════════
    # STEP 2: Per-file processing
    # ═════════════════════════════════════════════════════════════════════
    for file in files:
        diff_path = None  # path of the differences file for this document
        try:
            # ─────────────────────────────────────────────────────────────
            # 2.1: Preparation
            # ─────────────────────────────────────────────────────────────
            # File name without extension
            name = os.path.splitext(os.path.basename(file.name))[0]
            # Path of the matching Ground Truth
            gt = os.path.join("GT", f"{name}.json")
            gt_exists = os.path.exists(gt)
            if not gt_exists:
                logging.error(f"Ground truth manquant pour {name}")
            # ─────────────────────────────────────────────────────────────
            # 2.2: Image conversion (if PDF)
            # ─────────────────────────────────────────────────────────────
            if file.name.lower().endswith(".pdf"):
                img = convert_pdf_to_image(file.name)
            else:
                img = file.name  # already an image
            # ─────────────────────────────────────────────────────────────
            # 2.3: Base64 encoding
            # ─────────────────────────────────────────────────────────────
            b64 = encode_image(img)
            # ─────────────────────────────────────────────────────────────
            # 2.4: Mermaid generation by the selected model
            # ─────────────────────────────────────────────────────────────
            code = mermaid_fn(b64)
            # ─────────────────────────────────────────────────────────────
            # 2.5: Mermaid -> JSON conversion
            # ─────────────────────────────────────────────────────────────
            j = mermaid_to_json(code)
            # Load the Ground Truth once (it was previously loaded twice:
            # once for the comparison, once more for the Neo4j upload).
            if gt_exists:
                gt_n, gt_e = load_json(gt)
            # ─────────────────────────────────────────────────────────────
            # 2.6: Comparison against the Ground Truth
            # ─────────────────────────────────────────────────────────────
            if gt_exists:
                if use_semantic:
                    cmp = compare_graphs_with_semantic_fast(
                        gt_n, gt_e, j["nodes"], j["edges"],
                        fuzzy_threshold=fuzzy_threshold,
                        semantic_threshold=semantic_threshold,
                        alpha=alpha,
                        edge_mode=edge_mode
                    )
                else:
                    cmp = compare_graphs(
                        gt_n, gt_e, j["nodes"], j["edges"],
                        threshold=fuzzy_threshold
                    )
            else:
                # No GT available: neutral metrics so the pipeline continues.
                cmp = {
                    "overall_precision": 0,
                    "overall_recall": 0,
                    "overall_f1": 0,
                    "precision_nodes": 0,
                    "recall_nodes": 0,
                    "f1_nodes": 0,
                    "precision_edges": 0,
                    "recall_edges": 0,
                    "f1_edges": 0,
                    "missing_nodes": [],
                    "extra_nodes": [],
                    "missing_edges": [],
                    "extra_edges": [],
                    "hallucination_rate": 0,
                    "detection_rate": 0,
                    "total_generated": len(j.get("nodes", [])),
                    "num_hallucinated": 0,
                    "num_correct": 0,
                    "hallucinated_nodes": [],
                    "correct_nodes": [],
                    "total_gt": 0,
                    "num_detected": 0,
                    "num_missed": 0,
                    "missed_gt_nodes": []
                }
            # ─────────────────────────────────────────────────────────────
            # 2.7: Persist the differences as a JSON file
            # ─────────────────────────────────────────────────────────────
            diff_path = f"graph_diff_{name}.json"
            diff_data = {
                "missing_nodes": cmp["missing_nodes"],
                "extra_nodes": cmp["extra_nodes"],
                "missing_edges": cmp["missing_edges"],
                "extra_edges": cmp["extra_edges"],
            }
            # Include matching details when the comparator provides them
            if "matching_details" in cmp:
                diff_data["matching_details"] = cmp["matching_details"]
            with open(diff_path, "w", encoding="utf-8") as f:
                json.dump(diff_data, f, ensure_ascii=False, indent=2)
            # BUGFIX: separator between name and path (they were concatenated)
            logging.info(f"Différences écrites pour {name}: {diff_path}")
            # ─────────────────────────────────────────────────────────────
            # 2.8: Neo4j upload
            # ─────────────────────────────────────────────────────────────
            if gt_exists:
                gt_json = {"nodes": [{"id": k, "label": v} for k, v in gt_n.items()], "edges": gt_e}
            else:
                gt_json = {"nodes": [], "edges": []}
            # Upload GT and Test together
            if neo4j_ok:
                upload_gt_and_test_to_neo4j(gt_json, j, name)
            # ─────────────────────────────────────────────────────────────
            # 2.9: Record the results
            # ─────────────────────────────────────────────────────────────
            results.append({
                "file": name,
                "precision": cmp["overall_precision"],
                "details": {
                    **cmp,
                    "test_nodes": j.get("nodes", []),
                    "test_edges": j.get("edges", [])
                }
            })
            # Image.open is lazy and would keep the file handle open for the
            # whole session: load eagerly and close the handle.
            with Image.open(img) as im:
                images.append(im.copy())
            mermaids.append(code)
        except Exception as e:
            logging.error(f"Erreur sur le fichier {file.name}: {e}")
            error_messages.append(f"{os.path.basename(file.name)}: {e}")
            continue
    # ═════════════════════════════════════════════════════════════════════
    # STEP 3: Global error handling (no file succeeded)
    # ═════════════════════════════════════════════════════════════════════
    if not results:
        empty_msg = '<div style="text-align:center;padding:40px;color:#7f8c8d;background:#f8f9fa;border:2px dashed #e0e0e0;border-radius:8px;">Aucun graphe disponible</div>'
        if error_messages:
            details = "<br/>".join(error_messages[-8:])  # show the last 8 errors
            empty_msg = (
                '<div style="padding:16px;background:#fff3cd;border:1px solid #ffeeba;border-radius:8px;">'
                '<div style="font-weight:600;margin-bottom:8px;">Erreurs lors de la génération</div>'
                f'<div style="font-family:monospace;white-space:pre-wrap;">{details}</div>'
                '</div>'
            )
        return (
            None,                                       # 1. images (gallery)
            "",                                         # 2. mermaid (textbox)
            gr_update(choices=[], value=None),          # 3. dropdown
            {},                                         # 4. iframe_dict (json)
            empty_msg,                                  # 5. gt_graph_panel (html)
            empty_msg,                                  # 6. test_graph_panel (html)
            "<div>Erreur sur tous les fichiers</div>",  # 7. summary (html)
            ""                                          # 8. report_status (textbox)
        )
    # ═════════════════════════════════════════════════════════════════════
    # STEP 4: Average metrics
    # ═════════════════════════════════════════════════════════════════════
    n = len(results)  # number of successfully processed files (>= 1 here)
    # Classic metrics
    avg_precision = round(
        sum(r["details"]["overall_precision"] for r in results) / n, 2
    )
    avg_recall = round(
        sum(r["details"]["overall_recall"] for r in results) / n, 2
    )
    avg_f1 = round(
        sum(r["details"]["overall_f1"] for r in results) / n, 2
    )
    # Hallucination metrics
    avg_hallucination = round(
        sum(r["details"].get("hallucination_rate", 0) for r in results) / n, 2
    )
    avg_detection = round(
        sum(r["details"].get("detection_rate", 0) for r in results) / n, 2
    )
    # Cumulative totals
    total_hallucinated = sum(r["details"].get("num_hallucinated", 0) for r in results)
    total_generated = sum(r["details"].get("total_generated", 0) for r in results)
    total_detected = sum(r["details"].get("num_detected", 0) for r in results)
    total_gt = sum(r["details"].get("total_gt", 0) for r in results)
    # ═════════════════════════════════════════════════════════════════════
    # STEP 5: Record for the session-wide performance plots
    # ═════════════════════════════════════════════════════════════════════
    model_precision_records.append({
        "model": model_choice,
        "precision": avg_precision,
        "recall": avg_recall,
        "f1": avg_f1,
        "hallucination_rate": avg_hallucination
    })
    # ═════════════════════════════════════════════════════════════════════
    # STEP 6: PDF report (best effort — the app continues on failure)
    # ═════════════════════════════════════════════════════════════════════
    try:
        save_results_to_pdf(results, model_choice)
        report_status = "Rapport PDF généré."
    except Exception as e:
        logging.error(f"⚠️ Génération PDF ignorée (l'application continue): {e}")
        report_status = f"Rapport PDF non généré: {e}"
    # ═════════════════════════════════════════════════════════════════════
    # STEP 7: Build the summary HTML for the interface
    # ═════════════════════════════════════════════════════════════════════
    # Edge-metric averages
    avg_precision_edges = round(
        sum(r["details"]["precision_edges"] for r in results) / n, 2
    )
    avg_recall_edges = round(
        sum(r["details"]["recall_edges"] for r in results) / n, 2
    )
    avg_f1_edges = round(
        sum(r["details"]["f1_edges"] for r in results) / n, 2
    )
    avg_inversion_rate = round(
        sum(r["details"].get("inversion_rate", 0) for r in results) / n, 2
    )
    # Hierarchical-metric averages
    avg_gt_depth = round(
        sum(r["details"].get("gt_depth", 0) for r in results) / n, 1
    )
    avg_test_depth = round(
        sum(r["details"].get("test_depth", 0) for r in results) / n, 1
    )
    avg_level_accuracy = round(
        sum(r["details"].get("level_accuracy", 0) for r in results) / n, 2
    )
    avg_structure_similarity = round(
        sum(r["details"].get("structure_similarity", 0) for r in results) / n, 2
    )
    depth_matches = sum(1 for r in results if r["details"].get("depth_match", False))
    depth_match_rate = round(depth_matches / n * 100, 2)
    # GED averages (documents without a GED value contribute 0 to the
    # numerator while the denominator stays n — kept as before)
    avg_ged = round(
        sum(r["details"].get("ged", 0) for r in results if r["details"].get("ged") is not None) / n, 2
    ) if n > 0 else 0
    avg_normalized_ged = round(
        sum(r["details"].get("normalized_ged", 0) for r in results if
            r["details"].get("normalized_ged") is not None) / n, 2
    ) if n > 0 else 0
    avg_structural_sim_ged = round(
        sum(r["details"].get("structural_similarity", 0) for r in results if
            r["details"].get("structural_similarity") is not None) / n, 2
    ) if n > 0 else 0
    total_node_ins = sum(r["details"].get("num_node_insertions", 0) for r in results)
    total_node_del = sum(r["details"].get("num_node_deletions", 0) for r in results)
    total_edge_ins = sum(r["details"].get("num_edge_insertions", 0) for r in results)
    total_edge_del = sum(r["details"].get("num_edge_deletions", 0) for r in results)
    summary_html = f"""
    <div style='font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; background: #ffffff; padding: 24px; border-radius: 12px; border: 1px solid #e0e0e0; box-shadow: 0 1px 3px rgba(0,0,0,0.08);'>
        <!-- En-tête sobre -->
        <div style="border-bottom: 2px solid #f5f5f5; padding-bottom: 16px; margin-bottom: 24px;">
            <h2 style='color: #2c3e50; margin: 0; font-size: 1.5em; font-weight: 600;'>
                Résumé des Performances
            </h2>
            <p style="color: #7f8c8d; margin: 8px 0 0 0; font-size: 0.9em;">
                Analyse de {n} document(s) • Modèle : {model_choice} • Mode : {edge_mode}
            </p>
        </div>
        <!-- Métriques Principales -->
        <div style="margin-bottom: 32px;">
            <h3 style="color: #34495e; font-size: 1em; font-weight: 600; margin: 0 0 16px 0;">
                Métriques Globales
            </h3>
            <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 16px;">
                <!-- Précision -->
                <div style="background: #f8f9fa; padding: 16px; border-radius: 8px; border-left: 3px solid #3498db;">
                    <div style="color: #7f8c8d; font-size: 0.75em; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Précision</div>
                    <div style="color: #2c3e50; font-size: 2em; font-weight: 700; line-height: 1;">{avg_precision}%</div>
                </div>
                <!-- Rappel -->
                <div style="background: #f8f9fa; padding: 16px; border-radius: 8px; border-left: 3px solid #2ecc71;">
                    <div style="color: #7f8c8d; font-size: 0.75em; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">Rappel</div>
                    <div style="color: #2c3e50; font-size: 2em; font-weight: 700; line-height: 1;">{avg_recall}%</div>
                </div>
                <!-- F1-Score -->
                <div style="background: #f8f9fa; padding: 16px; border-radius: 8px; border-left: 3px solid #9b59b6;">
                    <div style="color: #7f8c8d; font-size: 0.75em; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 8px;">F1-Score</div>
                    <div style="color: #2c3e50; font-size: 2em; font-weight: 700; line-height: 1;">{avg_f1}%</div>
                </div>
            </div>
        </div>
        <!-- Analyse des Nœuds et Arêtes -->
        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 24px; margin-bottom: 32px;">
            <!-- Nœuds -->
            <div>
                <h3 style="color: #34495e; font-size: 1em; font-weight: 600; margin: 0 0 12px 0;">
                    Analyse des Nœuds
                </h3>
                <div style="background: #ffffff; border: 1px solid #e0e0e0; border-radius: 8px; overflow: hidden;">
                    <table style="width: 100%; border-collapse: collapse; font-size: 0.85em;">
                        <tr style="background: #f8f9fa;">
                            <th style="padding: 10px; text-align: left; color: #7f8c8d; font-weight: 600; font-size: 0.8em; text-transform: uppercase; letter-spacing: 0.5px;">Métrique</th>
                            <th style="padding: 10px; text-align: right; color: #7f8c8d; font-weight: 600; font-size: 0.8em; text-transform: uppercase; letter-spacing: 0.5px;">Valeur</th>
                        </tr>
                        <tr>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">Précision</td>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{sum(r["details"]["precision_nodes"] for r in results) / n:.1f}%</td>
                        </tr>
                        <tr>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">Rappel</td>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{sum(r["details"]["recall_nodes"] for r in results) / n:.1f}%</td>
                        </tr>
                        <tr>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">F1-Score</td>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{sum(r["details"]["f1_nodes"] for r in results) / n:.1f}%</td>
                        </tr>
                        <tr style="background: #fef9e7; border-top: 2px solid #f39c12;">
                            <td style="padding: 10px; color: #e67e22; font-weight: 600;">Hallucination</td>
                            <td style="padding: 10px; text-align: right; font-weight: 700; color: #e67e22;">{avg_hallucination}%</td>
                        </tr>
                    </table>
                </div>
            </div>
            <!-- Arêtes -->
            <div>
                <h3 style="color: #34495e; font-size: 1em; font-weight: 600; margin: 0 0 12px 0;">
                    Analyse des Arêtes
                </h3>
                <div style="background: #ffffff; border: 1px solid #e0e0e0; border-radius: 8px; overflow: hidden;">
                    <table style="width: 100%; border-collapse: collapse; font-size: 0.85em;">
                        <tr style="background: #f8f9fa;">
                            <th style="padding: 10px; text-align: left; color: #7f8c8d; font-weight: 600; font-size: 0.8em; text-transform: uppercase; letter-spacing: 0.5px;">Métrique</th>
                            <th style="padding: 10px; text-align: right; color: #7f8c8d; font-weight: 600; font-size: 0.8em; text-transform: uppercase; letter-spacing: 0.5px;">Valeur</th>
                        </tr>
                        <tr>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">Précision</td>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{avg_precision_edges}%</td>
                        </tr>
                        <tr>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">Rappel</td>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{avg_recall_edges}%</td>
                        </tr>
                        <tr>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; color: #34495e;">F1-Score</td>
                            <td style="padding: 10px; border-top: 1px solid #f0f0f0; text-align: right; font-weight: 600; color: #2c3e50;">{avg_f1_edges}%</td>
                        </tr>
                        <tr style="background: #fef5e7; border-top: 2px solid #e67e22;">
                            <td style="padding: 10px; color: #d35400; font-weight: 600;">Inversions</td>
                            <td style="padding: 10px; text-align: right; font-weight: 700; color: #d35400;">{avg_inversion_rate}%</td>
                        </tr>
                    </table>
                </div>
            </div>
        </div>
        <!-- Hiérarchie -->
        <div style="margin-bottom: 32px;">
            <h3 style="color: #34495e; font-size: 1em; font-weight: 600; margin: 0 0 12px 0;">
                Structure Hiérarchique
            </h3>
            <div style="background: #ffffff; border: 1px solid #e0e0e0; border-radius: 8px; padding: 16px;">
                <div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 12px;">
                    <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;">
                        <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">Profondeur GT</div>
                        <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_gt_depth}</div>
                    </div>
                    <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;">
                        <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">Profondeur Test</div>
                        <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_test_depth}</div>
                    </div>
                    <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;">
                        <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">Précision Niveaux</div>
                        <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_level_accuracy}%</div>
                    </div>
                    <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;">
                        <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">Similarité</div>
                        <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_structure_similarity}%</div>
                    </div>
                </div>
                <div style="margin-top: 12px; padding-top: 12px; border-top: 1px solid #e0e0e0; color: #7f8c8d; font-size: 0.75em;">
                    {depth_matches} document(s) avec profondeur exacte sur {n} ({depth_match_rate}%)
                </div>
            </div>
        </div>
        <!-- Graph Edit Distance -->
        <div style="margin-bottom: 32px;">
            <h3 style="color: #34495e; font-size: 1em; font-weight: 600; margin: 0 0 12px 0;">
                Graph Edit Distance (GED)
            </h3>
            <div style="background: #ffffff; border: 1px solid #e0e0e0; border-radius: 8px; padding: 16px;">
                <!-- Métriques principales GED -->
                <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 12px; margin-bottom: 16px;">
                    <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;">
                        <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">GED Moyen</div>
                        <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_ged}</div>
                    </div>
                    <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;">
                        <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">GED Normalisé</div>
                        <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_normalized_ged}%</div>
                    </div>
                    <div style="text-align: center; padding: 12px; background: #f8f9fa; border-radius: 6px;">
                        <div style="color: #7f8c8d; font-size: 0.7em; margin-bottom: 4px;">Similarité</div>
                        <div style="color: #2c3e50; font-size: 1.5em; font-weight: 700;">{avg_structural_sim_ged}%</div>
                    </div>
                </div>
                <!-- Détails des opérations -->
                <div style="padding-top: 12px; border-top: 1px solid #e0e0e0;">
                    <div style="color: #7f8c8d; font-size: 0.75em; margin-bottom: 8px; font-weight: 600;">Opérations totales requises :</div>
                    <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 8px; font-size: 0.85em;">
                        <div style="background: #fef5e7; padding: 8px; border-radius: 4px;">
                            <span style="color: #7f8c8d;">Insertions nœuds :</span>
                            <span style="color: #e67e22; font-weight: 600; margin-left: 8px;">{total_node_ins}</span>
                        </div>
                        <div style="background: #fef5e7; padding: 8px; border-radius: 4px;">
                            <span style="color: #7f8c8d;">Suppressions nœuds :</span>
                            <span style="color: #e67e22; font-weight: 600; margin-left: 8px;">{total_node_del}</span>
                        </div>
                        <div style="background: #ebf5fb; padding: 8px; border-radius: 4px;">
                            <span style="color: #7f8c8d;">Insertions arêtes :</span>
                            <span style="color: #3498db; font-weight: 600; margin-left: 8px;">{total_edge_ins}</span>
                        </div>
                        <div style="background: #ebf5fb; padding: 8px; border-radius: 4px;">
                            <span style="color: #7f8c8d;">Suppressions arêtes :</span>
                            <span style="color: #3498db; font-weight: 600; margin-left: 8px;">{total_edge_del}</span>
                        </div>
                    </div>
                    <div style="margin-top: 12px; padding: 8px; background: #f0f4f8; border-radius: 4px; font-size: 0.8em; color: #5a6c7d;">
                        💡 Le GED mesure le nombre minimum d'opérations (insertions/suppressions) pour transformer le graphe test en graphe GT
                    </div>
                </div>
            </div>
        </div>
        <!-- Statistiques Détaillées -->
        <div style="background: #f8f9fa; border-radius: 8px; padding: 16px;">
            <h3 style="color: #34495e; font-size: 0.9em; font-weight: 600; margin: 0 0 12px 0;">
                Statistiques de Génération
            </h3>
            <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 16px; font-size: 0.85em;">
                <div>
                    <span style="color: #7f8c8d;">Nœuds générés :</span>
                    <span style="color: #2c3e50; font-weight: 600; margin-left: 8px;">{total_generated}</span>
                </div>
                <div>
                    <span style="color: #7f8c8d;">Nœuds GT :</span>
                    <span style="color: #2c3e50; font-weight: 600; margin-left: 8px;">{total_gt}</span>
                </div>
                <div>
                    <span style="color: #7f8c8d;">Nœuds corrects :</span>
                    <span style="color: #27ae60; font-weight: 600; margin-left: 8px;">{total_generated - total_hallucinated}</span>
                </div>
                <div>
                    <span style="color: #7f8c8d;">Nœuds détectés :</span>
                    <span style="color: #27ae60; font-weight: 600; margin-left: 8px;">{total_detected}</span>
                </div>
                <div>
                    <span style="color: #7f8c8d;">Nœuds hallucinés :</span>
                    <span style="color: #e74c3c; font-weight: 600; margin-left: 8px;">{total_hallucinated}</span>
                </div>
                <div>
                    <span style="color: #7f8c8d;">Taux de détection :</span>
                    <span style="color: #2c3e50; font-weight: 600; margin-left: 8px;">{avg_detection}%</span>
                </div>
            </div>
        </div>
    </div>
    """
    # ═════════════════════════════════════════════════════════════════════
    # STEP 8: Build the iframes for the graph visualisation
    # ═════════════════════════════════════════════════════════════════════
    for r in results:
        doc = r["file"]
        docs.append(doc)
        # Render Mermaid directly (HF-compatible, no local :8000 server needed)
        gt_path = os.path.join("GT", f"{doc}.json")
        if os.path.exists(gt_path):
            gt_nodes, gt_edges = load_json(gt_path)
            gt_json = {"nodes": [{"id": k, "label": v} for k, v in gt_nodes.items()], "edges": gt_edges}
            gt_mermaid = graph_json_to_mermaid(gt_json)
        else:
            gt_mermaid = "graph TD;"
        test_mermaid = graph_json_to_mermaid({"nodes": r["details"].get("test_nodes", []), "edges": r["details"].get("test_edges", [])})
        gt_iframe_html = mermaid_to_html(gt_mermaid, f"mermaid_gt_{doc}")
        test_iframe_html = mermaid_to_html(test_mermaid, f"mermaid_test_{doc}")
        # Keep both renderings for the dropdown callback
        iframe_dict[doc] = {
            "gt": gt_iframe_html,
            "test": test_iframe_html
        }
    # ═════════════════════════════════════════════════════════════════════
    # STEP 9: Prepare the Gradio outputs
    # ═════════════════════════════════════════════════════════════════════
    # Dropdown populated with the document list
    dropdown_update = gr_update(choices=docs, value=docs[0])
    # Initial iframes (first document)
    first_doc_iframes = iframe_dict[docs[0]]
    initial_gt_graph = first_doc_iframes["gt"]
    initial_test_graph = first_doc_iframes["test"]
    # BUGFIX: the status message previously always claimed the report was
    # saved, even when STEP 6 failed — surface the real status instead.
    if report_status == "Rapport PDF généré.":
        report_msg = f"Rapport sauvegardé : reports/{model_choice}_report.pdf"
    else:
        report_msg = report_status
    # ═════════════════════════════════════════════════════════════════
    # RETURN: 8-element tuple for Gradio
    # ═════════════════════════════════════════════════════════════════
    return (
        images,                    # 1. image gallery
        "\n\n".join(mermaids),     # 2. Mermaid sources
        dropdown_update,           # 3. dropdown
        iframe_dict,               # 4. iframe dict
        initial_gt_graph,          # 5. GT graph of the first doc
        initial_test_graph,        # 6. Test graph of the first doc
        summary_html,              # 7. summary
        report_msg                 # 8. status message
    )
# ============================================
# SECTION 9 : FONCTIONS AUXILIAIRES POUR L'INTERFACE
# ============================================
def select_graph(selected_doc, iframe_dict):
    """
    Return the GT and Test iframe HTML for the selected document.

    Args:
        selected_doc (str): Name of the selected document.
        iframe_dict (dict): Maps doc_name -> {"gt": gt_html, "test": test_html}.

    Returns:
        tuple: (gt_iframe_html, test_iframe_html); a placeholder pair when
        the selection is empty or unknown.
    """
    placeholder = '<div style="text-align:center;padding:40px;color:#7f8c8d;">Aucun graphe disponible</div>'
    # Guard clause: empty selection or document not rendered yet.
    if not selected_doc or selected_doc not in iframe_dict:
        return placeholder, placeholder
    pair = iframe_dict[selected_doc]
    return pair["gt"], pair["test"]
def aggregate_and_preview(paths, threshold, fuzzy_threshold=70, semantic_threshold=70):
    """
    Aggregate exported mindmap JSONs and build a preview plus comparison metrics.

    Args:
        paths (list): JSON files exported beforehand (step 5.1 of the UI).
        threshold (float): Node-frequency threshold in percent (0-100);
            divided by 100 before being handed to the aggregator.
        fuzzy_threshold (int): Fuzzy-matching threshold (0-100).
        semantic_threshold (int): Semantic-matching threshold (0-100).

    Returns:
        tuple: (aggregate_json_path_or_None, preview_html, metrics_html) —
        always 3 outputs so the Gradio wiring stays consistent.

    Note:
        The preview iframe points at a local static server
        (http://127.0.0.1:8000) — presumably started elsewhere; the preview
        only renders when that server is running. TODO confirm.
    """
    # Guard: nothing to aggregate.
    if not paths or len(paths) == 0:
        error_html = """
        <div style="background:#fff3cd;border:1px solid #ffc107;border-radius:8px;padding:20px;text-align:center;">
            <h3 style="color:#856404;">⚠️ Aucun fichier JSON</h3>
            <p style="color:#856404;">Exportez d'abord les fichiers à l'étape 5.1</p>
        </div>
        """
        return None, error_html, ""  # 3 outputs: (path, preview, metrics)
    logging.info(f"🔀 Début agrégation: {len(paths)} fichiers")
    try:
        agg = aggregate_mindmaps(
            paths,
            "aggregate.json",
            threshold / 100,  # aggregator expects a 0-1 ratio
            fuzzy_threshold=fuzzy_threshold,
            semantic_threshold=semantic_threshold
        )
        # Empty result: every node fell below the frequency threshold.
        if len(agg['nodes']) == 0:
            warning_html = f"""
            <div style="background:#fff3cd;border:1px solid #ffc107;border-radius:8px;padding:20px;text-align:center;">
                <h3 style="color:#856404;">⚠️ Graphe vide</h3>
                <p style="color:#856404;">
                    Aucun nœud au-dessus du seuil de {threshold}%<br>
                    <strong>→ Diminuez le seuil (ex: 30%)</strong>
                </p>
            </div>
            """
            return "aggregate.json", warning_html, ""  # 3 outputs
    except Exception as e:
        logging.error(f"❌ Erreur agrégation: {e}")
        import traceback
        logging.error(traceback.format_exc())
        error_html = f"""
        <div style="background:#f8d7da;border:1px solid #f5c6cb;border-radius:8px;padding:20px;text-align:center;">
            <h3 style="color:#721c24;">❌ Erreur</h3>
            <p style="color:#721c24;font-family:monospace;font-size:0.9em;">{str(e)}</p>
        </div>
        """
        return None, error_html, ""  # 3 outputs
    # Build the preview iframe (timestamp query parameter busts the cache).
    ts = int(time.time())
    src = f"http://127.0.0.1:8000/visual_aggregate.html?t={ts}"
    iframe_html = f"""
    <div style="border:1px solid #e0e0e0;border-radius:8px;overflow:hidden;background:white;">
        <div style="background:#f8f9fa;padding:12px;border-bottom:1px solid #e0e0e0;">
            <h4 style="margin:0;color:#2c3e50;">
                🔗 Graphe Agrégé
                <span style="color:#7f8c8d;font-size:0.85em;font-weight:normal;">
                    ({len(agg['nodes'])} nœuds, {len(agg['edges'])} arêtes)
                </span>
            </h4>
            <p style="margin:5px 0 0 0;font-size:0.75em;color:#7f8c8d;">
                Seuils: Fréquence {threshold}% | Fuzzy {fuzzy_threshold}% | Sémantique {semantic_threshold}%
            </p>
        </div>
        <iframe src="{src}"
                style="width:100%;height:700px;border:none;background:white;display:block;">
        </iframe>
    </div>
    """
    # Forward the thresholds so the GT comparison uses the same matching
    # configuration as the aggregation itself.
    metrics_html = compare_aggregated_with_gt(
        GT_ORIGINAL_PATH,
        "aggregate.json",
        fuzzy_threshold=fuzzy_threshold,
        semantic_threshold=semantic_threshold,
        frequency_threshold=threshold
    )
    logging.info(f"✅ Visualisation générée: {src}")
    return "aggregate.json", iframe_html, metrics_html  # 3 outputs
def export_mindmaps_to_json(files, model_choice="Gemini", output_dir=None):
    """
    Export one JSON graph per uploaded file, ready for aggregation.

    Args:
        files (list): Uploaded files (Gradio file objects or plain path strings).
        model_choice (str): Key of the model used to generate the Mermaid
            code; unknown names fall back to Gemini.
        output_dir (str, optional): Destination folder; defaults to
            OUTPUT_JSON_DIR.

    Returns:
        list: Paths of the JSON files actually written; files that fail are
        logged and skipped (the batch continues).
    """
    if output_dir is None:
        output_dir = OUTPUT_JSON_DIR
    os.makedirs(output_dir, exist_ok=True)
    # Model-name -> generation-function dispatch table.
    model_functions = {
        "Gemini": generate_mermaid_from_image_gemini,
        "LLAMA": generate_mermaid_from_llama,
        "GEMMA": generate_mermaid_from_gemma,
        "QWEN": generate_mermaid_from_qwen,
        "NVIDIA": generate_mermaid_from_nvidia,
        "InternVL": generate_mermaid_from_intern,
    }
    mermaid_fn = model_functions.get(model_choice, generate_mermaid_from_image_gemini)
    saved_paths = []
    logging.info("=" * 70)
    logging.info(f"📤 EXPORT JSON - Modèle: {model_choice}")
    logging.info(f"📁 Dossier: {output_dir}")
    logging.info(f"📄 Fichiers: {len(files) if files else 0}")
    logging.info("=" * 70)
    if not files:
        logging.warning("⚠️ Aucun fichier à exporter")
        return []
    # Process every file; a failure on one file does not stop the batch.
    for idx, file in enumerate(files, 1):
        try:
            # Gradio file objects expose .name; plain strings are paths.
            file_path = file.name if hasattr(file, 'name') else file
            logging.info(f"\n[{idx}/{len(files)}] Traitement: {os.path.basename(file_path)}")
            # Convert to an image when the input is a PDF.
            img = file_path
            if file_path.lower().endswith(".pdf"):
                logging.info(" 📄 → 🖼️ Conversion PDF...")
                img = convert_pdf_to_image(file_path)
            # Base64 encoding for the vision model.
            logging.info(" 🔐 Encodage...")
            b64 = encode_image(img)
            # Mermaid generation by the selected model.
            logging.info(f" 🤖 Génération avec {model_choice}...")
            mermaid = mermaid_fn(b64)
            # Mermaid -> JSON graph conversion.
            logging.info(" 🔄 Conversion JSON...")
            graph_json = mermaid_to_json(mermaid)
            # Persist the graph next to the other exports.
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            output_path = os.path.join(output_dir, f"{base_name}.json")
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(graph_json, f, ensure_ascii=False, indent=2)
            saved_paths.append(output_path)
            logging.info(f" ✅ Exporté: {output_path}")
            logging.info(f" • Nœuds: {len(graph_json['nodes'])}")
            logging.info(f" • Arêtes: {len(graph_json['edges'])}")
        except Exception as e:
            logging.error(f" ❌ Erreur: {e}")
            import traceback
            logging.error(traceback.format_exc())
            continue
    logging.info("=" * 70)
    logging.info(f"✅ EXPORT TERMINÉ: {len(saved_paths)}/{len(files)}")
    logging.info("=" * 70)
    return saved_paths
def compare_aggregated_with_gt(gt_path, agg_json_path, fuzzy_threshold=70, semantic_threshold=70,
                               frequency_threshold=40):
    """Compare the aggregated graph with the original ground-truth (GT) graph.

    Pipeline:
      1. Load both JSON graphs ({'nodes': [...], 'edges': [...]}).
      2. Keep only aggregated nodes whose 'freq' field (already expressed
         in %) reaches ``frequency_threshold``, stripping the " (XX.XX%)"
         suffix from their labels.
      3. Match aggregated node labels against GT labels with the hybrid
         fuzzy/semantic matcher (``hybrid_match_optimized``, alpha=0.6).
      4. Compute precision/recall/F1 for nodes and for *undirected* edges,
         then a global score (unweighted mean of the two F1s).

    Args:
        gt_path (str): Path to the ground-truth JSON file.
        agg_json_path (str): Path to the aggregated-graph JSON file.
        fuzzy_threshold (int): Fuzzy-matching threshold (%) for node labels.
        semantic_threshold (int): Semantic-similarity threshold (%).
        frequency_threshold (int): Minimum node 'freq' (%) required to keep
            an aggregated node.

    Returns:
        str: An HTML fragment with the metrics dashboard, or an HTML error
        box when no node passes the frequency filter or an exception occurs.
    """
    try:
        # Load both graphs from disk
        with open(gt_path, 'r', encoding='utf-8') as f:
            gt_data = json.load(f)
        with open(agg_json_path, 'r', encoding='utf-8') as f:
            agg_data = json.load(f)
        gt_nodes = gt_data.get('nodes', [])
        gt_edges = gt_data.get('edges', [])
        all_agg_nodes = agg_data.get('nodes', [])
        all_agg_edges = agg_data.get('edges', [])
        # Filter nodes by frequency (values are already percentages)
        agg_nodes = [n for n in all_agg_nodes if n.get('freq', 0) >= frequency_threshold]
        # Clean the labels (remove the trailing " (XX.XX%)" part).
        # NOTE: this mutates the node dicts loaded from agg_json_path in place.
        for node in agg_nodes:
            label = node.get('label', '')
            if '(' in label:
                # "OOP (33.33%)" → "OOP"
                node['label'] = label.split('(')[0].strip()
        # Diagnostic logs
        logging.info("═" * 70)
        logging.info(f"🔍 FILTRAGE PAR FRÉQUENCE (seuil: {frequency_threshold}%)")
        logging.info("═" * 70)
        logging.info(f"📊 Avant filtrage : {len(all_agg_nodes)} nœuds, {len(all_agg_edges)} arêtes")
        logging.info(f"📊 Après filtrage : {len(agg_nodes)} nœuds")
        if all_agg_nodes:
            sample = all_agg_nodes[0]
            logging.info(f"📊 Exemple nœud brut : {sample}")
            logging.info(f"📊 Valeur 'freq' : {sample.get('freq', 'NON TROUVÉ')}")
        if not agg_nodes:
            # Nothing survived the frequency filter → early HTML error box
            logging.error(f"❌ AUCUN NŒUD après filtrage à {frequency_threshold}%")
            logging.error(f"   → Diminuez le seuil ou vérifiez aggregate.json")
            return '<div style="background:#f8d7da;padding:20px;border-radius:8px;">❌ Aucun nœud ne passe le seuil</div>'
        # Set of IDs of the surviving nodes
        valid_node_ids = {n['id'] for n in agg_nodes}
        # Filter edges (keep only those whose both endpoints survived)
        agg_edges = [
            e for e in all_agg_edges
            if e['source'] in valid_node_ids and e['target'] in valid_node_ids
        ]
        logging.info(f"📊 Arêtes conservées : {len(agg_edges)}")
        # Log the kept nodes (top 10 by decreasing frequency)
        logging.info(f"\n📝 Nœuds conservés (≥{frequency_threshold}%) :")
        for i, n in enumerate(sorted(agg_nodes, key=lambda x: x.get('freq', 0), reverse=True)[:10], 1):
            logging.info(f"   {i}. [{n.get('freq', 0):.1f}%] {n['label']}")
        if len(agg_nodes) > 10:
            logging.info(f"   ... et {len(agg_nodes) - 10} autres")
        logging.info("═" * 70)
        gt_labels = [n["label"] for n in gt_nodes]
        agg_labels = [n["label"] for n in agg_nodes]
        # Hybrid fuzzy + semantic label matching; alpha weights the
        # semantic component vs the fuzzy one.
        matches = hybrid_match_optimized(
            agg_labels, gt_labels,
            fuzzy_threshold=fuzzy_threshold,
            semantic_threshold=semantic_threshold,
            alpha=0.6
        )
        # Greedy one-to-one mapping: each GT label may be claimed only once,
        # in the order aggregated labels appear.
        label_mapping = {}
        matched_gt_labels = set()
        for agg_lbl, (match, combined_score, fuzzy_score, sem_score) in zip(agg_labels, matches):
            if match and match not in matched_gt_labels:
                label_mapping[agg_lbl] = match
                matched_gt_labels.add(match)
        gt_labels_set = set(gt_labels)
        # ---- Node-level metrics ----
        num_common_nodes = len(matched_gt_labels)
        num_missing_nodes = len(gt_labels_set) - num_common_nodes
        num_extra_nodes = len(agg_labels) - num_common_nodes
        node_precision = (num_common_nodes / len(agg_labels) * 100) if agg_labels else 0
        node_recall = (num_common_nodes / len(gt_labels) * 100) if gt_labels else 0
        node_f1 = (2 * node_precision * node_recall / (node_precision + node_recall)) if (
            node_precision + node_recall) > 0 else 0
        # ---- Edge-level metrics ----
        # Edges are compared as *undirected* label pairs (sorted tuples),
        # with aggregated labels remapped onto GT labels first.
        gt_id2lbl = {n["id"]: n["label"] for n in gt_nodes}
        agg_id2lbl = {n["id"]: n["label"] for n in agg_nodes}
        gt_edges_set = set()
        for e in gt_edges:
            src = gt_id2lbl.get(e["source"])
            tgt = gt_id2lbl.get(e["target"])
            if src and tgt:
                gt_edges_set.add(tuple(sorted([src, tgt])))
        agg_edges_set = set()
        for e in agg_edges:
            src = agg_id2lbl.get(e["source"])
            tgt = agg_id2lbl.get(e["target"])
            if src and tgt:
                src_mapped = label_mapping.get(src, src)
                tgt_mapped = label_mapping.get(tgt, tgt)
                # Only count edges whose both endpoints exist in the GT
                if src_mapped in gt_labels_set and tgt_mapped in gt_labels_set:
                    agg_edges_set.add(tuple(sorted([src_mapped, tgt_mapped])))
        common_edges = gt_edges_set & agg_edges_set
        missing_edges = gt_edges_set - agg_edges_set
        extra_edges = agg_edges_set - gt_edges_set
        edge_precision = (len(common_edges) / len(agg_edges_set) * 100) if agg_edges_set else 0
        edge_recall = (len(common_edges) / len(gt_edges_set) * 100) if gt_edges_set else 0
        edge_f1 = (2 * edge_precision * edge_recall / (edge_precision + edge_recall)) if (
            edge_precision + edge_recall) > 0 else 0
        # Global score: unweighted mean of node F1 and edge F1
        global_score = (node_f1 + edge_f1) / 2
        # Final logs
        logging.info("═" * 70)
        logging.info("🔍 RÉSULTATS DE COMPARAISON")
        logging.info("═" * 70)
        logging.info(f"📊 Nœuds GT : {len(gt_labels)} | Agrégé : {len(agg_labels)}")
        logging.info(f"📊 Matchés : {num_common_nodes} | Manquants : {num_missing_nodes} | En trop : {num_extra_nodes}")
        logging.info(
            f"📊 Arêtes GT : {len(gt_edges_set)} | Agrégé : {len(agg_edges_set)} | Communes : {len(common_edges)}")
        logging.info(f"🎯 Score global : {global_score:.1f}%")
        logging.info(f"   • F1 nœuds : {node_f1:.1f}%")
        logging.info(f"   • F1 arêtes : {edge_f1:.1f}%")
        logging.info("═" * 70)
        # Colour/icon/label bucket for the global score
        if global_score >= 80:
            score_color = "#10b981"
            score_icon = "✅"
            score_label = "Excellent"
        elif global_score >= 60:
            score_color = "#3b82f6"
            score_icon = "👍"
            score_label = "Bon"
        elif global_score >= 40:
            score_color = "#f59e0b"
            score_icon = "⚠️"
            score_label = "Moyen"
        else:
            score_color = "#ef4444"
            score_icon = "❌"
            score_label = "Faible"
        # Generate the HTML dashboard (same code as before)
        metrics_html = f"""
        <div style="background: white; border: 2px solid #e0e0e0; border-radius: 12px; padding: 24px; margin: 20px 0; box-shadow: 0 4px 12px rgba(0,0,0,0.1);">
            <!-- Score Global -->
            <div style="text-align: center; margin-bottom: 24px; padding: 20px; background: linear-gradient(135deg, {score_color}15 0%, {score_color}05 100%); border-radius: 8px; border: 2px solid {score_color};">
                <div style="font-size: 3em; margin-bottom: 8px;">{score_icon}</div>
                <div style="font-size: 2.5em; font-weight: 700; color: {score_color}; margin-bottom: 4px;">{global_score:.1f}%</div>
                <div style="font-size: 1.1em; color: #555; font-weight: 600;">{score_label} - Correspondance avec le GT Original</div>
                <div style="font-size: 0.85em; color: #888; margin-top: 8px;">Marge d'amélioration : {100 - global_score:.1f}%</div>
                <div style="font-size: 0.75em; color: #999; margin-top: 4px; font-style: italic;">
                    Seuils: Fréquence {frequency_threshold}% | Fuzzy {fuzzy_threshold}% | Sémantique {semantic_threshold}%
                </div>
                <div style="font-size: 0.7em; color: #666; margin-top: 4px; padding: 6px; background: #f5f5f5; border-radius: 4px;">
                    ℹ️ Nœuds filtrés : {len(agg_nodes)}/{len(all_agg_nodes)} • Arêtes filtrées : {len(agg_edges)}/{len(all_agg_edges)}
                </div>
            </div>
            <!-- Métriques Détaillées -->
            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-bottom: 20px;">
                <!-- Nœuds -->
                <div style="background: #f8f9fa; padding: 16px; border-radius: 8px; border-left: 4px solid #667eea;">
                    <h4 style="margin: 0 0 12px 0; color: #667eea; font-size: 1em;">📊 Nœuds</h4>
                    <div style="margin-bottom: 8px;">
                        <span style="color: #888; font-size: 0.85em;">Précision</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {node_precision}%; height: 100%; background: #667eea;"></div>
                            </div>
                            <span style="font-weight: 700; color: #667eea; min-width: 50px;">{node_precision:.1f}%</span>
                        </div>
                    </div>
                    <div style="margin-bottom: 8px;">
                        <span style="color: #888; font-size: 0.85em;">Rappel</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {node_recall}%; height: 100%; background: #10b981;"></div>
                            </div>
                            <span style="font-weight: 700; color: #10b981; min-width: 50px;">{node_recall:.1f}%</span>
                        </div>
                    </div>
                    <div>
                        <span style="color: #888; font-size: 0.85em;">F1-Score</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {node_f1}%; height: 100%; background: #9b59b6;"></div>
                            </div>
                            <span style="font-weight: 700; color: #9b59b6; min-width: 50px;">{node_f1:.1f}%</span>
                        </div>
                    </div>
                </div>
                <!-- Arêtes -->
                <div style="background: #f8f9fa; padding: 16px; border-radius: 8px; border-left: 4px solid #f59e0b;">
                    <h4 style="margin: 0 0 12px 0; color: #f59e0b; font-size: 1em;">🔗 Arêtes</h4>
                    <div style="margin-bottom: 8px;">
                        <span style="color: #888; font-size: 0.85em;">Précision</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {edge_precision}%; height: 100%; background: #667eea;"></div>
                            </div>
                            <span style="font-weight: 700; color: #667eea; min-width: 50px;">{edge_precision:.1f}%</span>
                        </div>
                    </div>
                    <div style="margin-bottom: 8px;">
                        <span style="color: #888; font-size: 0.85em;">Rappel</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {edge_recall}%; height: 100%; background: #10b981;"></div>
                            </div>
                            <span style="font-weight: 700; color: #10b981; min-width: 50px;">{edge_recall:.1f}%</span>
                        </div>
                    </div>
                    <div>
                        <span style="color: #888; font-size: 0.85em;">F1-Score</span>
                        <div style="display: flex; align-items: center; gap: 8px;">
                            <div style="flex: 1; height: 8px; background: #e0e0e0; border-radius: 4px; overflow: hidden;">
                                <div style="width: {edge_f1}%; height: 100%; background: #9b59b6;"></div>
                            </div>
                            <span style="font-weight: 700; color: #9b59b6; min-width: 50px;">{edge_f1:.1f}%</span>
                        </div>
                    </div>
                </div>
            </div>
            <!-- Statistiques -->
            <div style="background: #fafafa; padding: 16px; border-radius: 8px;">
                <h4 style="margin: 0 0 12px 0; color: #555; font-size: 0.95em;">📈 Statistiques Détaillées</h4>
                <div style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 12px; font-size: 0.85em;">
                    <div>
                        <div style="color: #888;">GT Original</div>
                        <div style="font-weight: 600; color: #2c3e50;">{len(gt_nodes)} nœuds, {len(gt_edges_set)} arêtes</div>
                    </div>
                    <div>
                        <div style="color: #888;">Agrégé (filtré)</div>
                        <div style="font-weight: 600; color: #2c3e50;">{len(agg_nodes)} nœuds, {len(agg_edges_set)} arêtes</div>
                    </div>
                    <div>
                        <div style="color: #888;">Communs</div>
                        <div style="font-weight: 600; color: #10b981;">{num_common_nodes} nœuds, {len(common_edges)} arêtes</div>
                    </div>
                    <div>
                        <div style="color: #888;">Manquants</div>
                        <div style="font-weight: 600; color: #ef4444;">{num_missing_nodes} nœuds, {len(missing_edges)} arêtes</div>
                    </div>
                    <div>
                        <div style="color: #888;">En trop</div>
                        <div style="font-weight: 600; color: #f59e0b;">{num_extra_nodes} nœuds, {len(extra_edges)} arêtes</div>
                    </div>
                    <div>
                        <div style="color: #888;">Couverture</div>
                        <div style="font-weight: 600; color: #3b82f6;">{node_recall:.1f}% du GT</div>
                    </div>
                </div>
            </div>
            <!-- Recommandations -->
            <div style="margin-top: 16px; padding: 12px; background: #eff6ff; border-left: 4px solid #3b82f6; border-radius: 4px;">
                <div style="font-weight: 600; color: #1e40af; margin-bottom: 4px;">💡 Recommandations</div>
                <div style="font-size: 0.85em; color: #3b82f6;">
                    {'✅ Excellent ! Le graphe agrégé est très fidèle au GT original.' if global_score >= 80 else
                     '👍 Bon résultat. Ajustez les seuils de clustering pour améliorer la couverture.' if global_score >= 60 else
                     f'⚠️ Résultat moyen. Diminuez le seuil de fréquence (actuellement {frequency_threshold}%) ou les seuils fuzzy/sémantique pour capturer plus de nœuds.' if global_score >= 40 else
                     f'❌ Faible correspondance. Diminuez drastiquement le seuil de fréquence (actuellement {frequency_threshold}%) et les seuils fuzzy/sémantique.'}
                </div>
            </div>
        </div>
        """
        return metrics_html
    except Exception as e:
        logging.error(f"❌ Erreur comparaison agrégé/GT : {e}")
        import traceback
        logging.error(traceback.format_exc())
        return f"""
        <div style="background:#f8d7da;border:1px solid #f5c6cb;border-radius:8px;padding:20px;text-align:center;">
            <h3 style="color:#721c24;">❌ Erreur de comparaison</h3>
            <p style="color:#721c24;font-family:monospace;font-size:0.9em;">{str(e)}</p>
        </div>
        """
# ============================================
# SECTION 9: AUXILIARY FUNCTIONS FOR THE INTERFACE
# ============================================
def load_and_visualize_gt_original(gt_path=None):
    """Load the original ground-truth graph and build its visualisation.

    Args:
        gt_path (str): Path to the original GT JSON file. When falsy, the
            module-level GT_ORIGINAL_PATH default is used.

    Returns:
        str: An HTML iframe snippet on success, otherwise an HTML error box
        (file missing or load failure).
    """
    # Fall back to the configured default path when none is supplied.
    if not gt_path:
        gt_path = GT_ORIGINAL_PATH
    if not os.path.exists(gt_path):
        return f"""
        <div style="background:#fff3cd;border:1px solid #ffc107;border-radius:8px;padding:20px;text-align:center;">
            <h3 style="color:#856404;">⚠️ Fichier GT introuvable</h3>
            <p style="color:#856404;">Chemin : {gt_path}</p>
            <p style="color:#856404;font-size:0.9em;">Vérifiez que le fichier existe ou modifiez GT_ORIGINAL_PATH dans le code</p>
        </div>
        """
    try:
        # Read the GT graph from disk.
        with open(gt_path, 'r', encoding='utf-8') as fh:
            graph = json.load(fh)
        node_count = len(graph.get('nodes', []))
        edge_count = len(graph.get('edges', []))
        logging.info(
            f"✅ GT original chargé: {node_count} nœuds, {edge_count} arêtes")
        # Persist a temporary copy used by the visualisation page.
        with open("temp_gt_original.json", 'w', encoding='utf-8') as fh:
            json.dump(graph, fh, ensure_ascii=False, indent=2)
        # Cache-busting timestamp appended to the iframe URL.
        src = f"http://127.0.0.1:8000/visual_gt_original.html?t={int(time.time())}"
        return f"""
        <div style="border:1px solid #e0e0e0;border-radius:8px;overflow:hidden;background:white;">
            <div style="background:linear-gradient(135deg, #10b981 0%, #059669 100%);padding:12px;border-bottom:1px solid #e0e0e0;">
                <h4 style="margin:0;color:white;">
                    📘 Ground Truth Original (Professeur)
                    <span style="color:#d1fae5;font-size:0.85em;font-weight:normal;">
                        ({node_count} nœuds, {edge_count} arêtes)
                    </span>
                </h4>
                <p style="margin:5px 0 0 0;font-size:0.75em;color:#d1fae5;">
                    Référence : {os.path.basename(gt_path)}
                </p>
            </div>
            <iframe src="{src}"
                    style="width:100%;height:700px;border:none;background:white;display:block;">
            </iframe>
        </div>
        """
    except Exception as e:
        logging.error(f"❌ Erreur lors du chargement du GT: {e}")
        return f"""
        <div style="background:#f8d7da;border:1px solid #f5c6cb;border-radius:8px;padding:20px;text-align:center;">
            <h3 style="color:#721c24;">❌ Erreur de chargement</h3>
            <p style="color:#721c24;font-family:monospace;font-size:0.9em;">{str(e)}</p>
        </div>
        """
def download_aggregated_graph():
    """Return the path of the aggregated-graph JSON file, or None if absent.

    Returns:
        str | None: "aggregated_graph.json" when the file exists in the
        working directory, otherwise None (also on unexpected errors).
    """
    path = "aggregated_graph.json"
    try:
        return path if os.path.exists(path) else None
    except Exception as exc:
        # Defensive: os.path.exists is unlikely to raise, but keep the
        # original best-effort behaviour of logging and returning None.
        logging.error(f"Erreur lors du téléchargement du graphe agrégé: {exc}")
        return None
def generate_pdf_report():
    """Return the most recent PDF report, generating a placeholder if none exists.

    Looks for existing reports under ``reports/`` and returns the most
    recently modified one. When no report exists, a minimal placeholder PDF
    is generated under ``reports/rapport_vide.pdf`` and its path returned.

    Returns:
        str | None: Path to a PDF report, or None on error.
    """
    try:
        # Reuse an existing report when available.
        if os.path.exists("reports"):
            report_files = glob.glob("reports/*.pdf")
            if report_files:
                # BUGFIX: glob order is arbitrary/filesystem-dependent, so
                # report_files[-1] was not necessarily the latest report.
                # Pick the newest file by modification time instead.
                return max(report_files, key=os.path.getmtime)
        # Otherwise generate an empty placeholder report.
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=16)
        pdf.cell(200, 10, txt="Rapport Doc2GL", ln=True, align='C')
        pdf.set_font("Arial", size=12)
        pdf.cell(200, 10, txt="Aucune donnée à afficher", ln=True, align='C')
        os.makedirs("reports", exist_ok=True)
        report_path = "reports/rapport_vide.pdf"
        pdf.output(report_path)
        return report_path
    except Exception as e:
        logging.error(f"Erreur lors de la génération du rapport PDF: {e}")
        return None
# ============================================
# SECTION 10: GRADIO INTERFACE
# ============================================
def create_auth_interface():
    """Build a simple standalone login interface (gr.Blocks).

    Credentials: the username must be "admin" and the password must match
    the APP_PASSWORD environment variable. When APP_PASSWORD is empty or
    unset, any login attempt is accepted.

    Returns:
        gr.Blocks: The authentication demo, ready to be launched.
    """
    # Password read once at build time from the environment.
    app_password = os.environ.get("APP_PASSWORD", "")
    def check_login(username, password):
        # No password configured → open access.
        if not app_password:
            return True, "Accès autorisé"
        if username == "admin" and password == app_password:
            return True, "Accès autorisé"
        return False, "Identifiants incorrects"
    with gr.Blocks(
        title="Doc2GL - Connexion",
        css=""".auth-container {
            max-width: 400px;
            margin: 100px auto;
            padding: 40px;
            background: #18181b;
            border: 1px solid #27272a;
            border-radius: 16px;
            text-align: center;
        }
        .auth-title {
            font-size: 2.5em;
            font-weight: bold;
            margin-bottom: 10px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
        }
        .auth-subtitle {
            color: #a1a1aa;
            margin-bottom: 30px;
        }
        """,
        theme=gr.themes.Soft()
    ) as auth_demo:
        with gr.Column(elem_classes=["auth-container"]):
            gr.HTML('<h1 class="auth-title">🧠 Doc2GL</h1>')
            gr.HTML('<p class="auth-subtitle">Document to Graph Learning</p>')
            with gr.Column():
                username = gr.Textbox(
                    label="Nom d'utilisateur",
                    placeholder="admin",
                    value="admin"
                )
                password = gr.Textbox(
                    label="Mot de passe",
                    type="password",
                    placeholder="AZERTY123"
                )
                login_btn = gr.Button(
                    "🔐 Se connecter",
                    variant="primary",
                    size="lg"
                )
                result = gr.HTML("")
                # NOTE(review): check_login returns a (bool, str) tuple but
                # only one output component is wired here — verify how Gradio
                # renders this (a 2-output mapping may have been intended).
                login_btn.click(
                    fn=check_login,
                    inputs=[username, password],
                    outputs=result
                )
    return auth_demo
def gradio_interface():
"""Point d'entrée principal pour l'interface Gradio."""
# Vérifier si un mot de passe est configuré
app_password = os.environ.get("APP_PASSWORD", "")
# ═════════════════════════════════════════════════════════════════════
# CSS PERSONNALISÉ (Variables CSS pour personnalisation facile)
# ═════════════════════════════════════════════════════════════════════
custom_css = r"""
@import url('https://fonts.googleapis.com/css2?family=Inter:ital,opsz,wght@0,14..32,100..900;1,14..32,100..900&display=swap');
/* ===== GLOBAL RESET ===== */
*, *::before, *::after { box-sizing: border-box; margin: 0; }
:root {
--bg: #09090b;
--surface: #18181b;
--surface-2: #1f1f23;
--surface-3: #27272a;
--border: #27272a;
--border-light: #3f3f46;
--text: #fafafa;
--text-2: #a1a1aa;
--text-3: #71717a;
--text-4: #52525b;
--violet-400: #a78bfa;
--violet-500: #8b5cf6;
--violet-600: #7c3aed;
--violet-900: rgba(139,92,246,0.12);
--emerald-400: #34d399;
--emerald-500: #10b981;
--emerald-600: #059669;
--emerald-900: rgba(52,211,153,0.12);
--amber-900: rgba(245,158,11,0.15);
--amber-400: #fbbf24;
--font: 'Inter', system-ui, -apple-system, sans-serif;
}
/* ===== AUTH CONTAINER ===== */
.auth-container {
max-width: 400px;
margin: 100px auto;
padding: 40px;
background: var(--surface);
border: 1px solid var(--border);
border-radius: 16px;
text-align: center;
}
.auth-title {
font-size: 2.5em;
font-weight: bold;
margin-bottom: 10px;
background: linear-gradient(135deg, var(--violet-400), var(--emerald-400));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
.auth-subtitle {
color: var(--text-3);
margin-bottom: 30px;
}
/* ===== BASE ===== */
body {
background: var(--bg) !important;
color: var(--text) !important;
font-family: var(--font) !important;
-webkit-font-smoothing: antialiased;
}
.gradio-container {
background: var(--bg) !important;
max-width: 1100px !important;
margin: 0 auto !important;
padding: 0 32px 60px !important;
font-family: var(--font) !important;
color: var(--text) !important;
}
/* ===== HIDE GRADIO CHROME ===== */
footer, .built-with, .show-api, .svelte-1ed2p3z,
.gradio-container > .prose,
#component-0 > .prose { display: none !important; }
.gradio-container .gap { gap: 16px !important; }
.block { border: none !important; box-shadow: none !important; background: transparent !important; padding: 0 !important; }
.form { background: transparent !important; border: none !important; gap: 20px !important; }
.panel { background: transparent !important; border: none !important; }
/* ===== HERO ===== */
.hero {
position: relative;
background: var(--surface);
border: 1px solid var(--border);
border-radius: 28px;
padding: 72px 48px 64px;
margin: 16px 0 40px;
text-align: center;
overflow: hidden;
}
.hero::before {
content: '';
position: absolute; inset: 0;
background:
radial-gradient(ellipse 80% 60% at 50% -10%, rgba(139,92,246,0.25), transparent),
radial-gradient(ellipse 60% 50% at 80% 110%, rgba(52,211,153,0.1), transparent);
pointer-events: none;
}
.hero-badge {
display: inline-flex; align-items: center; gap: 8px;
background: rgba(255,255,255,0.05);
border: 1px solid var(--border-light);
padding: 7px 18px;
border-radius: 100px;
font-size: 0.8rem; font-weight: 500;
color: var(--text-2);
margin-bottom: 24px;
}
.hero-badge .live {
width: 8px; height: 8px;
background: var(--emerald-400);
border-radius: 50%;
box-shadow: 0 0 8px var(--emerald-400);
animation: blink 2.5s ease-in-out infinite;
}
@keyframes blink {
0%,100% { opacity:1; } 50% { opacity:0.3; }
}
.hero h1 {
color: var(--text) !important;
font-size: 3rem;
font-weight: 800;
letter-spacing: -0.04em;
line-height: 1;
margin: 0 0 12px;
}
.hero h1 span {
background: linear-gradient(135deg, var(--violet-400), var(--emerald-400));
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
.hero p {
color: var(--text-3) !important;
font-size: 1.1rem;
font-weight: 400;
margin: 0;
line-height: 1.6;
}
/* ===== CARDS ===== */
.card-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 14px; margin-bottom: 24px; }
.card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: 16px;
padding: 24px;
transition: all 0.15s ease;
}
.card:hover { border-color: var(--border-light); box-shadow: 0 4px 20px rgba(0,0,0,0.3); }
.card-icon {
width: 40px; height: 40px;
border-radius: 12px;
display: flex; align-items: center; justify-content: center;
font-size: 1.1rem;
margin-bottom: 14px;
}
.card-icon.violet { background: var(--violet-900); }
.card-icon.emerald { background: var(--emerald-900); }
.card h3 { font-size: 0.92rem; font-weight: 700; color: var(--text); margin: 0 0 6px; }
.card p { font-size: 0.82rem; color: var(--text-3); line-height: 1.55; margin: 0; }
/* ===== SECTION ===== */
.section-wrap {
background: var(--surface);
border: 1px solid var(--border);
border-radius: 20px;
padding: 28px;
margin: 24px 0;
}
.section-label {
display: inline-block;
font-size: 0.68rem; font-weight: 700;
text-transform: uppercase; letter-spacing: 0.08em;
padding: 4px 12px;
border-radius: 100px;
margin-bottom: 16px;
}
.section-label.optional { background: var(--amber-900); color: var(--amber-400); }
.section-label.results { background: var(--violet-900); color: var(--violet-400); }
.section-title {
font-size: 1.05rem; font-weight: 700; color: var(--text);
margin: 0 0 4px;
}
.section-desc {
font-size: 0.84rem; color: var(--text-3); margin: 0 0 20px; line-height: 1.55;
}
/* ===== BUTTONS ===== */
button, .gr-button { font-family: var(--font) !important; cursor: pointer; }
button.primary-button, .primary-button > button {
background: var(--text) !important;
color: var(--bg) !important;
font-size: 0.92rem !important;
padding: 13px 28px !important;
font-weight: 600 !important;
border-radius: 12px !important;
border: none !important;
box-shadow: 0 0 0 1px rgba(255,255,255,0.1) !important;
transition: all 0.15s ease !important;
letter-spacing: -0.01em !important;
}
button.primary-button:hover, .primary-button > button:hover {
background: #e4e4e7 !important;
box-shadow: 0 4px 16px rgba(255,255,255,0.06) !important;
transform: translateY(-1px) !important;
}
button.primary-button:active, .primary-button > button:active {
transform: translateY(0) !important;
}
button.secondary-button, .secondary-button > button {
background: var(--surface-2) !important;
color: var(--text-2) !important;
font-weight: 600 !important;
border-radius: 12px !important;
border: 1px solid var(--border) !important;
box-shadow: none !important;
transition: all 0.15s ease !important;
font-size: 0.88rem !important;
}
button.secondary-button:hover, .secondary-button > button:hover {
background: var(--surface-3) !important;
color: var(--text) !important;
border-color: #52525b !important;
}
/* ===== TABS ===== */
.tabs { margin-top: 0 !important; }
.tabs > .tab-nav {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: 14px !important;
padding: 5px !important;
gap: 4px !important;
margin-bottom: 16px !important;
display: inline-flex !important;
}
.tabs > .tab-nav > button {
background: transparent !important;
color: var(--text-3) !important;
border: none !important;
border-radius: 10px !important;
padding: 10px 20px !important;
font-weight: 600 !important;
font-size: 0.84rem !important;
transition: all 0.12s ease !important;
}
.tabs > .tab-nav > button.selected {
background: var(--surface-2) !important;
color: var(--text) !important;
}
.tabitem {
background: var(--surface) !important;
border-radius: 20px !important;
padding: 32px !important;
border: 1px solid var(--border) !important;
}
/* ===== FORM ELEMENTS ===== */
input, textarea, select {
background: var(--surface-2) !important;
color: var(--text) !important;
border: 1px solid var(--border) !important;
border-radius: 10px !important;
font-family: var(--font) !important;
font-size: 0.9rem !important;
padding: 10px 14px !important;
transition: all 0.12s ease !important;
}
input:focus, textarea:focus, select:focus {
border-color: var(--violet-500) !important;
box-shadow: 0 0 0 2px rgba(139,92,246,0.1) !important;
outline: none !important;
}
.wrap { background: var(--surface-2) !important; border: 1px solid var(--border) !important; border-radius: 10px !important; }
label, .label-wrap > span {
color: var(--text-2) !important;
font-weight: 600 !important;
font-size: 0.82rem !important;
}
.gr-check-radio { accent-color: var(--violet-500) !important; }
.gradio-slider input[type="range"] { accent-color: var(--violet-500) !important; }
.info { color: var(--text-4) !important; font-size: 0.78rem !important; }
/* ===== ACCORDION ===== */
.gradio-accordion {
background: var(--surface) !important;
border: 1px solid var(--border) !important;
border-radius: 14px !important;
margin: 16px 0 !important;
overflow: hidden;
}
.gradio-accordion > .label-wrap {
padding: 16px 20px !important;
font-weight: 600 !important;
background: var(--surface-2) !important;
}
/* ===== HELP NOTES ===== */
.help-note { display: none; }
/* ===== GRAPH PANELS ===== */
.graph-panel-header {
padding: 14px 20px;
border-radius: 14px 14px 0 0;
text-align: center;
font-weight: 700;
font-size: 0.88rem;
letter-spacing: -0.01em;
}
.graph-panel-header.gt {
background: linear-gradient(135deg, var(--emerald-500), var(--emerald-600));
color: white;
}
.graph-panel-header.test {
background: linear-gradient(135deg, var(--violet-500), var(--violet-600));
color: white;
}
.graph-panel-header.agg {
background: linear-gradient(135deg, #f59e0b, #d97706);
color: white;
}
.graph-placeholder {
background: var(--surface-2);
border: 2px dashed var(--border);
border-radius: 14px;
padding: 48px;
text-align: center;
color: var(--text-4);
display: flex;
flex-direction: column;
align-items: center; justify-content: center;
gap: 16px;
font-size: 0.88rem; font-weight: 500;
}
.graph-placeholder svg {
width: 40px; height: 40px;
stroke: var(--text-4);
}
/* ===== FILE UPLOAD ===== */
.gradio-file, .gradio-files { border-radius: 14px !important; }
/* ===== GALLERY & PLOTS ===== */
.gradio-gallery { border-radius: 14px !important; overflow: hidden; border: 1px solid var(--border) !important; }
.gradio-plot { background: var(--surface) !important; border-radius: 14px !important; border: 1px solid var(--border) !important; }
/* ===== SCROLLBAR ===== */
::-webkit-scrollbar { width: 5px; height: 5px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: var(--surface-3); border-radius: 10px; }
/* ===== RESPONSIVE ===== */
@media (max-width: 768px) {
.card-grid { grid-template-columns: 1fr; }
.hero h1 { font-size: 2rem; }
.hero { padding: 48px 24px 40px; border-radius: 20px; }
.gradio-container { padding: 0 16px 40px !important; }
}
"""
# ═════════════════════════════════════════════════════════════════════
# CONSTRUCTION DE L'INTERFACE (AVEC OU SANS AUTH)
# ═════════════════════════════════════════════════════════════════════
with gr.Blocks(title="Doc2GL", css=custom_css, theme=gr.themes.Base()) as demo:
# État d'authentification
authenticated = gr.State(False)
# ═════════════════════════════════════════════════════════════════════
# CONTENEUR D'AUTHENTIFICATION (visible si non authentifié)
# ═════════════════════════════════════════════════════════════════════
with gr.Column(visible=bool(app_password), elem_id="login_container") as login_container:
with gr.Column(elem_classes=["auth-container"]):
gr.HTML('<h1 class="auth-title">🧠 Doc2GL</h1>')
gr.HTML('<p class="auth-subtitle">Document to Graph Learning</p>')
with gr.Column():
username = gr.Textbox(
label="Nom d'utilisateur",
placeholder="admin",
value="admin"
)
password = gr.Textbox(
label="Mot de passe",
type="password",
placeholder="Entrez votre mot de passe"
)
login_btn = gr.Button(
"🔐 Se connecter",
variant="primary",
size="lg"
)
auth_result = gr.HTML("")
# ═════════════════════════════════════════════════════════════════════
# CONTENEUR PRINCIPAL (visible si authentifié ou si pas de mot de passe)
# ═════════════════════════════════════════════════════════════════════
with gr.Column(visible=not bool(app_password), elem_id="app_container") as app_container:
# Variable d'état (non utilisée dans ce code mais disponible)
redirect_state = gr.State("")
# ─────────────────────────────────────────────────────────────────
# HEADER
# ─────────────────────────────────────────────────────────────────
gr.HTML('''
<div class="hero">
<div class="hero-badge"><span class="live"></span> Document Intelligence</div>
<h1>Doc<span>2</span>GL</h1>
<p>Transformez vos documents en graphes de connaissances intelligents</p>
</div>
''')
# ─────────────────────────────────────────────────────────────────
# ÉTAPES 1-2: Upload et Modèle
# ─────────────────────────────────────────────────────────────────
gr.HTML('''
<div class="card-grid">
<div class="card">
<div class="card-icon violet">&#128196;</div>
<h3>Importez vos documents</h3>
<p>Glissez vos PDF ou images ci-dessous pour commencer l'analyse.</p>
</div>
<div class="card">
<div class="card-icon emerald">&#129302;</div>
<h3>Choisissez votre mod&egrave;le</h3>
<p>S&eacute;lectionnez le mod&egrave;le d'IA le plus adapt&eacute; &agrave; vos documents.</p>
</div>
</div>
''')
# Zone d'upload et sélection du modèle (côte à côte)
with gr.Row():
with gr.Column(scale=1):
file_input = gr.Files(
label="📁 Documents (PDF/Images)",
file_types=[".pdf", ".jpg", ".jpeg", ".png"]
)
with gr.Column(scale=1):
model_selector = gr.Dropdown(
label="🤖 Modèle d'IA",
choices=[
"Gemini 1.5 Flash",
"Gemma 2 9B",
"LLaMA 3 8B",
"Qwen 2 7B",
"NVIDIA Nemotron Nano 12B",
"InternVL 2 8B"
],
value="Gemini 1.5 Flash",
info="Choisissez le modèle pour générer les graphes"
)
# Options de traitement
with gr.Row():
use_semantic_checkbox = gr.Checkbox(
label="🧠 Utiliser la similarité sémantique",
value=True,
info="Combine fuzzy matching et embeddings pour une meilleure précision"
)
with gr.Accordion("⚙️ Paramètres avancés", open=False):
with gr.Row():
fuzzy_threshold_slider = gr.Slider(
label="🎯 Seuil fuzzy (%)",
minimum=60,
maximum=100,
value=80,
step=5,
info="Seuil de similarité textuelle (fuzzy matching)"
)
semantic_threshold_slider = gr.Slider(
label="🔍 Seuil sémantique (%)",
minimum=50,
maximum=90,
value=70,
step=5,
info="Seuil de similarité sémantique (embeddings)"
)
with gr.Row():
alpha_slider = gr.Slider(
label="⚖️ Alpha (poids sémantique)",
minimum=0.0,
maximum=1.0,
value=0.6,
step=0.1,
info="Poids du matching sémantique vs fuzzy (0=fuzzy seul, 1=sémantique seul)"
)
edge_mode_radio = gr.Radio(
label="🔗 Mode de détection des arêtes",
choices=["hybrid", "co-occurrence", "semantic"],
value="hybrid",
info="Stratégie pour détecter les relations entre entités"
)
# Bouton de génération
generate_btn = gr.Button(
"🚀 Générer les graphes",
variant="primary",
size="lg",
elem_classes=["primary-button"]
)
# ─────────────────────────────────────────────────────────────────
# RÉSULTATS (Galerie + Mermaid)
# ─────────────────────────────────────────────────────────────────
gr.HTML('''
<div class="section-wrap">
<span class="section-label results">📊 Résultats</span>
<h3 class="section-title">Graphes générés</h3>
<p class="section-desc">Visualisez les graphes extraits de vos documents</p>
</div>
''')
image_preview = gr.Gallery(label="📷 Pages analysées", show_label=True, elem_id="gallery", columns=2, height="auto", allow_preview=True)
mermaid_output = gr.Textbox(label="📝 Code Mermaid", lines=10, interactive=True, placeholder="Le code Mermaid apparaîtra ici après génération...")
# Dropdown pour sélectionner un document
docs_dropdown = gr.Dropdown(
label="📄 Sélectionner un document",
choices=[],
info="Choisissez un document pour visualiser son graphe détaillé"
)
# Conteneur pour les iframes (stockage interne)
hidden_iframes = gr.State({})
# Panneaux pour les graphes GT et Test
with gr.Row():
with gr.Column():
gt_graph_panel = gr.HTML(
value='<div class="graph-placeholder">Sélectionnez un document pour voir le graphe GT</div>',
label="📊 Graphe Ground Truth"
)
with gr.Column():
test_graph_panel = gr.HTML(
value='<div class="graph-placeholder">Sélectionnez un document pour voir le graphe Test</div>',
label="🔬 Graphe Généré"
)
# Résumé des performances
comparison_result_html = gr.HTML(
value='<div style="text-align:center;padding:24px;color:var(--text-4);font-size:0.86rem;font-weight:500;">Les métriques apparaîtront ici après génération</div>',
label="📈 Résumé"
)
# Statut du rapport
report_status = gr.HTML(
value="",
visible=False
)
# ─────────────────────────────────────────────────────────────────
# ONGLETS : Vue agrégée vs GT Original + Performances
# ─────────────────────────────────────────────────────────────────
with gr.Tabs(elem_classes=["tabs"]):
with gr.TabItem("📊 Vue agrégée"):
gr.HTML('''
<div class="section-wrap">
<span class="section-label optional">🔗 Vue agrégée</span>
<h3 class="section-title">Graphe consolidé</h3>
<p class="section-desc">Vue d'ensemble de tous les graphes combinés</p>
</div>
''')
aggregated_graph = gr.HTML(
value='<div class="graph-placeholder">Les graphes agrégés apparaîtront ici après traitement</div>',
label="🔗 Graphe agrégé"
)
with gr.Row():
aggregate_btn = gr.Button(
"🔗 Agréger les graphes",
variant="secondary",
elem_classes=["secondary-button"]
)
download_agg_btn = gr.Button(
"📥 Télécharger (JSON)",
variant="secondary",
elem_classes=["secondary-button"]
)
with gr.TabItem("📈 Performances"):
gr.HTML('''
<div class="section-wrap">
<span class="section-label results">📈 Performances</span>
<h3 class="section-title">Métriques détaillées</h3>
<p class="section-desc">Analyse quantitative de la qualité des graphes générés</p>
</div>
''')
performance_plot = gr.Plot(
label="📊 Boxplots des métriques",
visible=False
)
download_report_btn = gr.Button(
"📄 Générer le rapport PDF",
variant="primary",
elem_classes=["primary-button"]
)
# ─────────────────────────────────────────────────────────────────
# GESTION DES ÉVÉNEMENTS
# ─────────────────────────────────────────────────────────────────
# Authentification
def check_login(username, password):
    """Validate login credentials against the configured APP_PASSWORD.

    Returns the 3-tuple consumed by the login click handler:
    (authenticated: bool, login_container visibility update,
    app_container visibility update).

    When no APP_PASSWORD is configured, authentication is bypassed
    entirely and the app is shown.
    """
    import hmac  # stdlib; local import keeps the top-of-file imports untouched

    granted = (True, gr.update(visible=False), gr.update(visible=True))
    if not app_password:
        # No password configured: open access (deliberate best-effort mode).
        return granted
    # compare_digest is constant-time, so the check does not leak how much
    # of the password prefix matched via response timing.
    if username == "admin" and hmac.compare_digest(password or "", app_password):
        return granted
    return False, gr.update(visible=True), gr.update(visible=False)
def show_auth_error(success):
    """Return an inline HTML error banner, or an empty string on success."""
    error_banner = (
        '<div style="color: #ef4444; text-align: center; margin-top: 10px;">'
        '❌ Identifiants incorrects</div>'
    )
    return "" if success else error_banner
login_btn.click(
fn=check_login,
inputs=[username, password],
outputs=[authenticated, login_container, app_container]
).then(
# Chained step: surface an inline error once the auth state is known.
fn=show_auth_error,
inputs=[authenticated],
outputs=[auth_result]
)
# Graph generation: runs the full pipeline and refreshes every result panel.
generate_btn.click(
fn=process_files,
inputs=[
file_input,
model_selector,
use_semantic_checkbox,
fuzzy_threshold_slider,
semantic_threshold_slider,
alpha_slider,
edge_mode_radio
],
outputs=[
image_preview,
mermaid_output,
docs_dropdown,
hidden_iframes,
gt_graph_panel,
test_graph_panel,
comparison_result_html,
report_status
]
)
# Document selection: swaps the GT/test graph iframes for the chosen doc.
docs_dropdown.change(
fn=select_graph,
inputs=[docs_dropdown, hidden_iframes],
outputs=[gt_graph_panel, test_graph_panel]
)
# Comparison with the original ground truth (both widgets start hidden).
# NOTE(review): no .click handler for compare_with_gt_btn is visible in
# this section — presumably wired elsewhere; verify against the full file.
with gr.Row():
gt_file_input = gr.File(
label="📁 Ground Truth original (JSON)",
file_types=[".json"],
visible=False
)
compare_with_gt_btn = gr.Button(
"🔍 Comparer avec GT original",
variant="secondary",
elem_classes=["secondary-button"],
visible=False
)
# Downloads.
# NOTE(review): gr.File() is instantiated inline in `outputs`, so the
# component is never placed in the layout — confirm the download file
# actually renders in the UI (Gradio usually needs a laid-out component).
download_agg_btn.click(
fn=download_aggregated_graph,
inputs=[],
outputs=[gr.File()]
)
download_report_btn.click(
fn=generate_pdf_report,
inputs=[],
outputs=[gr.File()]
)
# ═════════════════════════════════════════════════════════════════════
# UI LAUNCH
# ═════════════════════════════════════════════════════════════════════
demo.launch(show_api=False)
# ============================================
# POINT D'ENTRÉE DU PROGRAMME
# ============================================
def start_http_server(port=8000):
    """Start a background HTTP server for the app's static assets.

    Serves the files the result iframes load (visual.html, neovis.js,
    graph_diff JSON, ...) from the current working directory. The server
    runs in a daemon thread so it never blocks interpreter shutdown.

    Args:
        port: TCP port to bind. 0 binds a free ephemeral port.

    Returns:
        The running TCPServer instance (callers may .shutdown() it),
        or None if binding failed. Previous versions returned None
        unconditionally, so this is backward-compatible.
    """
    import http.server
    import socketserver
    import threading

    class _ReusableTCPServer(socketserver.TCPServer):
        # Avoid "Address already in use" on quick restarts while the old
        # socket lingers in TIME_WAIT.
        allow_reuse_address = True

    handler = http.server.SimpleHTTPRequestHandler
    try:
        httpd = _ReusableTCPServer(("", port), handler)
    except Exception as e:
        # Best-effort: the UI still works, only the static iframes break.
        logging.warning(f"Impossible de démarrer le serveur HTTP sur le port {port}: {e}")
        return None
    thread = threading.Thread(target=httpd.serve_forever, daemon=True)
    thread.start()
    logging.info(f"Serveur HTTP démarré sur le port {port}")
    return httpd
if __name__ == "__main__":
    # APP_PASSWORD is read into a module-level global because check_login
    # (inside gradio_interface) closes over it; an empty value disables
    # the login screen.
    app_password = os.environ.get("APP_PASSWORD", "")
    # The original if/else had two byte-identical branches — the startup
    # sequence is the same with or without a password, so branch removed.
    start_http_server(8000)
    gradio_interface()