Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from utils import calculer_benford_stats_et_graphique | |
# Module-level cache: holds the last loaded DataFrame and the path it came
# from, so the callbacks below do not re-read the file on every interaction.
df_cache = dict(df=None, filename=None)
def charger_fichier(fichier):
    """Load the uploaded file into ``df_cache`` and refresh the selectors.

    The file is parsed only when its path differs from the cached one.
    Whenever no usable file is available (no upload, unsupported extension,
    read failure) the cache is cleared, so a later analysis can never run
    against data from a previously loaded file.

    Args:
        fichier: The value emitted by the file component: ``None`` when the
            upload is cleared, otherwise an object exposing the path as
            ``.name`` or a plain path string.

    Returns:
        A 5-tuple: (numeric-column dropdown, filter-column dropdown,
        hidden filter-value dropdown, status HTML, ``None``).
    """
    global df_cache

    import logging
    import os

    def _sorties(message, colonnes_num=(), colonnes_filtre=("Aucun",)):
        # Single place that builds the 5-tuple, so every exit path stays
        # in sync with the others.
        colonnes_num = list(colonnes_num)
        return (
            gr.Dropdown(
                choices=colonnes_num,
                value=colonnes_num[0] if colonnes_num else None,
            ),
            gr.Dropdown(choices=list(colonnes_filtre), value="Aucun"),
            gr.Dropdown(choices=[], value=None, visible=False),
            message,
            None,
        )

    if fichier is None:
        df_cache = {"df": None, "filename": None}
        return _sorties(
            "<p style='color: #94a3b8; font-size: 13px; margin: 0;'>En attente...</p>"
        )

    # Accept both an object carrying ``.name`` and a bare path string.
    chemin = getattr(fichier, "name", fichier)

    try:
        # Parse only when this is a different file from the cached one.
        if df_cache["filename"] != chemin:
            # Case-insensitive so that "DATA.CSV" or "report.XLSX" are accepted.
            extension = os.path.splitext(str(chemin))[1].lower()
            if extension == ".csv":
                nouveau_df = pd.read_csv(chemin)
            elif extension in (".xls", ".xlsx"):
                nouveau_df = pd.read_excel(chemin)
            else:
                # Drop the previous DataFrame: the UI reports an invalid file,
                # so analysing the old data would be misleading.
                df_cache = {"df": None, "filename": None}
                return _sorties(
                    "<p style='color: #ef4444; font-size: 13px;'>❌ Format invalide</p>"
                )
            df_cache = {"df": nouveau_df, "filename": chemin}

        df = df_cache["df"]

        # Numeric columns feed the analysis dropdown; every column can be
        # used as a filter.
        colonnes_num = df.select_dtypes(include=np.number).columns.tolist()
        toutes_colonnes = ["Aucun"] + df.columns.tolist()

        message = (
            "<p style='color: #10b981; font-size: 13px; margin: 0;'>"
            f"✅ {len(df):,} lignes</p>"
        )
        return _sorties(message, colonnes_num, toutes_colonnes)
    except Exception:
        # UI boundary: keep the app responsive, but leave a trace of the
        # failure instead of discarding it.
        logging.getLogger(__name__).exception("Failed to load %r", chemin)
        df_cache = {"df": None, "filename": None}
        return _sorties(
            "<p style='color: #ef4444; font-size: 13px;'>❌ Erreur</p>"
        )
def mettre_a_jour_valeurs_filtre(colonne_filtre):
    """Rebuild the filter-value dropdown for the chosen filter column.

    Args:
        colonne_filtre: Name of the column to filter on, or ``"Aucun"`` /
            an empty value when no filter is wanted.

    Returns:
        A visible dropdown listing the distinct non-null values of the
        column (as strings, first one preselected), or a hidden empty
        dropdown when there is no cached data, no filter column, or the
        column is not in the cached DataFrame.
    """
    df = df_cache["df"]
    if (
        df is None
        or not colonne_filtre
        or colonne_filtre == "Aucun"
        or colonne_filtre not in df.columns
    ):
        return gr.Dropdown(choices=[], value=None, visible=False)

    valeurs = df[colonne_filtre].dropna().unique().tolist()
    try:
        valeurs.sort()
    except TypeError:
        # Mixed-type columns (e.g. numbers and text) are not mutually
        # orderable; fall back to ordering by their text form rather than
        # hiding the filter altogether.
        valeurs.sort(key=str)

    # Distinct values can share a text form (1 and "1"); keep each label once
    # while preserving the sorted order.
    valeurs_str = list(dict.fromkeys(str(v) for v in valeurs))

    return gr.Dropdown(
        choices=valeurs_str,
        value=valeurs_str[0] if valeurs_str else None,
        visible=True,
        label=f"Valeur de '{colonne_filtre}'",
    )
| def tester_loi_benford(colonne_nom, colonne_filtre, valeur_filtre): | |
| """Lance le test (utilise le cache, ultra rapide).""" | |
| global df_cache | |
| if df_cache["df"] is None: | |
| return "<p style='color: #ef4444;'>❌ Veuillez d'abord charger un fichier.</p>", None | |
| df = df_cache["df"].copy() | |
| # Application du filtre | |
| df_original_size = len(df) | |
| if colonne_filtre and colonne_filtre != "Aucun" and valeur_filtre: | |
| try: | |
| valeur_convertie = valeur_filtre | |
| if pd.api.types.is_numeric_dtype(df[colonne_filtre]): | |
| valeur_convertie = pd.to_numeric(valeur_filtre, errors='ignore') | |
| df = df[df[colonne_filtre] == valeur_convertie] | |
| filtre_info = f" (Filtré : {colonne_filtre} = {valeur_filtre}, {len(df):,}/{df_original_size:,} lignes)" | |
| except: | |
| return f"<p style='color: #ef4444;'>❌ Erreur de filtrage.</p>", None | |
| else: | |
| filtre_info = "" | |
| if colonne_nom not in df.columns: | |
| return f"<p style='color: #ef4444;'>❌ Colonne '{colonne_nom}' introuvable.</p>", None | |
| # Préparation des données | |
| data_a_tester = df[colonne_nom].dropna() | |
| try: | |
| data_a_tester = pd.to_numeric(data_a_tester, errors='coerce').dropna() | |
| data_a_tester = data_a_tester[data_a_tester > 0] | |
| except: | |
| return f"<p style='color: #ef4444;'>❌ Colonne non numérique.</p>", None | |
| N = len(data_a_tester) | |
| # En-tête moderne et compact | |
| # html_output = f""" | |
| # <div style='background: linear-gradient(135deg, #3b82f6 0%, #1d4ed8 100%); padding: 15px 20px; border-radius: 10px; color: white; margin-bottom: 12px;'> | |
| # <h2 style='margin: 0; font-size: 20px; font-weight: 700;'>🔎 {colonne_nom}</h2> | |
| # <p style='margin: 5px 0 0 0; opacity: 0.95; font-size: 14px;'><strong>{N:,}</strong> obs{filtre_info}</p> | |
| # </div> | |
| # """ | |
| # Vérification des Conditions - Version compacte | |
| html_output = "<div style='display: grid; gap: 8px; margin: 10px 0;'>" | |
| conditions_valides = True | |
| if N < 50: | |
| html_output += "<div style='background: #fef2f2; border-left: 3px solid #ef4444; padding: 8px; border-radius: 4px; font-size: 13px;'>" | |
| html_output += f"<strong style='color: #991b1b;'>❌ {N} obs < 50 min</strong>" | |
| html_output += "</div>" | |
| conditions_valides = False | |
| elif N < 1000: | |
| html_output += "<div style='background: #eff6ff; border-left: 3px solid #3b82f6; padding: 8px; border-radius: 4px; font-size: 13px;'>" | |
| html_output += f"<strong style='color: #1e40af;'>ℹ️ {N:,} Nombre d'observations faible (idéal > 1000)</strong>" | |
| html_output += "</div>" | |
| else: | |
| html_output += "<div style='background: #f0fdf4; border-left: 3px solid #10b981; padding: 8px; border-radius: 4px; font-size: 13px;'>" | |
| html_output += f"<strong style='color: #065f46;'>✅ {N:,} Nombre d'observations suffisant</strong>" | |
| html_output += "</div>" | |
| # --- Ordres de grandeur (Mise à jour selon l'échelle détaillée) --- | |
| if N > 1: | |
| min_val = data_a_tester.min() | |
| max_val = data_a_tester.max() | |
| #html_output += "<h4>Couverture des Ordres de Grandeur (Log10)</h4>" # Nouvelle sous-section | |
| if min_val > 0 and max_val > 0: | |
| orders_of_magnitude = np.log10(max_val) - np.log10(min_val) | |
| # Utilisation de l'échelle d'interprétation | |
| if orders_of_magnitude >= 3.0: | |
| # Très bonne pertinence (Idéale) | |
| html_output += "<div style='background: #f0fdf4; border-left: 3px solid #10b981; padding: 8px; border-radius: 4px; font-size: 13px;'>" | |
| html_output += f"<strong style='color: #065f46;'>✅ {orders_of_magnitude:.1f} Magnitude des valeurs idéale</strong>" | |
| html_output += "</div>" | |
| elif orders_of_magnitude >= 2.0: | |
| # Bonne pertinence (Seuil minimum recommandé) | |
| html_output += "<div style='background: #f0fdf4; border-left: 3px solid #10b981; padding: 8px; border-radius: 4px; font-size: 13px;'>" | |
| html_output += f"<strong style='color: #065f46;'>✅ {orders_of_magnitude:.1f} Magnitude des valeurs suffisante</strong>" | |
| html_output += "</div>" | |
| elif orders_of_magnitude >= 1.0: | |
| # Faible pertinence (Attention requise) | |
| html_output += "<div style='background: #fffbe3; border-left: 3px solid #f59e0b; padding: 8px; border-radius: 4px; font-size: 13px;'>" | |
| html_output += f"<strong style='color: #92400e;'>⚠️ {orders_of_magnitude:.1f} Magnitude des valeurs faible</strong>" | |
| html_output += "</div>" | |
| conditions_valides = False | |
| else: | |
| # Très mauvaise pertinence (Non applicable) | |
| html_output += "<div style='background: #fef2f2; border-left: 3px solid #ef4444; padding: 8px; border-radius: 4px; font-size: 13px;'>" | |
| html_output += f"<strong style='color: #991b1b;'>❌ {orders_of_magnitude:.1f} Magnitude des valeurs insuffisante</strong>" | |
| html_output += "</div>" | |
| conditions_valides = False | |
| else: | |
| # Cas où min_val ou max_val est non positif, déjà traité par le filtre data_a_tester > 0 | |
| html_output += "<p style='color: red;'>Erreur de plage (min/max non positif).</p>" | |
| # La balise </div> de fermeture du bloc de code précédent a été supprimée, | |
| # elle doit être gérée par le conteneur HTML global (ici non affiché). | |
| html_output += "</div>" | |
| if not conditions_valides: | |
| html_output += "<div style='background: #fef2f2; border: 2px solid #ef4444; padding: 15px; border-radius: 8px; text-align: center; margin: 12px 0;'>" | |
| html_output += "<p style='color: #991b1b; font-weight: 700; font-size: 16px; margin: 0;'>⚠️ TEST NON APPLICABLE</p>" | |
| html_output += "</div>" | |
| return html_output, None | |
| return calculer_benford_stats_et_graphique(data_a_tester, colonne_nom, N, html_output) | |
# --- Gradio interface ---
theme = gr.themes.Soft(primary_hue="blue", secondary_hue="emerald", neutral_hue="slate")

# Custom stylesheet passed to gr.Blocks: wider container, hidden footer,
# tighter spacing and smaller headings.
_CSS_INTERFACE = """
.gradio-container { max-width: 1600px !important; }
footer { display: none !important; }
.contain { gap: 8px !important; }
h1 { font-size: 28px !important; margin: 10px 0 !important; }
h3 { font-size: 16px !important; margin: 8px 0 !important; }
.block { padding: 8px !important; }
.svelte-12ioyct > span {font-size: 0.6em !important; }
"""

with gr.Blocks(title="Test de la Loi de Benford", theme=theme, css=_CSS_INTERFACE) as demo:
    gr.Markdown("# 🔎 Détecteur Benford")

    with gr.Row():
        # Left column (scale 4): file upload and analysis controls.
        with gr.Column(scale=4):
            fichier_input = gr.File(
                label="📁 Fichier",
                file_types=[".csv", ".xlsx", ".xls"],
                height=100,
            )
            info_chargement = gr.HTML(
                value="<p style='color: #94a3b8; font-size: 13px; margin: 0;'>En attente...</p>"
            )
            colonne_input = gr.Dropdown(
                label="📊 Colonne", choices=[], interactive=True, scale=1
            )

            # Filter column and filter value side by side; the value
            # selector starts hidden.
            with gr.Row():
                colonne_filtre_input = gr.Dropdown(
                    label="🔍 Filtre",
                    choices=["Aucun"],
                    value="Aucun",
                    interactive=True,
                    scale=1,
                )
                valeur_filtre_input = gr.Dropdown(
                    label="Valeur",
                    choices=[],
                    interactive=True,
                    visible=False,
                    scale=1,
                )

            bouton_test = gr.Button("🚀 Analyser", variant="primary", size="sm")

        # Right column (scale 6): result chart.
        with gr.Column(scale=6):
            graphique_output = gr.Image(
                label="📈 Résultat", type="filepath", height=320, show_label=False
            )

    # Full-width HTML results below the two columns.
    with gr.Row():
        resultats_output = gr.HTML()

    # Event wiring.
    # A new (or cleared) upload refreshes the selectors, the status line and
    # the chart.
    fichier_input.change(
        charger_fichier,
        [fichier_input],
        [
            colonne_input,
            colonne_filtre_input,
            valeur_filtre_input,
            info_chargement,
            graphique_output,
        ],
    )
    # Picking a filter column repopulates the filter-value selector.
    colonne_filtre_input.change(
        mettre_a_jour_valeurs_filtre,
        [colonne_filtre_input],
        [valeur_filtre_input],
    )
    # The button runs the analysis on the current selections.
    bouton_test.click(
        tester_loi_benford,
        [colonne_input, colonne_filtre_input, valeur_filtre_input],
        [resultats_output, graphique_output],
    )

if __name__ == "__main__":
    demo.launch()