Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from scipy.stats import chisquare, chi2 | |
| import tempfile | |
| import matplotlib | |
| # Configuration matplotlib | |
| matplotlib.use('Agg') | |
def extraire_premier_chiffre(val):
    """Return the first significant (non-zero) digit of a positive number.

    Args:
        val: The value to inspect, normally an int or float taken from a
            pandas Series.

    Returns:
        An int between 1 and 9, or ``None`` when no leading digit can be
        derived: missing values (NaN/None), zero, negative numbers,
        infinity and non-numeric values.
    """
    if pd.isna(val):
        return None
    try:
        # Negative values are rejected on purpose (same as before); +inf is
        # rejected too because its text form ("inf") contains no digit.
        if val <= 0 or val == np.inf:
            return None
    except TypeError:
        # Values that cannot be ordered against a number (e.g. text cells)
        # have no leading digit; skip them instead of aborting the caller.
        return None
    # For a positive finite number the first character in 1-9 of its text
    # form is the leading significant digit. This holds for plain decimals
    # ("0.0045"), and for scientific notation ("1.5e-07") because the
    # mantissa is printed before the exponent.
    return next((int(ch) for ch in str(val) if ch in "123456789"), None)
# Stylesheet emitted ahead of the result cards.
_BENFORD_CSS = """
<style>
.stats-grid {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 10px;
margin: 12px 0;
}
.stat-card {
background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%);
padding: 12px;
border-radius: 8px;
text-align: center;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
color: white;
}
.stat-card.amber { background: linear-gradient(135deg, #f59e0b 0%, #d97706 100%); }
.stat-card.emerald { background: linear-gradient(135deg, #10b981 0%, #059669 100%); }
.stat-label { font-size: 11px; opacity: 0.9; font-weight: 500; margin-bottom: 3px; }
.stat-value { font-size: 22px; font-weight: 700; }
.conclusion-box {
padding: 12px;
border-radius: 8px;
margin: 10px 0;
border-left: 4px solid;
font-size: 14px;
}
.conclusion-box.success { background: #f0fdf4; border-color: #10b981; color: #065f46; }
.conclusion-box.error { background: #fef2f2; border-color: #ef4444; color: #991b1b; }
.info-box {
background: #f0f9ff;
border-left: 3px solid #0284c7;
padding: 10px;
border-radius: 6px;
margin: 8px 0;
font-size: 13px;
}
.mad-badge {
display: inline-block;
padding: 4px 12px;
border-radius: 16px;
font-weight: 600;
font-size: 12px;
margin-top: 6px;
}
h3 { color: #111827; font-size: 15px; font-weight: 700; margin: 12px 0 6px 0; }
</style>
"""

# MAD conformity tiers as (exclusive upper bound, badge CSS, label), in
# ascending order; the first tier whose bound exceeds the MAD wins.
_PALIERS_MAD = (
    (0.006, "background: linear-gradient(135deg, #10b981, #059669); color: white;", "EXCELLENTE CONFORMITÉ"),
    (0.012, "background: linear-gradient(135deg, #22c55e, #16a34a); color: white;", "BONNE CONFORMITÉ"),
    (0.015, "background: linear-gradient(135deg, #f59e0b, #d97706); color: white;", "CONFORMITÉ MARGINALE"),
    (0.020, "background: linear-gradient(135deg, #f97316, #ea580c); color: white;", "FAIBLE CONFORMITÉ"),
)
# Used when the MAD is >= the last bound (strong deviation from Benford).
_PALIER_MAD_DEFAUT = (
    "background: linear-gradient(135deg, #ef4444, #dc2626); color: white;",
    "TRÈS FAIBLE CONFORMITÉ",
)


def _classer_conformite_mad(mad_value):
    """Return the (badge CSS, label) pair for a MAD value."""
    for seuil, style, libelle in _PALIERS_MAD:
        if mad_value < seuil:
            return style, libelle
    return _PALIER_MAD_DEFAUT


def _tracer_graphique_benford(observed_pct, benford_pct, colonne_nom):
    """Draw the observed-vs-Benford bar chart and return the PNG file path."""
    digits = list(range(1, 10))
    x = np.arange(len(digits))
    width = 0.38

    fig, ax = plt.subplots(figsize=(10, 5.5))
    try:
        ax.bar(x - width / 2, observed_pct, width, label='% Observé', alpha=0.85,
               color='#3b82f6', edgecolor='white', linewidth=1.5)
        ax.bar(x + width / 2, benford_pct, width, label='% Benford (théorique)', alpha=0.85,
               color='#f59e0b', edgecolor='white', linewidth=1.5)

        ax.set_xlabel('Premier chiffre', fontsize=13, fontweight='600', color='#1F2937')
        ax.set_ylabel('Fréquence (%)', fontsize=13, fontweight='600', color='#1F2937')
        ax.set_title(f'Test de Benford - {colonne_nom}', fontsize=13, fontweight='700',
                     color='#111827', pad=12)
        ax.set_xticks(x)
        ax.set_xticklabels(digits, fontsize=10)
        ax.legend(fontsize=10, framealpha=0.95, edgecolor='#E5E7EB')
        ax.grid(axis='y', alpha=0.2, linestyle='--', linewidth=0.7)
        ax.set_facecolor('#FAFAFA')
        fig.patch.set_facecolor('white')
        # Work on `fig` directly rather than pyplot's global "current figure".
        fig.tight_layout()

        # Only the path is needed: close the handle immediately so no file
        # descriptor leaks and the file is not locked while savefig writes.
        # delete=False keeps the file on disk for the caller.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as temp_file:
            chemin = temp_file.name
        fig.savefig(chemin, format='png', dpi=120, bbox_inches='tight', facecolor='white')
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    return chemin


def calculer_benford_stats_et_graphique(data_a_tester, colonne_nom, N, html_output):
    """Run a first-digit Benford test and render its HTML report and chart.

    Computes the first-digit distribution of ``data_a_tester``, the Mean
    Absolute Deviation (MAD) against Benford's law and a chi-square
    goodness-of-fit test, saves a comparison bar chart as a temporary PNG
    and appends an HTML summary to ``html_output``.

    Args:
        data_a_tester: pandas Series of values to test; each value goes
            through ``extraire_premier_chiffre`` and values without a
            leading digit are dropped.
        colonne_nom: Name shown in the chart title.
        N: Observation count used only for the large-sample context note.
        html_output: HTML accumulated so far; the report is appended to it.

    Returns:
        A tuple ``(html, chart_path)``. ``chart_path`` is the path of the
        PNG file (not deleted automatically), or ``None`` when the data
        contains no usable value and no test could be computed.
    """
    # Closing tag appended at the very end of the report.
    # NOTE(review): nothing in this function opens the matching <div>;
    # presumably the caller opens a wrapper in html_output -- confirm.
    fermeture = "</div>"

    first_digits = data_a_tester.apply(extraire_premier_chiffre).dropna()
    n_valides = len(first_digits)
    if n_valides == 0:
        # Without this guard the frequencies are 0/0 = NaN, and a NaN
        # p-value fails `p < 0.05`, so the report would wrongly announce an
        # accepted conformity.
        html_output += (
            "<div style='background: #fef2f2; border-left: 4px solid #ef4444; "
            "color: #991b1b; padding: 12px; border-radius: 8px; margin: 10px 0; "
            "font-size: 14px;'>❌ Aucune valeur strictement positive à analyser : "
            "le test de Benford ne peut pas être calculé.</div>"
        )
        html_output += fermeture
        return html_output, None

    # Benford's law: P(d) = log10(1 + 1/d) for d = 1..9.
    benford_probs = np.log10(1 + 1 / np.arange(1, 10))

    # Count every digit 1-9, using 0 for digits that never occur.
    observed_counts = first_digits.value_counts()
    full_observed = np.array([observed_counts.get(d, 0) for d in range(1, 10)])
    expected_counts = benford_probs * n_valides

    observed_freq = full_observed / n_valides
    mad_value = np.mean(np.abs(observed_freq - benford_probs))

    chi2_stat, p_value = chisquare(full_observed, expected_counts)
    # Critical value at the 5% level with 9 - 1 = 8 degrees of freedom (~15.5).
    chi2_critique = chi2.ppf(0.95, 8)

    chemin_graphique = _tracer_graphique_benford(
        observed_freq * 100, benford_probs * 100, colonne_nom
    )

    # Collect the fragments and join once instead of repeated `+=`.
    morceaux = [_BENFORD_CSS]

    # Statistic cards.
    morceaux.append("<h3>📊 Résultats</h3>")
    morceaux.append("<div class='stats-grid'>")
    morceaux.append(f"""
<div class='stat-card'>
<div class='stat-label'>MAD</div>
<div class='stat-value'>{mad_value:.4f}</div>
</div>
<div class='stat-card amber'>
<div class='stat-label'>Chi-carré</div>
<div class='stat-value'>{chi2_stat:.1f} ({chi2_stat/chi2_critique:.1f}x seuil)</div>
</div>
<div class='stat-card emerald'>
<div class='stat-label'>P-value</div>
<div class='stat-value'>{p_value:.4f}</div>
</div>
""")
    morceaux.append("</div>")

    # Chi-square verdict.
    morceaux.append("<h3>🎯 Conclusion</h3>")
    if p_value < 0.05:
        morceaux.append("<div class='conclusion-box error'>")
        morceaux.append(
            f"<p style='font-weight: 700; font-size: 14px; margin: 0 0 6px 0;'>❌ Conformité REJETÉE (p={p_value:.4f})</p>"
        )
        # With very large samples the chi-square test rejects even small
        # deviations, so put the rejection in context when the MAD is
        # below the 0.020 tier.
        if N > 10000 and mad_value < 0.020:
            morceaux.append("<div class='info-box' style='margin-top: 8px;'>")
            morceaux.append(f"<strong>⚠️ Contexte :</strong> Avec {N:,} obs, le test est hypersensible. ")
            morceaux.append(f"MAD de {mad_value:.4f} indique des écarts modérés (biais structurels probables).")
            morceaux.append("</div>")
        morceaux.append("</div>")
    else:
        morceaux.append("<div class='conclusion-box success'>")
        morceaux.append(
            f"<p style='font-weight: 700; font-size: 14px; margin: 0;'>✅ Conformité ACCEPTÉE (p={p_value:.4f})</p>"
        )
        morceaux.append("</div>")

    # MAD conformity badge.
    morceaux.append("<h3>🎯 Conformité MAD (Écart Absolu Moyen)</h3>")
    # <strong> is used here because Markdown asterisks inside an HTML
    # paragraph would be displayed literally.
    morceaux.append(
        "<p style='font-size: 0.9em; color: #555;'>Le MAD mesure l'écart moyen entre les fréquences observées et les fréquences théoriques de Benford. Plus le MAD est <strong>petit</strong>, plus la conformité est <strong>forte</strong>.</p>"
    )
    badge_style, mad_text = _classer_conformite_mad(mad_value)
    morceaux.append(
        f"<div style='margin-top: 10px; font-size: 1.1em; font-weight: bold; padding: 10px; text-align: center; {badge_style}'>"
    )
    morceaux.append(mad_text)
    morceaux.append("</div>")

    morceaux.append(fermeture)
    return html_output + "".join(morceaux), chemin_graphique