""" DATA_EXPLORATION.PY Unite 2 : Analyses Exploratoires ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Fonctions publiques : enrich_ex(sheets) -> 27 scores EX_* silencieux render_filter_bar(sheets) -> filtre global, retourne sheets filtre render_exploration(sheets, sec) -> visuels U2 (6+ par feuille) """ import re import streamlit as st import pandas as pd import numpy as np import plotly.graph_objects as go import plotly.express as px from datetime import datetime from sklearn.preprocessing import StandardScaler from sklearn.decomposition import PCA _C = { "bg": "#0a0e12", "card": "#0f141a", "border": "rgba(80,100,120,0.25)", "accent": "#58a6ff", "ex": "#f39c12", "success": "#2ecc71", "warning": "#f39c12", "critical": "#c0392b", "neutral": "#5a6a7a", "text": "#a8b8c8", "subtext": "#5a6a7a", "grid": "rgba(80,100,120,0.12)", } _PAL = ["#58a6ff","#2ecc71","#f39c12","#c0392b","#a78bfa","#38bdf8","#fb7185","#34d399"] _FONT = "JetBrains Mono, Courier New, monospace" _BASE = dict( paper_bgcolor=_C["card"], plot_bgcolor=_C["card"], font=dict(color=_C["text"], family=_FONT, size=11), title_font=dict(size=12, color=_C["text"], family=_FONT), margin=dict(t=44, b=36, l=48, r=24), legend=dict(bgcolor=_C["bg"], bordercolor=_C["border"], borderwidth=1, font=dict(size=10, family=_FONT)), xaxis=dict(gridcolor=_C["grid"], zerolinecolor=_C["grid"], tickfont=dict(family=_FONT, size=10)), yaxis=dict(gridcolor=_C["grid"], zerolinecolor=_C["grid"], tickfont=dict(family=_FONT, size=10)), colorway=_PAL, ) # ── Helpers ─────────────────────────────────────────────────────────────────── def _L(**kw): m = {**_BASE} for ax in ("xaxis", "yaxis"): if ax in kw: m[ax] = {**m[ax], **kw.pop(ax)} m.update(kw) return m def _ef(msg, h=220): fig = go.Figure() fig.add_annotation(text=msg, xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=12, color=_C["subtext"], family=_FONT)) fig.update_layout(**_L(height=h)) return fig def _age_calc(series): today = datetime.today() def _a(v): try: d = pd.to_datetime(str(v), dayfirst=True, errors="coerce") return int((today - d).days / 365.25) if not pd.isna(d) else np.nan except Exception: return np.nan return series.apply(_a) def _safe(df, col, default=0): return df[col].fillna(default) if col in df.columns else pd.Series(default, index=df.index) def _ctx(text: str): """Bandeau de contexte orange, signature Exploration.""" st.markdown( f'
{text}
', unsafe_allow_html=True, ) def _age_bin(series_age: pd.Series) -> pd.Series: """Convertit une serie d'ages en tranches textuelles. Pas de .cat.categories.""" bins = [0, 25, 35, 45, 55, 120] labs = ["<25", "25-34", "35-44", "45-54", "55+"] # pd.cut retourne une Categorical; on la convertit en str pour eviter .cat result = pd.cut(series_age, bins=bins, labels=labs, right=False) return result.astype(str).replace("nan", np.nan) # ══════════════════════════════════════════════════════════════════════════════ # ENRICHISSEMENT EX_* (silencieux - 27 scores) # ══════════════════════════════════════════════════════════════════════════════ def _pca_ex(df: pd.DataFrame, feat_cols: list, label_col: str, color_col: str, color_title: str, title: str, colorscale=None) -> go.Figure: """PCA 2D generique pour l'exploration — signature Exploration (orange).""" if colorscale is None: colorscale = [[0,"#2ecc71"],[0.5,"#f39c12"],[1,"#c0392b"]] avail = [c for c in feat_cols if c in df.columns and pd.api.types.is_numeric_dtype(df[c])] if len(df) < 3 or len(avail) < 2: return _ef(f"PCA indisponible ({len(df)} lignes, {len(avail)} features numeriques).", 340) X = df[avail].fillna(df[avail].median()).values try: X_sc = StandardScaler().fit_transform(X) n_c = min(2, X_sc.shape[0], X_sc.shape[1]) pca = PCA(n_components=n_c, random_state=42) comp = pca.fit_transform(X_sc) var1 = pca.explained_variance_ratio_[0] * 100 var2 = pca.explained_variance_ratio_[1] * 100 if n_c > 1 else 0.0 except Exception as e: return _ef(f"PCA erreur : {e}", 340) pc2 = comp[:,1] if n_c > 1 else [0]*len(comp) labels = df[label_col].astype(str).str[:12] if label_col in df.columns else df.index.astype(str) color_vals = df[color_col].fillna(df[color_col].median()) if color_col in df.columns else pd.Series(0, index=df.index) fig = go.Figure(go.Scatter( x=comp[:,0], y=pc2, mode="markers+text", text=labels, textposition="top center", textfont=dict(size=8, family=_FONT, color=_C["subtext"]), marker=dict(size=12, color=color_vals, colorscale=colorscale, showscale=True, colorbar=dict(title=color_title, tickfont=dict(size=9, family=_FONT), len=0.65), line=dict(width=1.5, color=_C["bg"])), hovertemplate=( "%{text}
PC1 : %{x:.2f}
PC2 : %{y:.2f}
" + color_title + " : %{marker.color:.3f}" ), )) fig.update_layout(**_L( title=f"{title} ({var1:.0f}%+{var2:.0f}% var.)", height=340, showlegend=False, xaxis_title=f"PC1 ({var1:.1f}%)", yaxis_title=f"PC2 ({var2:.1f}%)", )) return fig def enrich_ex(sheets: dict) -> dict: s = {k: df.copy() for k, df in sheets.items() if isinstance(df, pd.DataFrame)} cli = s.get("Clients_KYC", pd.DataFrame()) gar = s.get("Garants_KYC", pd.DataFrame()) prt = s.get("Prets_Master", pd.DataFrame()) upd = s.get("Prets_Update", pd.DataFrame()) rem = s.get("Remboursements", pd.DataFrame()) adj = s.get("Ajustements_Echeances", pd.DataFrame()) # ── Clients (5) ─────────────────────────────────────────────────────────── if not cli.empty: # EX_Income_Volatility_Matrix if "Secteur_Activite" in cli.columns: rev = _safe(cli, "Revenus_Mensuels").astype(float) autre = _safe(cli, "Autres_Revenus").astype(float) ratio = autre / rev.replace(0, np.nan) cli["EX_Income_Volatility_Matrix"] = ( ratio.groupby(cli["Secteur_Activite"]) .transform(lambda x: x.std(ddof=0) if len(x) > 1 else 0.0) ).round(3) else: cli["EX_Income_Volatility_Matrix"] = 0.0 # EX_AML_Risk_Correlation risky = {"ESPECES","INCONNU","TIERS","NON_DECLARE"} def _aml(row): aml = str(row.get("Verification_AML","")).upper() org = str(row.get("Origine_Fonds","")).upper() return 1 if aml != "VERIFIE" and any(r in org for r in risky) else 0 cli["EX_AML_Risk_Correlation"] = cli.apply(_aml, axis=1) # EX_Contact_Reliability_Index def _contact(row): sc = 0 ph = str(row.get("Telephone","")).strip() em = str(row.get("Email","")).strip() sc += 2 if re.match(r"^\+[\d\s\-]{9,15}$", ph) else (1 if re.match(r"[\d\+][\d\s\-]{7,}", ph) else 0) sc += 2 if re.match(r"^[^@\s]{2,}@[^@\s]{2,}\.[a-zA-Z]{2,6}$", em) else 0 return sc cli["EX_Contact_Reliability_Index"] = cli.apply(_contact, axis=1) # EX_Neighborhood_Income_Benchmark if "Quartier" in cli.columns and "Revenus_Mensuels" in cli.columns: rev_q = cli.groupby("Quartier")["Revenus_Mensuels"].transform("mean") cli["EX_Neighborhood_Income_Benchmark"] = ( _safe(cli,"Revenus_Mensuels") / rev_q.replace(0, np.nan) ).round(3) else: cli["EX_Neighborhood_Income_Benchmark"] = 1.0 # EX_Professional_Seniority_Efficiency rn = (_safe(cli,"Revenus_Mensuels") - _safe(cli,"Charges_Estimees")).replace(0, np.nan) cli["EX_Professional_Seniority_Efficiency"] = ( rn / _safe(cli,"Anciennete_Emploi",1).replace(0, np.nan) ).round(2) s["Clients_KYC"] = cli # ── Garants (5) ────────────────────────────────────────────────────────── if not gar.empty: gar["EX_Guarantor_Disposable_Income"] = ( _safe(gar,"Revenus_Mensuels") - _safe(gar,"Charges_Estimees") ).round(0) cf = _safe(gar,"Checking_frequency",1).replace(0, np.nan) gar["EX_Wealth_to_Risk_Ratio"] = (_safe(gar,"Patrimoine_Declare") / cf).round(2) if "Ville" in gar.columns and "Secteur_Activite" in gar.columns: gar["EX_Guarantee_Diversity_Score"] = gar.groupby("Ville")["Secteur_Activite"].transform("nunique") else: gar["EX_Guarantee_Diversity_Score"] = 1 if not cli.empty and "ID_Client" in cli.columns and "Moyen_Transfert" in cli.columns: cli_mt = set(cli["Moyen_Transfert"].str.upper().dropna().tolist()) gar["EX_Bank_Transfer_Interoperability"] = gar.apply( lambda row: 1 if str(row.get("Moyen_Transfert","")).upper() in cli_mt else 0, axis=1) else: gar["EX_Bank_Transfer_Interoperability"] = 0 rev_tot = (_safe(gar,"Revenus_Mensuels") + _safe(gar,"Autres_Revenus")).replace(0, np.nan) gar["EX_Garant_Liability_Pressure"] = (_safe(gar,"Charges_Estimees") / rev_tot).round(3) s["Garants_KYC"] = gar # ── Prets (5) ───────────────────────────────────────────────────────────── if not prt.empty: if not rem.empty and "ID_Pret" in rem.columns: int_enc = rem.groupby("ID_Pret")["Montant_Interets"].sum().to_dict() prt["EX_Effective_Yield_Realization"] = ( _safe(prt,"ID_Pret").map(int_enc).fillna(0) / _safe(prt,"Cout_Credit",1).replace(0, np.nan) ).round(3) else: prt["EX_Effective_Yield_Realization"] = np.nan if "Motif" in prt.columns and "Statut" in prt.columns: def_mask = prt["Statut"].str.upper().isin(["DEFAUT","LITIGE"]) def _def_by_motif(motif_val): sub = prt[prt["Motif"] == motif_val] if sub.empty: return 0.0 return round(sub["Statut"].str.upper().isin(["DEFAUT","LITIGE"]).mean(), 3) prt["EX_Default_Probability_By_Motif"] = prt["Motif"].apply(_def_by_motif) else: prt["EX_Default_Probability_By_Motif"] = 0.0 if not cli.empty and "ID_Client" in cli.columns and "Pers_Charge" in cli.columns: pch_map = cli.set_index("ID_Client")["Pers_Charge"].to_dict() prt["_pch"] = _safe(prt,"ID_Client").map(pch_map).fillna(1).replace(0,1) prt["EX_Loan_Size_vs_Family_Size"] = (_safe(prt,"Montant_Capital") / prt["_pch"]).round(0) prt.drop(columns=["_pch"], inplace=True, errors="ignore") else: prt["EX_Loan_Size_vs_Family_Size"] = np.nan if "Type_Pret" in prt.columns: prt["EX_Duration_Anomaly_Detection"] = ( prt.groupby("Type_Pret")["Duree_Semaines"] .transform(lambda x: (x - x.mean()) / x.std() if x.std() > 0 else 0.0) ).round(2) else: prt["EX_Duration_Anomaly_Detection"] = 0.0 prt["EX_Endettement_Critical_Threshold"] = ( _safe(prt,"Taux_Endettement").astype(float) > 40.0 ).astype(int) s["Prets_Master"] = prt # ── Prets_Update (2) ───────────────────────────────────────────────────── if not upd.empty: modif_cols = ["Montant_Capital","Taux_Hebdo","Duree_Semaines","Motif","Moyen_Transfert"] if not prt.empty and "ID_Pret" in prt.columns: def _pattern(row): base = prt[prt["ID_Pret"] == row.get("ID_Pret","")] if base.empty: return 0 base = base.iloc[0] return sum(1 for col in modif_cols if col in row and col in base and str(row[col]) != str(base[col])) upd["EX_Modification_Pattern_Clustering"] = upd.apply(_pattern, axis=1) else: upd["EX_Modification_Pattern_Clustering"] = 0 if not prt.empty and "Montant_Total" in prt.columns and "ID_Pret" in upd.columns: mt_orig = prt.set_index("ID_Pret")["Montant_Total"].to_dict() upd["EX_Version_Drift_Analysis"] = ( _safe(upd,"Montant_Total") - _safe(upd,"ID_Pret").map(mt_orig).fillna(0) ).round(0) else: upd["EX_Version_Drift_Analysis"] = np.nan s["Prets_Update"] = upd # ── Remboursements (5) ──────────────────────────────────────────────────── if not rem.empty: rem["EX_Early_Payment_Propensity"] = (_safe(rem,"Jours_Retard").astype(float) < 0).astype(int) pen = _safe(rem,"Penalites_Retard").astype(float) mv = _safe(rem,"Montant_Verse").astype(float) rem["EX_Collection_Cost_Efficiency"] = (mv / (mv + pen).replace(0, np.nan)).round(4) def _anomaly_hour(ts): try: h = pd.to_datetime(str(ts), errors="coerce").hour return 1 if pd.notna(h) and (h >= 23 or h < 5) else 0 except Exception: return 0 rem["EX_Timestamp_Anomaly_Log"] = rem.get("Timestamp", pd.Series(dtype=str)).apply(_anomaly_hour) rem["EX_Payment_Consistency_Score"] = ( rem.groupby("ID_Client")["Montant_Verse"].transform(lambda x: x.std(ddof=0)).fillna(0).round(2) ) if "ID_Client" in rem.columns else 0.0 if "Moyen_Paiement" in rem.columns and "Statut_Paiement" in rem.columns: good = rem["Statut_Paiement"].str.upper().isin(["PAYE","PONCTUEL","ANTICIPE"]) rate = rem.groupby("Moyen_Paiement").apply(lambda g: good.reindex(g.index).mean()) rem["EX_Recovery_Channel_Success"] = _safe(rem,"Moyen_Paiement").map(rate).round(3) else: rem["EX_Recovery_Channel_Success"] = np.nan s["Remboursements"] = rem # ── Ajustements (5) ────────────────────────────────────────────────────── if not adj.empty: if "ID_Pret" in adj.columns and not prt.empty and "ID_Client" in prt.columns: cli_map = prt.set_index("ID_Pret")["ID_Client"].to_dict() adj["_cli"] = _safe(adj,"ID_Pret").map(cli_map) adj["EX_Adjustment_Frequency_Per_Client"] = adj.groupby("_cli")["_cli"].transform("count").fillna(0) adj.drop(columns=["_cli"], inplace=True, errors="ignore") else: adj["EX_Adjustment_Frequency_Per_Client"] = 0 if not prt.empty and "Statut" in prt.columns and "Raison" in adj.columns: def _r2d(raison): ids = adj[adj["Raison"] == raison].get("ID_Pret", pd.Series(dtype=str)) sub = prt[prt["ID_Pret"].isin(ids)] return 0.0 if sub.empty else round(sub["Statut"].str.upper().isin(["DEFAUT","LITIGE"]).mean(), 3) adj["EX_Reason_to_Default_Bridge"] = adj.get("Raison", pd.Series(dtype=str)).apply(_r2d) else: adj["EX_Reason_to_Default_Bridge"] = np.nan if not prt.empty and "ID_Pret" in adj.columns and "Montant_Capital" in prt.columns: cap_map = prt.set_index("ID_Pret")["Montant_Capital"].to_dict() cap_init = _safe(adj,"ID_Pret").map(cap_map).replace(0, np.nan) adj["EX_Amount_Expansion_Factor"] = (_safe(adj,"Montant_Additionnel") / cap_init).round(4) else: adj["EX_Amount_Expansion_Factor"] = np.nan if not prt.empty and "Date_Fin" in prt.columns: fm = prt.set_index("ID_Pret")["Date_Fin"].to_dict() df_ = pd.to_datetime(_safe(adj,"ID_Pret").map(fm), errors="coerce") da = pd.to_datetime(adj.get("Date_Creation"), errors="coerce") adj["EX_Timing_of_Adjustment"] = (df_ - da).dt.days else: adj["EX_Timing_of_Adjustment"] = np.nan if not prt.empty and "Date_Creation" in prt.columns: dc_map = prt.set_index("ID_Pret")["Date_Creation"].to_dict() dp = pd.to_datetime(_safe(adj,"ID_Pret").map(dc_map), errors="coerce") da = pd.to_datetime(adj.get("Date_Creation"), errors="coerce") adj["EX_Adjustment_Latency"] = (da - dp).dt.days else: adj["EX_Adjustment_Latency"] = np.nan s["Ajustements_Echeances"] = adj return s # ══════════════════════════════════════════════════════════════════════════════ # FILTRE GLOBAL # ══════════════════════════════════════════════════════════════════════════════ def render_filter_bar(sheets: dict) -> dict: """Affiche les filtres, retourne un nouveau dict de DataFrames filtrés.""" st.markdown( f'
' f'FILTRE GLOBAL · EXPLORATION
', unsafe_allow_html=True, ) prt = sheets.get("Prets_Master", pd.DataFrame()) rem = sheets.get("Remboursements", pd.DataFrame()) cli = sheets.get("Clients_KYC", pd.DataFrame()) fc1, fc2, fc3, fc4 = st.columns(4) with fc1: statuts = ["Tous"] + (sorted(prt["Statut"].dropna().unique().tolist()) if not prt.empty and "Statut" in prt.columns else []) f_statut = st.selectbox("Statut pret", statuts, key="ex_f_statut") with fc2: offres = ["Tous"] + (sorted(prt["Offre"].dropna().unique().tolist()) if not prt.empty and "Offre" in prt.columns else []) f_offre = st.selectbox("Offre", offres, key="ex_f_offre") with fc3: sp_vals = ["Tous"] + (sorted(rem["Statut_Paiement"].dropna().unique().tolist()) if not rem.empty and "Statut_Paiement" in rem.columns else []) f_sp = st.selectbox("Statut paiement", sp_vals, key="ex_f_sp") with fc4: villes = ["Tous"] + (sorted(cli["Ville"].dropna().unique().tolist()) if not cli.empty and "Ville" in cli.columns else []) f_ville = st.selectbox("Ville", villes, key="ex_f_ville") st.divider() # Appliquer les filtres filtered = {k: df.copy() for k, df in sheets.items() if isinstance(df, pd.DataFrame)} if f_statut != "Tous" and not prt.empty and "Statut" in prt.columns: ids_ok = set(prt[prt["Statut"] == f_statut].get("ID_Pret", pd.Series(dtype=str)).tolist()) for sh in ("Prets_Master","Prets_Update","Remboursements","Ajustements_Echeances"): df = filtered.get(sh, pd.DataFrame()) if not df.empty and "ID_Pret" in df.columns: filtered[sh] = df[df["ID_Pret"].isin(ids_ok)] if not filtered.get("Prets_Master",pd.DataFrame()).empty: ids_cli = set(filtered["Prets_Master"].get("ID_Client", pd.Series(dtype=str)).dropna().tolist()) if not filtered.get("Clients_KYC",pd.DataFrame()).empty and "ID_Client" in filtered["Clients_KYC"].columns: filtered["Clients_KYC"] = filtered["Clients_KYC"][filtered["Clients_KYC"]["ID_Client"].isin(ids_cli)] if f_offre != "Tous" and "Prets_Master" in filtered and "Offre" in filtered["Prets_Master"].columns: filtered["Prets_Master"] = filtered["Prets_Master"][filtered["Prets_Master"]["Offre"] == f_offre] if f_sp != "Tous" and "Remboursements" in filtered and "Statut_Paiement" in filtered["Remboursements"].columns: filtered["Remboursements"] = filtered["Remboursements"][filtered["Remboursements"]["Statut_Paiement"] == f_sp] if f_ville != "Tous" and "Clients_KYC" in filtered and "Ville" in filtered["Clients_KYC"].columns: filtered["Clients_KYC"] = filtered["Clients_KYC"][filtered["Clients_KYC"]["Ville"] == f_ville] return filtered # ══════════════════════════════════════════════════════════════════════════════ # VISUELS U2 (6+ par feuille, axes causaux) # ══════════════════════════════════════════════════════════════════════════════ def _explore_clients(df: pd.DataFrame, sheets: dict): if df.empty: st.info("Table Clients_KYC vide apres filtres."); return _ctx("SOURCE : Clients_KYC | AXEE : mecanismes de survie financiere, " "concentration geographique, profils demographiques") c1, c2 = st.columns(2) # 1. Matrice survie financiere with c1: rev = _safe(df,"Revenus_Mensuels").astype(float) chg = _safe(df,"Charges_Estimees").astype(float) ral = rev - chg lgt_col = df.get("Statut_Logement", pd.Series("Autre", index=df.index)).fillna("Autre") lgt_map = {"Locataire":_C["warning"],"Proprietaire":_C["success"], "Heberge":_C["accent"],"Hébergé":_C["accent"]} fig = go.Figure() for lval in lgt_col.unique(): mask = lgt_col == lval; sub = df[mask] fig.add_trace(go.Scatter( x=rev[mask], y=chg[mask], mode="markers", name=str(lval), marker=dict(size=12, color=lgt_map.get(str(lval),_C["neutral"]), line=dict(width=1.5, color=_C["bg"])), text=sub.get("Nom_Complet", sub.index).astype(str), customdata=ral[mask].round(0), hovertemplate="%{text}
Rev : %{x:,}
Chg : %{y:,}
Reste : %{customdata:,}")) mx = max(rev.max(), chg.max()) * 1.05 fig.add_trace(go.Scatter(x=[0,mx], y=[0,mx], mode="lines", line=dict(color=_C["critical"],dash="dot",width=1),showlegend=False,hoverinfo="skip")) fig.update_layout(**_L(title="SURVIE FINANCIERE (ligne rouge = seuil critique)", height=310, xaxis_title="Revenus", yaxis_title="Charges")) st.plotly_chart(fig, use_container_width=True) # 2. Strip plot revenus par secteur with c2: if "Secteur_Activite" in df.columns: bench = df.get("EX_Neighborhood_Income_Benchmark", pd.Series(1.0, index=df.index)).fillna(1.0) fig = go.Figure() for i, sec in enumerate(df["Secteur_Activite"].dropna().unique()): sub = df[df["Secteur_Activite"]==sec]; b = bench.reindex(sub.index).fillna(1.0) np.random.seed(i) jit = np.random.uniform(-0.15, 0.15, len(sub)) fig.add_trace(go.Scatter( x=[str(sec)]*len(sub), y=sub["Revenus_Mensuels"].tolist(), mode="markers", name=str(sec), marker=dict(size=10, color=[_C["success"] if v>=1.0 else _C["critical"] for v in b], line=dict(width=1, color=_C["bg"]), opacity=0.85), text=sub.get("Nom_Complet", sub.index).astype(str), customdata=b.round(2), hovertemplate="%{text}
%{y:,} FCFA
Bench : %{customdata}x")) fig.update_layout(**_L(title="DISPERSION REVENUS/SECTEUR (vert = dessus moy. quartier)", height=310, showlegend=False, xaxis_title="Secteur", yaxis_title="Revenus")) st.plotly_chart(fig, use_container_width=True) c3, c4 = st.columns(2) # 3. Heatmap geographique with c3: if "Ville" in df.columns and "Quartier" in df.columns: pivot = df.groupby(["Ville","Quartier"]).size().reset_index(name="n") fig = go.Figure(go.Heatmap( x=pivot["Quartier"], y=pivot["Ville"], z=pivot["n"], colorscale=[[0,_C["card"]],[0.4,"#1a3a5c"],[1,_C["accent"]]], text=pivot["n"], texttemplate="%{text}", hovertemplate="%{y} · %{x}
Clients : %{z}", colorbar=dict(title="Clients",tickfont=dict(size=9,family=_FONT)), xgap=2, ygap=2)) fig.update_layout(**_L(title="DENSITE GEOGRAPHIQUE Ville x Quartier", height=300, xaxis=dict(tickangle=-30))) st.plotly_chart(fig, use_container_width=True) # 4. Parallel categories with c4: cols_par = ["Etat_Civil","Genre","Statut_Pro"] if all(c in df.columns for c in cols_par): df2 = df[cols_par].fillna("N/A") fig = go.Figure(go.Parcats( dimensions=[ dict(label="Etat Civil", values=df2["Etat_Civil"]), dict(label="Genre", values=df2["Genre"]), dict(label="Statut Pro", values=df2["Statut_Pro"]), ], line=dict(color=_PAL[0], colorscale=[[0,_PAL[0]],[1,_PAL[1]]]), labelfont=dict(family=_FONT, size=10, color=_C["text"]), tickfont=dict(family=_FONT, size=9, color=_C["subtext"]), )) fig.update_layout(**_L(title="FLUX DEMOGRAPHIQUE Etat Civil > Genre > Statut Pro",height=300)) st.plotly_chart(fig, use_container_width=True) c5, c6 = st.columns(2) # 5. Scatter AML risk vs revenu with c5: aml_risk = df.get("EX_AML_Risk_Correlation", pd.Series(0, index=df.index)).fillna(0) fig = go.Figure() for val, color, label in [(1,_C["critical"],"Risque AML"),(0,_C["success"],"AML OK")]: mask = aml_risk == val; sub = df[mask] if not sub.empty: fig.add_trace(go.Scatter(x=sub["Revenus_Mensuels"], y=_safe(sub,"Patrimoine_Declare"), mode="markers", name=label, marker=dict(size=11, color=color, line=dict(width=1.5, color=_C["bg"])), text=sub.get("Nom_Complet", sub.index).astype(str), hovertemplate="%{text}
Rev : %{x:,}
Patr : %{y:,}")) fig.update_layout(**_L(title="PROFIL AML x PATRIMOINE",height=290,xaxis_title="Revenus",yaxis_title="Patrimoine")) st.plotly_chart(fig, use_container_width=True) # 6. Bar volatilite revenu par secteur with c6: if "Secteur_Activite" in df.columns and "EX_Income_Volatility_Matrix" in df.columns: vol = df.groupby("Secteur_Activite")["EX_Income_Volatility_Matrix"].mean().sort_values() fig = go.Figure(go.Bar(x=vol.values, y=vol.index.astype(str), orientation="h", marker_color=[_C["critical"] if v>0.4 else _C["warning"] if v>0.2 else _C["accent"] for v in vol.values], text=[f"{v:.2f}" for v in vol.values], textposition="outside")) fig.update_layout(**_L(title="VOLATILITE REVENUS PAR SECTEUR (EX_Income_Volatility)", height=290, showlegend=False)) st.plotly_chart(fig, use_container_width=True) # 7. Scatter efficacite seniorite if "EX_Professional_Seniority_Efficiency" in df.columns and "Anciennete_Emploi" in df.columns: eff = df["EX_Professional_Seniority_Efficiency"].fillna(0) fig = go.Figure() if "Statut_Pro" in df.columns: for i, sp in enumerate(df["Statut_Pro"].dropna().unique()): sub = df[df["Statut_Pro"]==sp] fig.add_trace(go.Scatter( x=sub["Anciennete_Emploi"], y=sub["EX_Professional_Seniority_Efficiency"].fillna(0), mode="markers", name=str(sp), marker=dict(size=10, color=_PAL[i%len(_PAL)], line=dict(width=1,color=_C["bg"])), text=sub.get("Nom_Complet",sub.index).astype(str), hovertemplate="%{text}
Anciennete : %{x} mois
Efficacite : %{y:.1f}")) fig.update_layout(**_L(title="EFFICACITE PAR SENIORITE (revenu net / anciennete)", height=260, xaxis_title="Anciennete (mois)", yaxis_title="Efficacite")) st.plotly_chart(fig, use_container_width=True) # 8. PCA 2D Clients st.divider() _ctx("CLUSTER MAP PCA 2D — Similarite multi-dimensionnelle des clients") cli_feats = [ "Revenus_Mensuels","Autres_Revenus","Charges_Estimees","Anciennete_Emploi", "Pers_Charge","Patrimoine_Declare","Checking_frequency", "DX_Living_Cost_Ratio","DX_Disposable_Income_Stability", "DX_Digital_Accessibility_Grade","DX_Professional_Trajectory_Index", "EX_Income_Volatility_Matrix","EX_Contact_Reliability_Index", "EX_Neighborhood_Income_Benchmark","EX_Professional_Seniority_Efficiency", ] # Couleur prioritaire : FL_Risk_Target si present, sinon DX_Living_Cost_Ratio color_col = "FL_Risk_Target" if "FL_Risk_Target" in df.columns else "DX_Living_Cost_Ratio" color_lbl = "Risque" if color_col == "FL_Risk_Target" else "LCR" c_pca1, c_pca2 = st.columns(2) with c_pca1: fig = _pca_ex(df, cli_feats, "Nom_Complet", color_col, color_lbl, "CLIENTS (couleur = Risque)") st.plotly_chart(fig, use_container_width=True) with c_pca2: fig = _pca_ex(df, cli_feats, "Nom_Complet", "DX_Disposable_Income_Stability", "Stabilite rev.", "CLIENTS (couleur = Stabilite Revenus)", colorscale=[[0,"#c0392b"],[0.5,"#f39c12"],[1,"#2ecc71"]]) st.plotly_chart(fig, use_container_width=True) def _explore_garants(df: pd.DataFrame, sheets: dict): if df.empty: st.info("Table Garants_KYC vide."); return _ctx("SOURCE : Garants_KYC | AXEE : solidite mecanique, " "interoperabilite bancaire, pression financiere comparee") # Fiche comparative st.markdown( f'
' f'COMPARAISON PROFILS GARANTS
', unsafe_allow_html=True, ) kpis = [ ("Revenus Mensuels", "Revenus_Mensuels", "{:,.0f} FCFA"), ("Patrimoine Declare", "Patrimoine_Declare", "{:,.0f} FCFA"), ("Charges Estimees", "Charges_Estimees", "{:,.0f} FCFA"), ("Personnes a charge", "Pers_Charge", "{:.0f}"), ("Revenu disponible", "EX_Guarantor_Disposable_Income", "{:,.0f} FCFA"), ("Pression financiere", "EX_Garant_Liability_Pressure", "{:.1%}"), ("Leverage patrimoine", "DX_Guarantee_Leverage_Score", "{:.1f}x"), ("Disponibilite", "DX_Garant_Availability_Score", "{:.0f} / 2"), ] cols = st.columns(len(df)+1) with cols[0]: for label,_,_ in kpis: st.markdown(f'
{label}
', unsafe_allow_html=True) for i,(_, row) in enumerate(df.iterrows()): with cols[i+1]: nom = str(row.get("Nom_Complet", f"GARANT {i+1}")) st.markdown(f'
' f'{nom[:20]}
', unsafe_allow_html=True) for _,kcol,kfmt in kpis: val = row.get(kcol, None) if val is None or (isinstance(val, float) and np.isnan(val)): display = "—" else: try: display = kfmt.format(float(val)) except Exception: display = str(val) st.markdown(f'
{display}
', unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) c1, c2 = st.columns(2) # 1. Scatter wealth vs risk with c1: wtr = df.get("EX_Wealth_to_Risk_Ratio", pd.Series(1.0, index=df.index)).fillna(1.0) fig = go.Figure(go.Scatter( x=_safe(df,"Revenus_Mensuels"), y=_safe(df,"Patrimoine_Declare"), mode="markers+text", marker=dict(size=(wtr.clip(0,100)/wtr.clip(0,100).max()*18+8).tolist() if wtr.max()>0 else [12]*len(df), color=_PAL[0], line=dict(width=1.5, color=_C["bg"])), text=df.get("Nom_Complet", df.index).astype(str), textposition="top center", customdata=wtr.round(1), hovertemplate="%{text}
Rev : %{x:,}
Patr : %{y:,}
W/R : %{customdata}")) fig.update_layout(**_L(title="WEALTH-TO-RISK (taille = ratio patrimoine/risque)",height=280,showlegend=False)) st.plotly_chart(fig, use_container_width=True) # 2. Bar logistique entite x canal with c2: if "Entite_Financiere" in df.columns and "Moyen_Transfert" in df.columns: cross = df.groupby(["Entite_Financiere","Moyen_Transfert"]).size().reset_index(name="n") fig = go.Figure() for i,mt in enumerate(cross["Moyen_Transfert"].unique()): sub = cross[cross["Moyen_Transfert"]==mt] fig.add_trace(go.Bar(name=str(mt),x=sub["Entite_Financiere"],y=sub["n"],marker_color=_PAL[i%len(_PAL)])) fig.update_layout(**_L(title="EFFICACITE LOGISTIQUE Entite x Canal",barmode="stack",height=280)) st.plotly_chart(fig, use_container_width=True) c3, c4 = st.columns(2) # 3. Bar pression liability with c3: lp = df.get("EX_Garant_Liability_Pressure", pd.Series(0, index=df.index)).fillna(0) noms = df.get("Nom_Complet", df.index.astype(str)) fig = go.Figure(go.Bar(x=noms.tolist(), y=lp.tolist(), marker_color=[_C["critical"] if v>0.6 else _C["warning"] if v>0.35 else _C["success"] for v in lp], text=[f"{v:.1%}" for v in lp], textposition="outside")) fig.add_hline(y=0.6, line_dash="dot", line_color=_C["critical"], annotation_text="Seuil danger 60%", annotation_font_size=9) fig.update_layout(**_L(title="PRESSION CHARGES / REVENUS TOTAUX",height=280,showlegend=False,yaxis=dict(tickformat=".0%"))) st.plotly_chart(fig, use_container_width=True) # 4. Interoperabilite bancaire with c4: interop = df.get("EX_Bank_Transfer_Interoperability", pd.Series(0, index=df.index)).fillna(0) cnt = interop.value_counts() fig = go.Figure(go.Pie( labels=["Compatible","Incompatible"], values=[cnt.get(1,0), cnt.get(0,0)], marker_colors=[_C["success"], _C["critical"]], hole=0.5)) fig.update_layout(**_L(title="INTEROPERABILITE BANCAIRE (meme canal que client)",height=280)) st.plotly_chart(fig, use_container_width=True) c5, c6 = st.columns(2) # 5. Box revenus par entite financiere with c5: if "Entite_Financiere" in df.columns: fig = go.Figure() for i,ef in enumerate(df["Entite_Financiere"].dropna().unique()): sub = df[df["Entite_Financiere"]==ef]["Revenus_Mensuels"].dropna() fig.add_trace(go.Box(y=sub, name=str(ef), marker_color=_PAL[i%len(_PAL)], boxpoints="all", jitter=0.3)) fig.update_layout(**_L(title="REVENUS PAR ENTITE FINANCIERE",height=280,showlegend=False)) st.plotly_chart(fig, use_container_width=True) # 6. Scatter famille vs disponible with c6: disp = df.get("EX_Guarantor_Disposable_Income", pd.Series(0, index=df.index)).fillna(0) fig = go.Figure(go.Scatter( x=_safe(df,"Pers_Charge"), y=disp, mode="markers+text", marker=dict(size=13, color=[_C["critical"] if v<0 else _C["success"] for v in disp], line=dict(width=1.5, color=_C["bg"])), text=df.get("Nom_Complet", df.index).astype(str), textposition="top center", hovertemplate="%{text}
Personnes a charge : %{x}
Disponible : %{y:,}")) fig.add_hline(y=0, line_dash="dot", line_color=_C["neutral"], line_width=1) fig.update_layout(**_L(title="PRESSION FAMILIALE vs REVENU DISPONIBLE",height=280, showlegend=False,xaxis_title="Pers. a charge",yaxis_title="Disponible")) st.plotly_chart(fig, use_container_width=True) # 7. PCA 2D Garants st.divider() _ctx("CLUSTER MAP PCA 2D — Positionnement comparatif des garants") gar_feats = [ "Revenus_Mensuels","Autres_Revenus","Charges_Estimees", "Patrimoine_Declare","Pers_Charge","Anciennete_Emploi","Checking_frequency", "DX_Guarantee_Leverage_Score","DX_Family_Pressure_Index", "DX_Garant_Availability_Score","DX_Net_Worth_Credibility_Check", "EX_Guarantor_Disposable_Income","EX_Wealth_to_Risk_Ratio","EX_Garant_Liability_Pressure", ] g_pca1, g_pca2 = st.columns(2) with g_pca1: fig = _pca_ex(df, gar_feats, "Nom_Complet", "EX_Garant_Liability_Pressure", "Pression fin.", "GARANTS (couleur = Pression Charges/Revenus)") st.plotly_chart(fig, use_container_width=True) with g_pca2: fig = _pca_ex(df, gar_feats, "Nom_Complet", "DX_Guarantee_Leverage_Score", "Leverage", "GARANTS (couleur = Leverage Patrimoine)", colorscale=[[0,"#c0392b"],[0.5,"#f39c12"],[1,"#2ecc71"]]) st.plotly_chart(fig, use_container_width=True) def _explore_prets(df: pd.DataFrame, sheets: dict): if df.empty: st.info("Table Prets_Master vide."); return _ctx("SOURCE : Prets_Master + Prets_Update | AXEE : elasticite taux/duree, " "pression d'endettement par age, performance hierarchique") # Fusion avec Update (version la plus recente) upd = sheets.get("Prets_Update", pd.DataFrame()) if not upd.empty and "ID_Pret" in upd.columns and "Version" in upd.columns: latest = upd.sort_values("Version", ascending=False).drop_duplicates("ID_Pret").set_index("ID_Pret") df_m = df.copy().set_index("ID_Pret") for col in ["Taux_Hebdo","Duree_Semaines","Montant_Capital","Statut","Motif"]: if col in latest.columns: df_m[col] = latest[col].combine_first(df_m.get(col, pd.Series(dtype=object))) df_m = df_m.reset_index() else: df_m = df.copy() # Calcul tranches d'age (STRING, pas Categorical) cli = sheets.get("Clients_KYC", pd.DataFrame()) if not cli.empty and "ID_Client" in cli.columns and "Date_Naissance" in cli.columns: cli2 = cli.copy() cli2["Age"] = _age_calc(cli2["Date_Naissance"]) cli2["Tranche_Age"] = _age_bin(cli2["Age"]) # retourne str, pas Categorical age_map = cli2.set_index("ID_Client")["Tranche_Age"].to_dict() df_m["Tranche_Age"] = _safe(df_m,"ID_Client").map(age_map) else: df_m["Tranche_Age"] = np.nan TRANCHES = ["<25","25-34","35-44","45-54","55+"] # ordre fixe, pas .cat.categories c1, c2 = st.columns(2) # 1. Elasticite taux / duree with c1: fig = go.Figure() for i,of in enumerate(df_m.get("Offre",pd.Series()).dropna().unique()): sub = df_m[df_m["Offre"]==of] over = sub.get("EX_Endettement_Critical_Threshold", pd.Series(0,index=sub.index)).fillna(0) eyr = sub.get("EX_Effective_Yield_Realization", pd.Series(0,index=sub.index)).fillna(0).clip(0,1) fig.add_trace(go.Scatter( x=sub["Duree_Semaines"], y=sub["Taux_Hebdo"], mode="markers", name=str(of), marker=dict( size=[(14 if v else 8) for v in over], color=_PAL[i%len(_PAL)], symbol=["diamond" if v else "circle" for v in over], line=dict(width=1.5, color=[_C["critical"] if v else _C["bg"] for v in over])), text=sub["ID_Pret"].astype(str), customdata=sub.get("Taux_Endettement",pd.Series(0,index=sub.index)).fillna(0).round(1), hovertemplate="%{text}
%{x} sem. Taux %{y:.2%}
Endett. : %{customdata}%")) fig.update_layout(**_L(title="ELASTICITE TAUX/DUREE (losange = endettement > 40%)", height=310, xaxis_title="Duree (semaines)", yaxis=dict(tickformat=".1%"), yaxis_title="Taux Hebdo")) st.plotly_chart(fig, use_container_width=True) # 2. Boxplot pression par tranche d'age ← BUG CORRIGE ICI with c2: df_bp = df_m.dropna(subset=["Tranche_Age","Taux_Endettement"]) if not df_bp.empty: crit = df_bp.get("EX_Endettement_Critical_Threshold", pd.Series(0,index=df_bp.index)).fillna(0) fig = go.Figure() # On itere sur la liste de strings, PAS sur .cat.categories for i, tr in enumerate(TRANCHES): sub = df_bp[df_bp["Tranche_Age"] == tr] if sub.empty: continue n_crit = int(crit.reindex(sub.index).fillna(0).sum()) fig.add_trace(go.Box( y=sub["Taux_Endettement"], name=str(tr), marker_color=_C["critical"] if n_crit > 0 else _PAL[i%len(_PAL)], boxpoints="all", jitter=0.3, hovertemplate=f"{tr}
Endett. : %{{y:.1f}}%")) fig.add_hline(y=40, line_dash="dot", line_color=_C["critical"], annotation_text="Seuil critique 40%", annotation_font=dict(size=9, color=_C["critical"])) fig.update_layout(**_L(title="PRESSION D'ENDETTEMENT PAR TRANCHE D'AGE", height=310, showlegend=False, yaxis_title="Taux Endettement (%)")) st.plotly_chart(fig, use_container_width=True) else: st.plotly_chart(_ef("Date_Naissance absente ou jointure impossible.", 310), use_container_width=True) # 3. Sunburst performance hier = df_m.groupby(["Offre","Type_Pret","Statut"]).size().reset_index(name="n") fig = px.sunburst(hier, path=["Offre","Type_Pret","Statut"], values="n", color="Statut", color_discrete_map={"ACTIF":"#58a6ff","TERMINE":"#2ecc71", "EN_RETARD":"#f39c12","UPDATED":"#5a6a7a", "DEFAUT":"#c0392b","LITIGE":"#c0392b"}) fig.update_traces(textfont=dict(family=_FONT,size=11), hovertemplate="%{label}
Prets : %{value}
Part : %{percentParent:.1%}") fig.update_layout(**_L(title="RADAR DE PERFORMANCE Offre > Type > Statut", height=370)) st.plotly_chart(fig, use_container_width=True) c3, c4 = st.columns(2) # 4. Scatter anomalie de duree with c3: if "EX_Duration_Anomaly_Detection" in df_m.columns and "Type_Pret" in df_m.columns: fig = go.Figure() for i,tp in enumerate(df_m["Type_Pret"].dropna().unique()): sub = df_m[df_m["Type_Pret"]==tp] anom = sub["EX_Duration_Anomaly_Detection"].fillna(0).abs() fig.add_trace(go.Scatter( x=sub["Duree_Semaines"], y=sub["EX_Duration_Anomaly_Detection"].fillna(0), mode="markers", name=str(tp), marker=dict(size=(anom*6+8).clip(upper=20).tolist(), color=_PAL[i%len(_PAL)], line=dict(width=1,color=_C["bg"])), text=sub["ID_Pret"].astype(str), hovertemplate="%{text}
Duree : %{x} sem.
Z-score : %{y:.2f}")) fig.add_hline(y=0, line_dash="dot", line_color=_C["neutral"], line_width=1) fig.update_layout(**_L(title="ANOMALIE DE DUREE PAR TYPE (z-score)",height=290, xaxis_title="Duree (semaines)",yaxis_title="Z-score")) st.plotly_chart(fig, use_container_width=True) # 5. Bar rendement effectif with c4: if "EX_Effective_Yield_Realization" in df_m.columns: df_m2 = df_m.dropna(subset=["EX_Effective_Yield_Realization"]) if not df_m2.empty: fig = go.Figure(go.Bar( x=df_m2["ID_Pret"].astype(str), y=df_m2["EX_Effective_Yield_Realization"], marker_color=[_C["success"] if v>=0.9 else _C["warning"] if v>=0.5 else _C["critical"] for v in df_m2["EX_Effective_Yield_Realization"]], text=[f"{v:.1%}" for v in df_m2["EX_Effective_Yield_Realization"]], textposition="outside")) fig.add_hline(y=1.0, line_dash="dot", line_color=_C["neutral"]) fig.update_layout(**_L(title="RENDEMENT EFFECTIF REALISE (1.0 = 100% interets encaisses)", height=290, showlegend=False, yaxis=dict(tickformat=".0%"))) st.plotly_chart(fig, use_container_width=True) # 6. Scatter capital vs pers a charge if "EX_Loan_Size_vs_Family_Size" in df_m.columns: fig = go.Figure() for i,of in enumerate(df_m.get("Offre",pd.Series()).dropna().unique()): sub = df_m[df_m["Offre"]==of] pch = _safe(cli,"Pers_Charge") if not cli.empty else pd.Series(0,index=sub.index) pch_vals = sub.get("_pch_tmp", sub["ID_Client"].map( cli.set_index("ID_Client")["Pers_Charge"].to_dict() if not cli.empty and "ID_Client" in cli.columns else {} ).fillna(0)) if not cli.empty and "ID_Client" in cli.columns and "Pers_Charge" in cli.columns else pd.Series(1,index=sub.index) fig.add_trace(go.Scatter( x=pch_vals, y=sub["Montant_Capital"], mode="markers", name=str(of), marker=dict(size=10, color=_PAL[i%len(_PAL)], line=dict(width=1,color=_C["bg"])), text=sub["ID_Pret"].astype(str), hovertemplate="%{text}
Pers. : %{x}
Capital : %{y:,}")) fig.update_layout(**_L(title="CAPITAL vs TAILLE MENAGE",height=260, xaxis_title="Personnes a charge",yaxis_title="Montant Capital")) st.plotly_chart(fig, use_container_width=True) # PCA 2D Prets st.divider() _ctx("CLUSTER MAP PCA 2D — Positionnement des prets dans l'espace contractuel") prt_feats_pca = [ "Montant_Capital","Taux_Hebdo","Taux_Endettement","Duree_Semaines", "Montant_Versement","Montant_Total","Cout_Credit","Nb_Versements", "DX_Interest_Yield_Factor","DX_Debt_to_Income_Pressure", "DX_Motif_Risk_Weight","DX_Collateral_Coverage_Ratio", "EX_Endettement_Critical_Threshold","EX_Duration_Anomaly_Detection", "EX_Effective_Yield_Realization", ] df_pca_prt = df_m.copy() df_pca_prt["_prt_risk"] = ( df_pca_prt.get("DX_Motif_Risk_Weight", pd.Series(0.5, index=df_pca_prt.index)).fillna(0.5) * 0.5 + (df_pca_prt["Taux_Endettement"].fillna(0) / 100).clip(0, 1) * 0.5 ).round(3) p_pca1, p_pca2 = st.columns(2) with p_pca1: fig = _pca_ex(df_pca_prt, prt_feats_pca, "ID_Pret", "_prt_risk", "Risque contrat", "PRETS (couleur = Motif Risk + Endettement)") st.plotly_chart(fig, use_container_width=True) with p_pca2: fig = _pca_ex(df_pca_prt, prt_feats_pca, "Nom_Complet", "Taux_Endettement", "Endettement %", "PRETS (couleur = Taux Endettement)", colorscale=[[0,"#2ecc71"],[0.4,"#f39c12"],[1,"#c0392b"]]) st.plotly_chart(fig, use_container_width=True) def _explore_prets_update(df: pd.DataFrame, sheets: dict): if df.empty: st.info("Table Prets_Update vide."); return _ctx("SOURCE : Prets_Update | AXEE : amplitude des modifications, " "derive de montant par version, patterns de renegociation") prt = sheets.get("Prets_Master", pd.DataFrame()) c1, c2 = st.columns(2) # 1. Version drift par pret with c1: if not prt.empty and "ID_Pret" in prt.columns and "Montant_Total" in prt.columns and "Montant_Total" in df.columns: mt_orig = prt.set_index("ID_Pret")["Montant_Total"].to_dict() drift = df.copy() drift["MT_Orig"] = _safe(drift,"ID_Pret").map(mt_orig) drift["Drift"] = _safe(drift,"Montant_Total") - drift["MT_Orig"].fillna(0) fig = go.Figure() for pid in drift["ID_Pret"].dropna().unique(): sub = drift[drift["ID_Pret"]==pid].sort_values("Version") col = _C["critical"] if sub["Drift"].iloc[-1]>0 else _C["success"] fig.add_trace(go.Scatter(x=sub["Version"],y=sub["Drift"],mode="lines+markers", name=str(pid),line=dict(color=col,width=1.5),marker=dict(size=7), hovertemplate=f"{pid}
V%{{x}}
Derive : %{{y:+,.0f}}")) fig.add_hline(y=0,line_dash="dot",line_color=_C["neutral"],line_width=1) fig.update_layout(**_L(title="DERIVE MONTANT TOTAL PAR VERSION (rouge = hausse)", height=290,xaxis_title="Version",yaxis_title="Ecart vs original")) st.plotly_chart(fig, use_container_width=True) # 2. Amplitude modifications with c2: if "EX_Modification_Pattern_Clustering" in df.columns: fig = go.Figure(go.Bar( x=df["ID_Pret_Updated"].astype(str) if "ID_Pret_Updated" in df.columns else df.index.astype(str), y=df["EX_Modification_Pattern_Clustering"], marker_color=[_C["critical"] if v>=3 else _C["warning"] if v>=2 else _C["accent"] for v in df["EX_Modification_Pattern_Clustering"]], text=df["EX_Modification_Pattern_Clustering"],textposition="outside")) fig.update_layout(**_L(title="AMPLITUDE DES MODIFICATIONS (rouge = 3+ champs)", height=290,showlegend=False,yaxis_title="Champs modifies")) st.plotly_chart(fig, use_container_width=True) c3, c4 = st.columns(2) # 3. Scatter delta vs velocity with c3: if "DX_Contract_Volatility_Delta" in df.columns and "DX_Update_Velocity" in df.columns: fig = go.Figure(go.Scatter( x=df["DX_Update_Velocity"],y=df["DX_Contract_Volatility_Delta"],mode="markers", marker=dict(size=12,color=[_C["critical"] if v>0 else _C["success"] for v in df["DX_Contract_Volatility_Delta"]], line=dict(width=1.5,color=_C["bg"])), text=df["ID_Pret"].astype(str) if "ID_Pret" in df.columns else df.index.astype(str), hovertemplate="%{text}
Velocity : %{x}j
Delta : %{y:+,.0f}")) fig.add_hline(y=0,line_dash="dot",line_color=_C["neutral"],line_width=1) fig.update_layout(**_L(title="DELTA CONTRAT vs VELOCITY",height=280, xaxis_title="Jours depuis creation",yaxis_title="Delta Montant Total",showlegend=False)) st.plotly_chart(fig, use_container_width=True) # 4. Gravite renegociation with c4: if "DX_Renegotiation_Gravity_Score" in df.columns: fig = go.Figure(go.Bar( x=df["ID_Pret_Updated"].astype(str) if "ID_Pret_Updated" in df.columns else df.index.astype(str), y=df["DX_Renegotiation_Gravity_Score"], marker_color=[_C["critical"] if v>0.10 else _C["accent"] for v in df["DX_Renegotiation_Gravity_Score"]], text=df["DX_Renegotiation_Gravity_Score"].round(3),textposition="outside")) fig.update_layout(**_L(title="GRAVITE RENEGOCIATION (score keywords crise)", height=280,showlegend=False)) st.plotly_chart(fig, use_container_width=True) c5, c6 = st.columns(2) # 5. Evolution taux par version with c5: if not prt.empty and "Taux_Hebdo" in prt.columns and "Taux_Hebdo" in df.columns: taux_orig = prt.set_index("ID_Pret")["Taux_Hebdo"].to_dict() fig = go.Figure() for pid in df["ID_Pret"].dropna().unique(): sub = df[df["ID_Pret"]==pid].sort_values("Version") orig = taux_orig.get(pid, np.nan) if not (isinstance(orig,float) and np.isnan(orig)): vals = [orig] + sub["Taux_Hebdo"].tolist() vers = ["V1"] + [f"V{v}" for v in sub["Version"]] col = _C["critical"] if vals[-1]>orig else _C["success"] fig.add_trace(go.Scatter(x=vers,y=vals,mode="lines+markers",name=str(pid), line=dict(color=col,width=1.5),marker=dict(size=6), hovertemplate=f"{pid} %{{x}}
Taux : %{{y:.2%}}")) fig.update_layout(**_L(title="EVOLUTION TAUX HEBDO PAR VERSION",height=280, yaxis=dict(tickformat=".2%"))) st.plotly_chart(fig, use_container_width=True) # 6. Fragilite structurelle with c6: if "DX_Structural_Fragility_Index" in df.columns: fig = go.Figure(go.Bar( x=df["ID_Pret"].astype(str) if "ID_Pret" in df.columns else df.index.astype(str), y=df["DX_Structural_Fragility_Index"], marker_color=[_C["critical"] if v>0.15 else _C["warning"] if v>0.08 else _C["accent"] for v in df["DX_Structural_Fragility_Index"]], text=df["DX_Structural_Fragility_Index"].round(4),textposition="outside")) fig.update_layout(**_L(title="FRAGILITE STRUCTURELLE (modifications/semaines duree)", height=280,showlegend=False)) st.plotly_chart(fig, use_container_width=True) # 7. PCA 2D Prets Update st.divider() _ctx("CLUSTER MAP PCA 2D — Similarite des modifications contractuelles") upd_feats_pca = [ "Version","Montant_Capital","Taux_Hebdo","Taux_Endettement", "Duree_Semaines","Montant_Total","Cout_Credit", "DX_Contract_Volatility_Delta","DX_Update_Velocity", "DX_Renegotiation_Gravity_Score","DX_Structural_Fragility_Index", "EX_Modification_Pattern_Clustering","EX_Version_Drift_Analysis", ] df_upd_pca = df.copy() df_upd_pca["_grav"] = df_upd_pca.get("DX_Renegotiation_Gravity_Score", pd.Series(0, index=df_upd_pca.index)).fillna(0) raw_drift = df_upd_pca.get("EX_Version_Drift_Analysis", pd.Series(0, index=df_upd_pca.index)).fillna(0) mx_drift = raw_drift.abs().max() df_upd_pca["_drift_n"] = (raw_drift / mx_drift).round(3) if mx_drift > 0 else pd.Series(0.0, index=df_upd_pca.index) u_pca1, u_pca2 = st.columns(2) with u_pca1: fig = _pca_ex(df_upd_pca, upd_feats_pca, "ID_Pret", "_grav", "Gravite reneg.", "UPDATES (couleur = Gravite Renegociation)") st.plotly_chart(fig, use_container_width=True) with u_pca2: fig = _pca_ex(df_upd_pca, upd_feats_pca, "ID_Pret", "_drift_n", "Derive montant", "UPDATES (couleur = Derive Montant Total)", colorscale=[[0,"#58a6ff"],[0.5,"#f39c12"],[1,"#c0392b"]]) st.plotly_chart(fig, use_container_width=True) def _explore_remboursements(df: pd.DataFrame, sheets: dict): if df.empty: st.info("Table Remboursements vide."); return _ctx("SOURCE : Remboursements | AXEE : comportement temporel, " "correlation montant/retard, fenetres horaires d'encaissement") c1, c2 = st.columns(2) # 1. Scatter delai vs montant with c1: sp_c = {"EN_RETARD":_C["critical"],"PONCTUEL":_C["success"], "ANTICIPE":_C["accent"],"PAYE":_C["success"],"PARTIEL":_C["warning"]} anomaly = df.get("EX_Timestamp_Anomaly_Log", pd.Series(0, index=df.index)).fillna(0) fig = go.Figure() for sp in df["Statut_Paiement"].dropna().unique(): mask = df["Statut_Paiement"]==sp; sub = df[mask]; a = anomaly[mask] fig.add_trace(go.Scatter(x=sub["Jours_Retard"],y=sub["Montant_Verse"],mode="markers",name=str(sp), marker=dict(size=[14 if x else 9 for x in a], color=sp_c.get(str(sp).upper(),_C["neutral"]), symbol=["x" if x else "circle" for x in a], line=dict(width=1.5,color=_C["bg"])), text=sub["ID_Pret"].astype(str), hovertemplate=f"{sp}
Retard : %{{x}}j
Montant : %{{y:,}}")) fig.add_vline(x=0,line_dash="dot",line_color=_C["neutral"],line_width=1) fig.update_layout(**_L(title="DELAI vs MONTANT (X = heure atypique)",height=310, xaxis_title="Jours Retard",yaxis_title="Montant Verse")) st.plotly_chart(fig, use_container_width=True) # 2. Heatmap temporelle with c2: ts = pd.to_datetime(df.get("Timestamp", pd.Series(dtype=str)), errors="coerce") df2 = df.copy(); df2["Heure"] = ts.dt.hour jmap = {"Monday":"Lundi","Tuesday":"Mardi","Wednesday":"Mercredi", "Thursday":"Jeudi","Friday":"Vendredi","Saturday":"Samedi","Sunday":"Dimanche"} df2["Jour"] = ts.dt.day_name().map(jmap) jord = ["Lundi","Mardi","Mercredi","Jeudi","Vendredi","Samedi","Dimanche"] df2v = df2.dropna(subset=["Heure","Jour"]) if not df2v.empty: pivot = df2v.groupby(["Jour","Heure"]).size().reset_index(name="n") fig = go.Figure(go.Heatmap( x=pivot["Heure"],y=pivot["Jour"],z=pivot["n"], colorscale=[[0,_C["card"]],[0.4,"#1a3a5c"],[1,_C["ex"]]], text=pivot["n"],texttemplate="%{text}", hovertemplate="%{y} %{x}h00 : %{z} paiements", colorbar=dict(title="Paiements",tickfont=dict(size=9,family=_FONT)), xgap=2,ygap=2)) fig.update_layout(**_L(title="FENETRES D'ENCAISSEMENT (jour x heure)",height=310, xaxis=dict(title="Heure",dtick=2), yaxis=dict(categoryorder="array",categoryarray=jord))) st.plotly_chart(fig, use_container_width=True) else: st.plotly_chart(_ef("Timestamps non parsables.",310), use_container_width=True) c3, c4 = st.columns(2) # 3. Bar propension paiement anticipe par client with c3: if "ID_Client" in df.columns and "EX_Early_Payment_Propensity" in df.columns: prop = df.groupby("ID_Client")["EX_Early_Payment_Propensity"].mean().sort_values(ascending=False) fig = go.Figure(go.Bar(x=prop.index.astype(str),y=prop.values, marker_color=[_C["success"] if v>0.5 else _C["warning"] if v>0 else _C["critical"] for v in prop.values], text=[f"{v:.0%}" for v in prop.values],textposition="outside")) fig.update_layout(**_L(title="PROPENSION PAIEMENT ANTICIPE PAR CLIENT", height=280,showlegend=False,yaxis=dict(tickformat=".0%"))) st.plotly_chart(fig, use_container_width=True) # 4. Scatter montant vs solde avant with c4: if "Solde_Avant" in df.columns: fig = go.Figure() sp_c2 = {"EN_RETARD":_C["critical"],"PONCTUEL":_C["success"],"ANTICIPE":_C["accent"]} for sp in df["Statut_Paiement"].dropna().unique(): mask = df["Statut_Paiement"]==sp; sub = df[mask] fig.add_trace(go.Scatter(x=sub["Solde_Avant"],y=sub["Montant_Verse"],mode="markers",name=str(sp), marker=dict(size=10,color=sp_c2.get(str(sp).upper(),_C["neutral"]),line=dict(width=1,color=_C["bg"])), hovertemplate=f"{sp}
Solde : %{{x:,}}
Verse : %{{y:,}}")) mx = max(df["Solde_Avant"].max(),df["Montant_Verse"].max())*1.05 fig.add_trace(go.Scatter(x=[0,mx],y=[0,mx],mode="lines", line=dict(color=_C["subtext"],dash="dot",width=1),showlegend=False,hoverinfo="skip")) fig.update_layout(**_L(title="MONTANT VERSE vs SOLDE INITIAL",height=280, xaxis_title="Solde Avant",yaxis_title="Montant Verse")) st.plotly_chart(fig, use_container_width=True) c5, c6 = st.columns(2) # 5. Bar coherence du paiement par client with c5: if "ID_Client" in df.columns and "EX_Payment_Consistency_Score" in df.columns: cons = df.groupby("ID_Client")["EX_Payment_Consistency_Score"].mean().sort_values() fig = go.Figure(go.Bar(x=cons.index.astype(str),y=cons.values, marker_color=[_C["critical"] if v>5000 else _C["warning"] if v>2000 else _C["success"] for v in cons.values], text=cons.round(0).astype(int).tolist(),textposition="outside")) fig.update_layout(**_L(title="ECART-TYPE MONTANT VERSE PAR CLIENT (coherence)", height=280,showlegend=False,yaxis_title="Ecart-type")) st.plotly_chart(fig, use_container_width=True) # 6. Timeline chronologique with c6: d2 = df.copy(); d2["Date_Paiement"] = pd.to_datetime(d2.get("Date_Paiement"),errors="coerce") d2 = d2.dropna(subset=["Date_Paiement"]) if not d2.empty: sp_c3 = {"EN_RETARD":_C["critical"],"PONCTUEL":_C["success"],"ANTICIPE":_C["accent"]} fig = go.Figure() for sp in d2["Statut_Paiement"].dropna().unique(): mask = d2["Statut_Paiement"]==sp; sub = d2[mask] fig.add_trace(go.Scatter(x=sub["Date_Paiement"],y=sub["Montant_Verse"],mode="markers",name=str(sp), marker=dict(size=10,color=sp_c3.get(str(sp).upper(),_C["neutral"]),line=dict(width=1.5,color=_C["bg"])), text=sub["ID_Pret"].astype(str), hovertemplate="%{text}
%{x|%d/%m/%Y}
%{y:,}")) fig.update_layout(**_L(title="TIMELINE CHRONOLOGIQUE DES PAIEMENTS",height=280)) st.plotly_chart(fig, use_container_width=True) # 7. PCA 2D Remboursements st.divider() _ctx("CLUSTER MAP PCA 2D — Comportements de paiement regroupes") rem_feats_pca = [ "Montant_Verse","Montant_Principal","Montant_Interets","Penalites_Retard", "Solde_Avant","Solde_Apres","Jours_Retard", "DX_Profitability_Realization_Rate","DX_Overpayment_Tendency", "DX_Channel_Risk_Correlation","DX_Temporal_Payment_Behavior", "EX_Early_Payment_Propensity","EX_Payment_Consistency_Score", "EX_Collection_Cost_Efficiency", ] df_rem_pca = df.copy() # Score risque remboursement : retard positif normalise jr = df_rem_pca["Jours_Retard"].clip(lower=0) jr_max = jr.max() df_rem_pca["_rem_risk"] = (jr / jr_max).round(3) if jr_max > 0 else pd.Series(0.0, index=df_rem_pca.index) r_pca1, r_pca2 = st.columns(2) with r_pca1: fig = _pca_ex(df_rem_pca, rem_feats_pca, "ID_Pret", "_rem_risk", "Risque retard", "REMBOURSEMENTS (couleur = Jours Retard norm.)") st.plotly_chart(fig, use_container_width=True) with r_pca2: fig = _pca_ex(df_rem_pca, rem_feats_pca, "ID_Pret", "Montant_Verse", "Montant", "REMBOURSEMENTS (couleur = Montant Verse)", colorscale=[[0,"#c0392b"],[0.5,"#f39c12"],[1,"#2ecc71"]]) st.plotly_chart(fig, use_container_width=True) def _explore_ajustements(df: pd.DataFrame, sheets: dict): if df.empty: st.markdown( f'
' f'
STATUS OPERATIONNEL
' f'
TABLE VIDE EN ATTENTE D\'ALIMENTATION
' f'
' f'Analyses causales actives des le premier ajustement enregistre.
' f'
', unsafe_allow_html=True) return _ctx("SOURCE : Ajustements_Echeances + Prets_Master | AXEE : " "frequence, impact financier, correlation defauts futurs") c1, c2 = st.columns(2) with c1: cnt = df["Raison"].value_counts() r2d = df.groupby("Raison")["EX_Reason_to_Default_Bridge"].mean() if "EX_Reason_to_Default_Bridge" in df.columns else pd.Series(0,index=cnt.index) bc = [_C["critical"] if r2d.get(r,0)>0.3 else _C["warning"] if r2d.get(r,0)>0.1 else _C["accent"] for r in cnt.index] fig = go.Figure(go.Bar(x=cnt.values,y=cnt.index.astype(str),orientation="h", marker_color=bc,text=cnt.values,textposition="outside")) fig.update_layout(**_L(title="RAISONS (couleur = pont defaut)",height=300,showlegend=False)) st.plotly_chart(fig, use_container_width=True) with c2: if "EX_Timing_of_Adjustment" in df.columns and "EX_Amount_Expansion_Factor" in df.columns: fig = go.Figure(go.Scatter( x=df["EX_Timing_of_Adjustment"].fillna(0), y=df["EX_Amount_Expansion_Factor"].fillna(0), mode="markers", marker=dict(size=12,color=_C["ex"],line=dict(width=1.5,color=_C["bg"])), text=df["ID_Pret"].astype(str) if "ID_Pret" in df.columns else df.index.astype(str), hovertemplate="%{text}
Timing : %{x}j
Expansion : %{y:.2%}")) fig.update_layout(**_L(title="TIMING vs EXPANSION MONTANT",height=300,showlegend=False, xaxis_title="Jours avant fin",yaxis_title="Facteur expansion")) st.plotly_chart(fig, use_container_width=True) # ── Routeur public ──────────────────────────────────────────────────────────── _MAP = { "CLIENTS": ("Clients_KYC", _explore_clients), "GARANTS": ("Garants_KYC", _explore_garants), "PRETS": ("Prets_Master", _explore_prets), "PRETS_UPDATE": ("Prets_Update", _explore_prets_update), "PRETS UPDATE": ("Prets_Update", _explore_prets_update), "REMBOURSEMENTS": ("Remboursements", _explore_remboursements), "AJUSTEMENTS": ("Ajustements_Echeances", _explore_ajustements), } def render_exploration(sheets: dict, section: str): section_up = section.upper().replace("_"," ").strip() for key in [section, section_up, section.upper()]: if key in _MAP: tab_key, fn = _MAP[key] fn(sheets.get(tab_key, pd.DataFrame()), sheets) return st.warning(f"Section inconnue : {section}")