Vortex-Flux / src /Analytics /Data_exploration.py
klydekushy's picture
Update src/Analytics/Data_exploration.py
38f1b57 verified
"""
DATA_EXPLORATION.PY Unite 2 : Analyses Exploratoires
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Fonctions publiques :
enrich_ex(sheets) -> 27 scores EX_* silencieux
render_filter_bar(sheets) -> filtre global, retourne sheets filtre
render_exploration(sheets, sec) -> visuels U2 (6+ par feuille)
"""
import re
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
_C = {
"bg": "#0a0e12",
"card": "#0f141a",
"border": "rgba(80,100,120,0.25)",
"accent": "#58a6ff",
"ex": "#f39c12",
"success": "#2ecc71",
"warning": "#f39c12",
"critical": "#c0392b",
"neutral": "#5a6a7a",
"text": "#a8b8c8",
"subtext": "#5a6a7a",
"grid": "rgba(80,100,120,0.12)",
}
_PAL = ["#58a6ff","#2ecc71","#f39c12","#c0392b","#a78bfa","#38bdf8","#fb7185","#34d399"]
_FONT = "JetBrains Mono, Courier New, monospace"
_BASE = dict(
paper_bgcolor=_C["card"], plot_bgcolor=_C["card"],
font=dict(color=_C["text"], family=_FONT, size=11),
title_font=dict(size=12, color=_C["text"], family=_FONT),
margin=dict(t=44, b=36, l=48, r=24),
legend=dict(bgcolor=_C["bg"], bordercolor=_C["border"], borderwidth=1,
font=dict(size=10, family=_FONT)),
xaxis=dict(gridcolor=_C["grid"], zerolinecolor=_C["grid"],
tickfont=dict(family=_FONT, size=10)),
yaxis=dict(gridcolor=_C["grid"], zerolinecolor=_C["grid"],
tickfont=dict(family=_FONT, size=10)),
colorway=_PAL,
)
# ── Helpers ───────────────────────────────────────────────────────────────────
def _L(**kw):
m = {**_BASE}
for ax in ("xaxis", "yaxis"):
if ax in kw:
m[ax] = {**m[ax], **kw.pop(ax)}
m.update(kw)
return m
def _ef(msg, h=220):
fig = go.Figure()
fig.add_annotation(text=msg, xref="paper", yref="paper", x=0.5, y=0.5,
showarrow=False, font=dict(size=12, color=_C["subtext"], family=_FONT))
fig.update_layout(**_L(height=h))
return fig
def _age_calc(series):
today = datetime.today()
def _a(v):
try:
d = pd.to_datetime(str(v), dayfirst=True, errors="coerce")
return int((today - d).days / 365.25) if not pd.isna(d) else np.nan
except Exception:
return np.nan
return series.apply(_a)
def _safe(df, col, default=0):
return df[col].fillna(default) if col in df.columns else pd.Series(default, index=df.index)
def _ctx(text: str):
"""Bandeau de contexte orange, signature Exploration."""
st.markdown(
f'<div style="border-left:3px solid {_C["ex"]};padding:5px 12px;'
f'margin-bottom:14px;font-family:{_FONT};font-size:0.62rem;'
f'color:{_C["subtext"]};letter-spacing:0.8px;">{text}</div>',
unsafe_allow_html=True,
)
def _age_bin(series_age: pd.Series) -> pd.Series:
"""Convertit une serie d'ages en tranches textuelles. Pas de .cat.categories."""
bins = [0, 25, 35, 45, 55, 120]
labs = ["<25", "25-34", "35-44", "45-54", "55+"]
# pd.cut retourne une Categorical; on la convertit en str pour eviter .cat
result = pd.cut(series_age, bins=bins, labels=labs, right=False)
return result.astype(str).replace("nan", np.nan)
# ══════════════════════════════════════════════════════════════════════════════
# ENRICHISSEMENT EX_* (silencieux - 27 scores)
# ══════════════════════════════════════════════════════════════════════════════
def _pca_ex(df: pd.DataFrame, feat_cols: list, label_col: str,
color_col: str, color_title: str, title: str,
colorscale=None) -> go.Figure:
"""PCA 2D generique pour l'exploration β€” signature Exploration (orange)."""
if colorscale is None:
colorscale = [[0,"#2ecc71"],[0.5,"#f39c12"],[1,"#c0392b"]]
avail = [c for c in feat_cols if c in df.columns and
pd.api.types.is_numeric_dtype(df[c])]
if len(df) < 3 or len(avail) < 2:
return _ef(f"PCA indisponible ({len(df)} lignes, {len(avail)} features numeriques).", 340)
X = df[avail].fillna(df[avail].median()).values
try:
X_sc = StandardScaler().fit_transform(X)
n_c = min(2, X_sc.shape[0], X_sc.shape[1])
pca = PCA(n_components=n_c, random_state=42)
comp = pca.fit_transform(X_sc)
var1 = pca.explained_variance_ratio_[0] * 100
var2 = pca.explained_variance_ratio_[1] * 100 if n_c > 1 else 0.0
except Exception as e:
return _ef(f"PCA erreur : {e}", 340)
pc2 = comp[:,1] if n_c > 1 else [0]*len(comp)
labels = df[label_col].astype(str).str[:12] if label_col in df.columns else df.index.astype(str)
color_vals = df[color_col].fillna(df[color_col].median()) if color_col in df.columns else pd.Series(0, index=df.index)
fig = go.Figure(go.Scatter(
x=comp[:,0], y=pc2, mode="markers+text",
text=labels, textposition="top center",
textfont=dict(size=8, family=_FONT, color=_C["subtext"]),
marker=dict(size=12, color=color_vals, colorscale=colorscale,
showscale=True,
colorbar=dict(title=color_title, tickfont=dict(size=9, family=_FONT), len=0.65),
line=dict(width=1.5, color=_C["bg"])),
hovertemplate=(
"<b>%{text}</b><br>PC1 : %{x:.2f}<br>PC2 : %{y:.2f}<br>"
+ color_title + " : %{marker.color:.3f}<extra></extra>"
),
))
fig.update_layout(**_L(
title=f"{title} ({var1:.0f}%+{var2:.0f}% var.)",
height=340, showlegend=False,
xaxis_title=f"PC1 ({var1:.1f}%)", yaxis_title=f"PC2 ({var2:.1f}%)",
))
return fig
def enrich_ex(sheets: dict) -> dict:
s = {k: df.copy() for k, df in sheets.items() if isinstance(df, pd.DataFrame)}
cli = s.get("Clients_KYC", pd.DataFrame())
gar = s.get("Garants_KYC", pd.DataFrame())
prt = s.get("Prets_Master", pd.DataFrame())
upd = s.get("Prets_Update", pd.DataFrame())
rem = s.get("Remboursements", pd.DataFrame())
adj = s.get("Ajustements_Echeances", pd.DataFrame())
# ── Clients (5) ───────────────────────────────────────────────────────────
if not cli.empty:
# EX_Income_Volatility_Matrix
if "Secteur_Activite" in cli.columns:
rev = _safe(cli, "Revenus_Mensuels").astype(float)
autre = _safe(cli, "Autres_Revenus").astype(float)
ratio = autre / rev.replace(0, np.nan)
cli["EX_Income_Volatility_Matrix"] = (
ratio.groupby(cli["Secteur_Activite"])
.transform(lambda x: x.std(ddof=0) if len(x) > 1 else 0.0)
).round(3)
else:
cli["EX_Income_Volatility_Matrix"] = 0.0
# EX_AML_Risk_Correlation
risky = {"ESPECES","INCONNU","TIERS","NON_DECLARE"}
def _aml(row):
aml = str(row.get("Verification_AML","")).upper()
org = str(row.get("Origine_Fonds","")).upper()
return 1 if aml != "VERIFIE" and any(r in org for r in risky) else 0
cli["EX_AML_Risk_Correlation"] = cli.apply(_aml, axis=1)
# EX_Contact_Reliability_Index
def _contact(row):
sc = 0
ph = str(row.get("Telephone","")).strip()
em = str(row.get("Email","")).strip()
sc += 2 if re.match(r"^\+[\d\s\-]{9,15}$", ph) else (1 if re.match(r"[\d\+][\d\s\-]{7,}", ph) else 0)
sc += 2 if re.match(r"^[^@\s]{2,}@[^@\s]{2,}\.[a-zA-Z]{2,6}$", em) else 0
return sc
cli["EX_Contact_Reliability_Index"] = cli.apply(_contact, axis=1)
# EX_Neighborhood_Income_Benchmark
if "Quartier" in cli.columns and "Revenus_Mensuels" in cli.columns:
rev_q = cli.groupby("Quartier")["Revenus_Mensuels"].transform("mean")
cli["EX_Neighborhood_Income_Benchmark"] = (
_safe(cli,"Revenus_Mensuels") / rev_q.replace(0, np.nan)
).round(3)
else:
cli["EX_Neighborhood_Income_Benchmark"] = 1.0
# EX_Professional_Seniority_Efficiency
rn = (_safe(cli,"Revenus_Mensuels") - _safe(cli,"Charges_Estimees")).replace(0, np.nan)
cli["EX_Professional_Seniority_Efficiency"] = (
rn / _safe(cli,"Anciennete_Emploi",1).replace(0, np.nan)
).round(2)
s["Clients_KYC"] = cli
# ── Garants (5) ──────────────────────────────────────────────────────────
if not gar.empty:
gar["EX_Guarantor_Disposable_Income"] = (
_safe(gar,"Revenus_Mensuels") - _safe(gar,"Charges_Estimees")
).round(0)
cf = _safe(gar,"Checking_frequency",1).replace(0, np.nan)
gar["EX_Wealth_to_Risk_Ratio"] = (_safe(gar,"Patrimoine_Declare") / cf).round(2)
if "Ville" in gar.columns and "Secteur_Activite" in gar.columns:
gar["EX_Guarantee_Diversity_Score"] = gar.groupby("Ville")["Secteur_Activite"].transform("nunique")
else:
gar["EX_Guarantee_Diversity_Score"] = 1
if not cli.empty and "ID_Client" in cli.columns and "Moyen_Transfert" in cli.columns:
cli_mt = set(cli["Moyen_Transfert"].str.upper().dropna().tolist())
gar["EX_Bank_Transfer_Interoperability"] = gar.apply(
lambda row: 1 if str(row.get("Moyen_Transfert","")).upper() in cli_mt else 0, axis=1)
else:
gar["EX_Bank_Transfer_Interoperability"] = 0
rev_tot = (_safe(gar,"Revenus_Mensuels") + _safe(gar,"Autres_Revenus")).replace(0, np.nan)
gar["EX_Garant_Liability_Pressure"] = (_safe(gar,"Charges_Estimees") / rev_tot).round(3)
s["Garants_KYC"] = gar
# ── Prets (5) ─────────────────────────────────────────────────────────────
if not prt.empty:
if not rem.empty and "ID_Pret" in rem.columns:
int_enc = rem.groupby("ID_Pret")["Montant_Interets"].sum().to_dict()
prt["EX_Effective_Yield_Realization"] = (
_safe(prt,"ID_Pret").map(int_enc).fillna(0)
/ _safe(prt,"Cout_Credit",1).replace(0, np.nan)
).round(3)
else:
prt["EX_Effective_Yield_Realization"] = np.nan
if "Motif" in prt.columns and "Statut" in prt.columns:
def_mask = prt["Statut"].str.upper().isin(["DEFAUT","LITIGE"])
def _def_by_motif(motif_val):
sub = prt[prt["Motif"] == motif_val]
if sub.empty: return 0.0
return round(sub["Statut"].str.upper().isin(["DEFAUT","LITIGE"]).mean(), 3)
prt["EX_Default_Probability_By_Motif"] = prt["Motif"].apply(_def_by_motif)
else:
prt["EX_Default_Probability_By_Motif"] = 0.0
if not cli.empty and "ID_Client" in cli.columns and "Pers_Charge" in cli.columns:
pch_map = cli.set_index("ID_Client")["Pers_Charge"].to_dict()
prt["_pch"] = _safe(prt,"ID_Client").map(pch_map).fillna(1).replace(0,1)
prt["EX_Loan_Size_vs_Family_Size"] = (_safe(prt,"Montant_Capital") / prt["_pch"]).round(0)
prt.drop(columns=["_pch"], inplace=True, errors="ignore")
else:
prt["EX_Loan_Size_vs_Family_Size"] = np.nan
if "Type_Pret" in prt.columns:
prt["EX_Duration_Anomaly_Detection"] = (
prt.groupby("Type_Pret")["Duree_Semaines"]
.transform(lambda x: (x - x.mean()) / x.std() if x.std() > 0 else 0.0)
).round(2)
else:
prt["EX_Duration_Anomaly_Detection"] = 0.0
prt["EX_Endettement_Critical_Threshold"] = (
_safe(prt,"Taux_Endettement").astype(float) > 40.0
).astype(int)
s["Prets_Master"] = prt
# ── Prets_Update (2) ─────────────────────────────────────────────────────
if not upd.empty:
modif_cols = ["Montant_Capital","Taux_Hebdo","Duree_Semaines","Motif","Moyen_Transfert"]
if not prt.empty and "ID_Pret" in prt.columns:
def _pattern(row):
base = prt[prt["ID_Pret"] == row.get("ID_Pret","")]
if base.empty: return 0
base = base.iloc[0]
return sum(1 for col in modif_cols if col in row and col in base and str(row[col]) != str(base[col]))
upd["EX_Modification_Pattern_Clustering"] = upd.apply(_pattern, axis=1)
else:
upd["EX_Modification_Pattern_Clustering"] = 0
if not prt.empty and "Montant_Total" in prt.columns and "ID_Pret" in upd.columns:
mt_orig = prt.set_index("ID_Pret")["Montant_Total"].to_dict()
upd["EX_Version_Drift_Analysis"] = (
_safe(upd,"Montant_Total") - _safe(upd,"ID_Pret").map(mt_orig).fillna(0)
).round(0)
else:
upd["EX_Version_Drift_Analysis"] = np.nan
s["Prets_Update"] = upd
# ── Remboursements (5) ────────────────────────────────────────────────────
if not rem.empty:
rem["EX_Early_Payment_Propensity"] = (_safe(rem,"Jours_Retard").astype(float) < 0).astype(int)
pen = _safe(rem,"Penalites_Retard").astype(float)
mv = _safe(rem,"Montant_Verse").astype(float)
rem["EX_Collection_Cost_Efficiency"] = (mv / (mv + pen).replace(0, np.nan)).round(4)
def _anomaly_hour(ts):
try:
h = pd.to_datetime(str(ts), errors="coerce").hour
return 1 if pd.notna(h) and (h >= 23 or h < 5) else 0
except Exception:
return 0
rem["EX_Timestamp_Anomaly_Log"] = rem.get("Timestamp", pd.Series(dtype=str)).apply(_anomaly_hour)
rem["EX_Payment_Consistency_Score"] = (
rem.groupby("ID_Client")["Montant_Verse"].transform(lambda x: x.std(ddof=0)).fillna(0).round(2)
) if "ID_Client" in rem.columns else 0.0
if "Moyen_Paiement" in rem.columns and "Statut_Paiement" in rem.columns:
good = rem["Statut_Paiement"].str.upper().isin(["PAYE","PONCTUEL","ANTICIPE"])
rate = rem.groupby("Moyen_Paiement").apply(lambda g: good.reindex(g.index).mean())
rem["EX_Recovery_Channel_Success"] = _safe(rem,"Moyen_Paiement").map(rate).round(3)
else:
rem["EX_Recovery_Channel_Success"] = np.nan
s["Remboursements"] = rem
# ── Ajustements (5) ──────────────────────────────────────────────────────
if not adj.empty:
if "ID_Pret" in adj.columns and not prt.empty and "ID_Client" in prt.columns:
cli_map = prt.set_index("ID_Pret")["ID_Client"].to_dict()
adj["_cli"] = _safe(adj,"ID_Pret").map(cli_map)
adj["EX_Adjustment_Frequency_Per_Client"] = adj.groupby("_cli")["_cli"].transform("count").fillna(0)
adj.drop(columns=["_cli"], inplace=True, errors="ignore")
else:
adj["EX_Adjustment_Frequency_Per_Client"] = 0
if not prt.empty and "Statut" in prt.columns and "Raison" in adj.columns:
def _r2d(raison):
ids = adj[adj["Raison"] == raison].get("ID_Pret", pd.Series(dtype=str))
sub = prt[prt["ID_Pret"].isin(ids)]
return 0.0 if sub.empty else round(sub["Statut"].str.upper().isin(["DEFAUT","LITIGE"]).mean(), 3)
adj["EX_Reason_to_Default_Bridge"] = adj.get("Raison", pd.Series(dtype=str)).apply(_r2d)
else:
adj["EX_Reason_to_Default_Bridge"] = np.nan
if not prt.empty and "ID_Pret" in adj.columns and "Montant_Capital" in prt.columns:
cap_map = prt.set_index("ID_Pret")["Montant_Capital"].to_dict()
cap_init = _safe(adj,"ID_Pret").map(cap_map).replace(0, np.nan)
adj["EX_Amount_Expansion_Factor"] = (_safe(adj,"Montant_Additionnel") / cap_init).round(4)
else:
adj["EX_Amount_Expansion_Factor"] = np.nan
if not prt.empty and "Date_Fin" in prt.columns:
fm = prt.set_index("ID_Pret")["Date_Fin"].to_dict()
df_ = pd.to_datetime(_safe(adj,"ID_Pret").map(fm), errors="coerce")
da = pd.to_datetime(adj.get("Date_Creation"), errors="coerce")
adj["EX_Timing_of_Adjustment"] = (df_ - da).dt.days
else:
adj["EX_Timing_of_Adjustment"] = np.nan
if not prt.empty and "Date_Creation" in prt.columns:
dc_map = prt.set_index("ID_Pret")["Date_Creation"].to_dict()
dp = pd.to_datetime(_safe(adj,"ID_Pret").map(dc_map), errors="coerce")
da = pd.to_datetime(adj.get("Date_Creation"), errors="coerce")
adj["EX_Adjustment_Latency"] = (da - dp).dt.days
else:
adj["EX_Adjustment_Latency"] = np.nan
s["Ajustements_Echeances"] = adj
return s
# ══════════════════════════════════════════════════════════════════════════════
# FILTRE GLOBAL
# ══════════════════════════════════════════════════════════════════════════════
def render_filter_bar(sheets: dict) -> dict:
"""Affiche les filtres, retourne un nouveau dict de DataFrames filtrΓ©s."""
st.markdown(
f'<div style="font-family:{_FONT};font-size:0.55rem;letter-spacing:2px;'
f'text-transform:uppercase;color:{_C["subtext"]};margin-bottom:6px;">'
f'FILTRE GLOBAL Β· EXPLORATION</div>',
unsafe_allow_html=True,
)
prt = sheets.get("Prets_Master", pd.DataFrame())
rem = sheets.get("Remboursements", pd.DataFrame())
cli = sheets.get("Clients_KYC", pd.DataFrame())
fc1, fc2, fc3, fc4 = st.columns(4)
with fc1:
statuts = ["Tous"] + (sorted(prt["Statut"].dropna().unique().tolist()) if not prt.empty and "Statut" in prt.columns else [])
f_statut = st.selectbox("Statut pret", statuts, key="ex_f_statut")
with fc2:
offres = ["Tous"] + (sorted(prt["Offre"].dropna().unique().tolist()) if not prt.empty and "Offre" in prt.columns else [])
f_offre = st.selectbox("Offre", offres, key="ex_f_offre")
with fc3:
sp_vals = ["Tous"] + (sorted(rem["Statut_Paiement"].dropna().unique().tolist()) if not rem.empty and "Statut_Paiement" in rem.columns else [])
f_sp = st.selectbox("Statut paiement", sp_vals, key="ex_f_sp")
with fc4:
villes = ["Tous"] + (sorted(cli["Ville"].dropna().unique().tolist()) if not cli.empty and "Ville" in cli.columns else [])
f_ville = st.selectbox("Ville", villes, key="ex_f_ville")
st.divider()
# Appliquer les filtres
filtered = {k: df.copy() for k, df in sheets.items() if isinstance(df, pd.DataFrame)}
if f_statut != "Tous" and not prt.empty and "Statut" in prt.columns:
ids_ok = set(prt[prt["Statut"] == f_statut].get("ID_Pret", pd.Series(dtype=str)).tolist())
for sh in ("Prets_Master","Prets_Update","Remboursements","Ajustements_Echeances"):
df = filtered.get(sh, pd.DataFrame())
if not df.empty and "ID_Pret" in df.columns:
filtered[sh] = df[df["ID_Pret"].isin(ids_ok)]
if not filtered.get("Prets_Master",pd.DataFrame()).empty:
ids_cli = set(filtered["Prets_Master"].get("ID_Client", pd.Series(dtype=str)).dropna().tolist())
if not filtered.get("Clients_KYC",pd.DataFrame()).empty and "ID_Client" in filtered["Clients_KYC"].columns:
filtered["Clients_KYC"] = filtered["Clients_KYC"][filtered["Clients_KYC"]["ID_Client"].isin(ids_cli)]
if f_offre != "Tous" and "Prets_Master" in filtered and "Offre" in filtered["Prets_Master"].columns:
filtered["Prets_Master"] = filtered["Prets_Master"][filtered["Prets_Master"]["Offre"] == f_offre]
if f_sp != "Tous" and "Remboursements" in filtered and "Statut_Paiement" in filtered["Remboursements"].columns:
filtered["Remboursements"] = filtered["Remboursements"][filtered["Remboursements"]["Statut_Paiement"] == f_sp]
if f_ville != "Tous" and "Clients_KYC" in filtered and "Ville" in filtered["Clients_KYC"].columns:
filtered["Clients_KYC"] = filtered["Clients_KYC"][filtered["Clients_KYC"]["Ville"] == f_ville]
return filtered
# ══════════════════════════════════════════════════════════════════════════════
# VISUELS U2 (6+ par feuille, axes causaux)
# ══════════════════════════════════════════════════════════════════════════════
def _explore_clients(df: pd.DataFrame, sheets: dict):
if df.empty:
st.info("Table Clients_KYC vide apres filtres."); return
_ctx("SOURCE : Clients_KYC | AXEE : mecanismes de survie financiere, "
"concentration geographique, profils demographiques")
c1, c2 = st.columns(2)
# 1. Matrice survie financiere
with c1:
rev = _safe(df,"Revenus_Mensuels").astype(float)
chg = _safe(df,"Charges_Estimees").astype(float)
ral = rev - chg
lgt_col = df.get("Statut_Logement", pd.Series("Autre", index=df.index)).fillna("Autre")
lgt_map = {"Locataire":_C["warning"],"Proprietaire":_C["success"],
"Heberge":_C["accent"],"HΓ©bergΓ©":_C["accent"]}
fig = go.Figure()
for lval in lgt_col.unique():
mask = lgt_col == lval; sub = df[mask]
fig.add_trace(go.Scatter(
x=rev[mask], y=chg[mask], mode="markers", name=str(lval),
marker=dict(size=12, color=lgt_map.get(str(lval),_C["neutral"]),
line=dict(width=1.5, color=_C["bg"])),
text=sub.get("Nom_Complet", sub.index).astype(str),
customdata=ral[mask].round(0),
hovertemplate="<b>%{text}</b><br>Rev : %{x:,}<br>Chg : %{y:,}<br>Reste : %{customdata:,}<extra></extra>"))
mx = max(rev.max(), chg.max()) * 1.05
fig.add_trace(go.Scatter(x=[0,mx], y=[0,mx], mode="lines",
line=dict(color=_C["critical"],dash="dot",width=1),showlegend=False,hoverinfo="skip"))
fig.update_layout(**_L(title="SURVIE FINANCIERE (ligne rouge = seuil critique)",
height=310, xaxis_title="Revenus", yaxis_title="Charges"))
st.plotly_chart(fig, use_container_width=True)
# 2. Strip plot revenus par secteur
with c2:
if "Secteur_Activite" in df.columns:
bench = df.get("EX_Neighborhood_Income_Benchmark", pd.Series(1.0, index=df.index)).fillna(1.0)
fig = go.Figure()
for i, sec in enumerate(df["Secteur_Activite"].dropna().unique()):
sub = df[df["Secteur_Activite"]==sec]; b = bench.reindex(sub.index).fillna(1.0)
np.random.seed(i)
jit = np.random.uniform(-0.15, 0.15, len(sub))
fig.add_trace(go.Scatter(
x=[str(sec)]*len(sub), y=sub["Revenus_Mensuels"].tolist(), mode="markers",
name=str(sec),
marker=dict(size=10,
color=[_C["success"] if v>=1.0 else _C["critical"] for v in b],
line=dict(width=1, color=_C["bg"]), opacity=0.85),
text=sub.get("Nom_Complet", sub.index).astype(str), customdata=b.round(2),
hovertemplate="<b>%{text}</b><br>%{y:,} FCFA<br>Bench : %{customdata}x<extra></extra>"))
fig.update_layout(**_L(title="DISPERSION REVENUS/SECTEUR (vert = dessus moy. quartier)",
height=310, showlegend=False, xaxis_title="Secteur", yaxis_title="Revenus"))
st.plotly_chart(fig, use_container_width=True)
c3, c4 = st.columns(2)
# 3. Heatmap geographique
with c3:
if "Ville" in df.columns and "Quartier" in df.columns:
pivot = df.groupby(["Ville","Quartier"]).size().reset_index(name="n")
fig = go.Figure(go.Heatmap(
x=pivot["Quartier"], y=pivot["Ville"], z=pivot["n"],
colorscale=[[0,_C["card"]],[0.4,"#1a3a5c"],[1,_C["accent"]]],
text=pivot["n"], texttemplate="%{text}",
hovertemplate="<b>%{y} Β· %{x}</b><br>Clients : %{z}<extra></extra>",
colorbar=dict(title="Clients",tickfont=dict(size=9,family=_FONT)),
xgap=2, ygap=2))
fig.update_layout(**_L(title="DENSITE GEOGRAPHIQUE Ville x Quartier",
height=300, xaxis=dict(tickangle=-30)))
st.plotly_chart(fig, use_container_width=True)
# 4. Parallel categories
with c4:
cols_par = ["Etat_Civil","Genre","Statut_Pro"]
if all(c in df.columns for c in cols_par):
df2 = df[cols_par].fillna("N/A")
fig = go.Figure(go.Parcats(
dimensions=[
dict(label="Etat Civil", values=df2["Etat_Civil"]),
dict(label="Genre", values=df2["Genre"]),
dict(label="Statut Pro", values=df2["Statut_Pro"]),
],
line=dict(color=_PAL[0], colorscale=[[0,_PAL[0]],[1,_PAL[1]]]),
labelfont=dict(family=_FONT, size=10, color=_C["text"]),
tickfont=dict(family=_FONT, size=9, color=_C["subtext"]),
))
fig.update_layout(**_L(title="FLUX DEMOGRAPHIQUE Etat Civil > Genre > Statut Pro",height=300))
st.plotly_chart(fig, use_container_width=True)
c5, c6 = st.columns(2)
# 5. Scatter AML risk vs revenu
with c5:
aml_risk = df.get("EX_AML_Risk_Correlation", pd.Series(0, index=df.index)).fillna(0)
fig = go.Figure()
for val, color, label in [(1,_C["critical"],"Risque AML"),(0,_C["success"],"AML OK")]:
mask = aml_risk == val; sub = df[mask]
if not sub.empty:
fig.add_trace(go.Scatter(x=sub["Revenus_Mensuels"], y=_safe(sub,"Patrimoine_Declare"),
mode="markers", name=label,
marker=dict(size=11, color=color, line=dict(width=1.5, color=_C["bg"])),
text=sub.get("Nom_Complet", sub.index).astype(str),
hovertemplate="<b>%{text}</b><br>Rev : %{x:,}<br>Patr : %{y:,}<extra></extra>"))
fig.update_layout(**_L(title="PROFIL AML x PATRIMOINE",height=290,xaxis_title="Revenus",yaxis_title="Patrimoine"))
st.plotly_chart(fig, use_container_width=True)
# 6. Bar volatilite revenu par secteur
with c6:
if "Secteur_Activite" in df.columns and "EX_Income_Volatility_Matrix" in df.columns:
vol = df.groupby("Secteur_Activite")["EX_Income_Volatility_Matrix"].mean().sort_values()
fig = go.Figure(go.Bar(x=vol.values, y=vol.index.astype(str), orientation="h",
marker_color=[_C["critical"] if v>0.4 else _C["warning"] if v>0.2 else _C["accent"] for v in vol.values],
text=[f"{v:.2f}" for v in vol.values], textposition="outside"))
fig.update_layout(**_L(title="VOLATILITE REVENUS PAR SECTEUR (EX_Income_Volatility)",
height=290, showlegend=False))
st.plotly_chart(fig, use_container_width=True)
# 7. Scatter efficacite seniorite
if "EX_Professional_Seniority_Efficiency" in df.columns and "Anciennete_Emploi" in df.columns:
eff = df["EX_Professional_Seniority_Efficiency"].fillna(0)
fig = go.Figure()
if "Statut_Pro" in df.columns:
for i, sp in enumerate(df["Statut_Pro"].dropna().unique()):
sub = df[df["Statut_Pro"]==sp]
fig.add_trace(go.Scatter(
x=sub["Anciennete_Emploi"], y=sub["EX_Professional_Seniority_Efficiency"].fillna(0),
mode="markers", name=str(sp),
marker=dict(size=10, color=_PAL[i%len(_PAL)], line=dict(width=1,color=_C["bg"])),
text=sub.get("Nom_Complet",sub.index).astype(str),
hovertemplate="<b>%{text}</b><br>Anciennete : %{x} mois<br>Efficacite : %{y:.1f}<extra></extra>"))
fig.update_layout(**_L(title="EFFICACITE PAR SENIORITE (revenu net / anciennete)",
height=260, xaxis_title="Anciennete (mois)", yaxis_title="Efficacite"))
st.plotly_chart(fig, use_container_width=True)
# 8. PCA 2D Clients
st.divider()
_ctx("CLUSTER MAP PCA 2D β€” Similarite multi-dimensionnelle des clients")
cli_feats = [
"Revenus_Mensuels","Autres_Revenus","Charges_Estimees","Anciennete_Emploi",
"Pers_Charge","Patrimoine_Declare","Checking_frequency",
"DX_Living_Cost_Ratio","DX_Disposable_Income_Stability",
"DX_Digital_Accessibility_Grade","DX_Professional_Trajectory_Index",
"EX_Income_Volatility_Matrix","EX_Contact_Reliability_Index",
"EX_Neighborhood_Income_Benchmark","EX_Professional_Seniority_Efficiency",
]
# Couleur prioritaire : FL_Risk_Target si present, sinon DX_Living_Cost_Ratio
color_col = "FL_Risk_Target" if "FL_Risk_Target" in df.columns else "DX_Living_Cost_Ratio"
color_lbl = "Risque" if color_col == "FL_Risk_Target" else "LCR"
c_pca1, c_pca2 = st.columns(2)
with c_pca1:
fig = _pca_ex(df, cli_feats, "Nom_Complet", color_col, color_lbl,
"CLIENTS (couleur = Risque)")
st.plotly_chart(fig, use_container_width=True)
with c_pca2:
fig = _pca_ex(df, cli_feats, "Nom_Complet", "DX_Disposable_Income_Stability",
"Stabilite rev.", "CLIENTS (couleur = Stabilite Revenus)",
colorscale=[[0,"#c0392b"],[0.5,"#f39c12"],[1,"#2ecc71"]])
st.plotly_chart(fig, use_container_width=True)
def _explore_garants(df: pd.DataFrame, sheets: dict):
if df.empty:
st.info("Table Garants_KYC vide."); return
_ctx("SOURCE : Garants_KYC | AXEE : solidite mecanique, "
"interoperabilite bancaire, pression financiere comparee")
# Fiche comparative
st.markdown(
f'<div style="font-family:{_FONT};font-size:0.58rem;letter-spacing:2px;'
f'color:{_C["subtext"]};text-transform:uppercase;margin-bottom:8px;">'
f'COMPARAISON PROFILS GARANTS</div>',
unsafe_allow_html=True,
)
kpis = [
("Revenus Mensuels", "Revenus_Mensuels", "{:,.0f} FCFA"),
("Patrimoine Declare", "Patrimoine_Declare", "{:,.0f} FCFA"),
("Charges Estimees", "Charges_Estimees", "{:,.0f} FCFA"),
("Personnes a charge", "Pers_Charge", "{:.0f}"),
("Revenu disponible", "EX_Guarantor_Disposable_Income", "{:,.0f} FCFA"),
("Pression financiere", "EX_Garant_Liability_Pressure", "{:.1%}"),
("Leverage patrimoine", "DX_Guarantee_Leverage_Score", "{:.1f}x"),
("Disponibilite", "DX_Garant_Availability_Score", "{:.0f} / 2"),
]
cols = st.columns(len(df)+1)
with cols[0]:
for label,_,_ in kpis:
st.markdown(f'<div style="font-family:{_FONT};font-size:0.58rem;color:{_C["subtext"]};'
f'padding:5px 0;border-bottom:1px solid {_C["border"]};">{label}</div>',
unsafe_allow_html=True)
for i,(_, row) in enumerate(df.iterrows()):
with cols[i+1]:
nom = str(row.get("Nom_Complet", f"GARANT {i+1}"))
st.markdown(f'<div style="font-family:{_FONT};font-size:0.58rem;color:{_C["accent"]};'
f'font-weight:700;padding:5px 0;border-bottom:1px solid {_C["border"]};">'
f'{nom[:20]}</div>', unsafe_allow_html=True)
for _,kcol,kfmt in kpis:
val = row.get(kcol, None)
if val is None or (isinstance(val, float) and np.isnan(val)):
display = "β€”"
else:
try:
display = kfmt.format(float(val))
except Exception:
display = str(val)
st.markdown(f'<div style="font-family:{_FONT};font-size:0.58rem;color:{_C["text"]};'
f'padding:5px 0;border-bottom:1px solid {_C["border"]};">{display}</div>',
unsafe_allow_html=True)
st.markdown("<br>", unsafe_allow_html=True)
c1, c2 = st.columns(2)
# 1. Scatter wealth vs risk
with c1:
wtr = df.get("EX_Wealth_to_Risk_Ratio", pd.Series(1.0, index=df.index)).fillna(1.0)
fig = go.Figure(go.Scatter(
x=_safe(df,"Revenus_Mensuels"), y=_safe(df,"Patrimoine_Declare"),
mode="markers+text",
marker=dict(size=(wtr.clip(0,100)/wtr.clip(0,100).max()*18+8).tolist() if wtr.max()>0 else [12]*len(df),
color=_PAL[0], line=dict(width=1.5, color=_C["bg"])),
text=df.get("Nom_Complet", df.index).astype(str), textposition="top center",
customdata=wtr.round(1),
hovertemplate="<b>%{text}</b><br>Rev : %{x:,}<br>Patr : %{y:,}<br>W/R : %{customdata}<extra></extra>"))
fig.update_layout(**_L(title="WEALTH-TO-RISK (taille = ratio patrimoine/risque)",height=280,showlegend=False))
st.plotly_chart(fig, use_container_width=True)
# 2. Bar logistique entite x canal
with c2:
if "Entite_Financiere" in df.columns and "Moyen_Transfert" in df.columns:
cross = df.groupby(["Entite_Financiere","Moyen_Transfert"]).size().reset_index(name="n")
fig = go.Figure()
for i,mt in enumerate(cross["Moyen_Transfert"].unique()):
sub = cross[cross["Moyen_Transfert"]==mt]
fig.add_trace(go.Bar(name=str(mt),x=sub["Entite_Financiere"],y=sub["n"],marker_color=_PAL[i%len(_PAL)]))
fig.update_layout(**_L(title="EFFICACITE LOGISTIQUE Entite x Canal",barmode="stack",height=280))
st.plotly_chart(fig, use_container_width=True)
c3, c4 = st.columns(2)
# 3. Bar pression liability
with c3:
lp = df.get("EX_Garant_Liability_Pressure", pd.Series(0, index=df.index)).fillna(0)
noms = df.get("Nom_Complet", df.index.astype(str))
fig = go.Figure(go.Bar(x=noms.tolist(), y=lp.tolist(),
marker_color=[_C["critical"] if v>0.6 else _C["warning"] if v>0.35 else _C["success"] for v in lp],
text=[f"{v:.1%}" for v in lp], textposition="outside"))
fig.add_hline(y=0.6, line_dash="dot", line_color=_C["critical"],
annotation_text="Seuil danger 60%", annotation_font_size=9)
fig.update_layout(**_L(title="PRESSION CHARGES / REVENUS TOTAUX",height=280,showlegend=False,yaxis=dict(tickformat=".0%")))
st.plotly_chart(fig, use_container_width=True)
# 4. Interoperabilite bancaire
with c4:
interop = df.get("EX_Bank_Transfer_Interoperability", pd.Series(0, index=df.index)).fillna(0)
cnt = interop.value_counts()
fig = go.Figure(go.Pie(
labels=["Compatible","Incompatible"], values=[cnt.get(1,0), cnt.get(0,0)],
marker_colors=[_C["success"], _C["critical"]], hole=0.5))
fig.update_layout(**_L(title="INTEROPERABILITE BANCAIRE (meme canal que client)",height=280))
st.plotly_chart(fig, use_container_width=True)
c5, c6 = st.columns(2)
# 5. Box revenus par entite financiere
with c5:
if "Entite_Financiere" in df.columns:
fig = go.Figure()
for i,ef in enumerate(df["Entite_Financiere"].dropna().unique()):
sub = df[df["Entite_Financiere"]==ef]["Revenus_Mensuels"].dropna()
fig.add_trace(go.Box(y=sub, name=str(ef), marker_color=_PAL[i%len(_PAL)], boxpoints="all", jitter=0.3))
fig.update_layout(**_L(title="REVENUS PAR ENTITE FINANCIERE",height=280,showlegend=False))
st.plotly_chart(fig, use_container_width=True)
# 6. Scatter famille vs disponible
with c6:
disp = df.get("EX_Guarantor_Disposable_Income", pd.Series(0, index=df.index)).fillna(0)
fig = go.Figure(go.Scatter(
x=_safe(df,"Pers_Charge"), y=disp,
mode="markers+text",
marker=dict(size=13, color=[_C["critical"] if v<0 else _C["success"] for v in disp],
line=dict(width=1.5, color=_C["bg"])),
text=df.get("Nom_Complet", df.index).astype(str), textposition="top center",
hovertemplate="<b>%{text}</b><br>Personnes a charge : %{x}<br>Disponible : %{y:,}<extra></extra>"))
fig.add_hline(y=0, line_dash="dot", line_color=_C["neutral"], line_width=1)
fig.update_layout(**_L(title="PRESSION FAMILIALE vs REVENU DISPONIBLE",height=280,
showlegend=False,xaxis_title="Pers. a charge",yaxis_title="Disponible"))
st.plotly_chart(fig, use_container_width=True)
# 7. PCA 2D Garants
st.divider()
_ctx("CLUSTER MAP PCA 2D β€” Positionnement comparatif des garants")
gar_feats = [
"Revenus_Mensuels","Autres_Revenus","Charges_Estimees",
"Patrimoine_Declare","Pers_Charge","Anciennete_Emploi","Checking_frequency",
"DX_Guarantee_Leverage_Score","DX_Family_Pressure_Index",
"DX_Garant_Availability_Score","DX_Net_Worth_Credibility_Check",
"EX_Guarantor_Disposable_Income","EX_Wealth_to_Risk_Ratio","EX_Garant_Liability_Pressure",
]
g_pca1, g_pca2 = st.columns(2)
with g_pca1:
fig = _pca_ex(df, gar_feats, "Nom_Complet", "EX_Garant_Liability_Pressure",
"Pression fin.", "GARANTS (couleur = Pression Charges/Revenus)")
st.plotly_chart(fig, use_container_width=True)
with g_pca2:
fig = _pca_ex(df, gar_feats, "Nom_Complet", "DX_Guarantee_Leverage_Score",
"Leverage", "GARANTS (couleur = Leverage Patrimoine)",
colorscale=[[0,"#c0392b"],[0.5,"#f39c12"],[1,"#2ecc71"]])
st.plotly_chart(fig, use_container_width=True)
def _explore_prets(df: pd.DataFrame, sheets: dict):
if df.empty:
st.info("Table Prets_Master vide."); return
_ctx("SOURCE : Prets_Master + Prets_Update | AXEE : elasticite taux/duree, "
"pression d'endettement par age, performance hierarchique")
# Fusion avec Update (version la plus recente)
upd = sheets.get("Prets_Update", pd.DataFrame())
if not upd.empty and "ID_Pret" in upd.columns and "Version" in upd.columns:
latest = upd.sort_values("Version", ascending=False).drop_duplicates("ID_Pret").set_index("ID_Pret")
df_m = df.copy().set_index("ID_Pret")
for col in ["Taux_Hebdo","Duree_Semaines","Montant_Capital","Statut","Motif"]:
if col in latest.columns:
df_m[col] = latest[col].combine_first(df_m.get(col, pd.Series(dtype=object)))
df_m = df_m.reset_index()
else:
df_m = df.copy()
# Calcul tranches d'age (STRING, pas Categorical)
cli = sheets.get("Clients_KYC", pd.DataFrame())
if not cli.empty and "ID_Client" in cli.columns and "Date_Naissance" in cli.columns:
cli2 = cli.copy()
cli2["Age"] = _age_calc(cli2["Date_Naissance"])
cli2["Tranche_Age"] = _age_bin(cli2["Age"]) # retourne str, pas Categorical
age_map = cli2.set_index("ID_Client")["Tranche_Age"].to_dict()
df_m["Tranche_Age"] = _safe(df_m,"ID_Client").map(age_map)
else:
df_m["Tranche_Age"] = np.nan
TRANCHES = ["<25","25-34","35-44","45-54","55+"] # ordre fixe, pas .cat.categories
c1, c2 = st.columns(2)
# 1. Elasticite taux / duree
with c1:
fig = go.Figure()
for i,of in enumerate(df_m.get("Offre",pd.Series()).dropna().unique()):
sub = df_m[df_m["Offre"]==of]
over = sub.get("EX_Endettement_Critical_Threshold", pd.Series(0,index=sub.index)).fillna(0)
eyr = sub.get("EX_Effective_Yield_Realization", pd.Series(0,index=sub.index)).fillna(0).clip(0,1)
fig.add_trace(go.Scatter(
x=sub["Duree_Semaines"], y=sub["Taux_Hebdo"], mode="markers", name=str(of),
marker=dict(
size=[(14 if v else 8) for v in over],
color=_PAL[i%len(_PAL)],
symbol=["diamond" if v else "circle" for v in over],
line=dict(width=1.5, color=[_C["critical"] if v else _C["bg"] for v in over])),
text=sub["ID_Pret"].astype(str),
customdata=sub.get("Taux_Endettement",pd.Series(0,index=sub.index)).fillna(0).round(1),
hovertemplate="<b>%{text}</b><br>%{x} sem. Taux %{y:.2%}<br>Endett. : %{customdata}%<extra></extra>"))
fig.update_layout(**_L(title="ELASTICITE TAUX/DUREE (losange = endettement > 40%)",
height=310, xaxis_title="Duree (semaines)",
yaxis=dict(tickformat=".1%"), yaxis_title="Taux Hebdo"))
st.plotly_chart(fig, use_container_width=True)
# 2. Boxplot pression par tranche d'age ← BUG CORRIGE ICI
with c2:
df_bp = df_m.dropna(subset=["Tranche_Age","Taux_Endettement"])
if not df_bp.empty:
crit = df_bp.get("EX_Endettement_Critical_Threshold", pd.Series(0,index=df_bp.index)).fillna(0)
fig = go.Figure()
# On itere sur la liste de strings, PAS sur .cat.categories
for i, tr in enumerate(TRANCHES):
sub = df_bp[df_bp["Tranche_Age"] == tr]
if sub.empty: continue
n_crit = int(crit.reindex(sub.index).fillna(0).sum())
fig.add_trace(go.Box(
y=sub["Taux_Endettement"], name=str(tr),
marker_color=_C["critical"] if n_crit > 0 else _PAL[i%len(_PAL)],
boxpoints="all", jitter=0.3,
hovertemplate=f"<b>{tr}</b><br>Endett. : %{{y:.1f}}%<extra></extra>"))
fig.add_hline(y=40, line_dash="dot", line_color=_C["critical"],
annotation_text="Seuil critique 40%",
annotation_font=dict(size=9, color=_C["critical"]))
fig.update_layout(**_L(title="PRESSION D'ENDETTEMENT PAR TRANCHE D'AGE",
height=310, showlegend=False, yaxis_title="Taux Endettement (%)"))
st.plotly_chart(fig, use_container_width=True)
else:
st.plotly_chart(_ef("Date_Naissance absente ou jointure impossible.", 310), use_container_width=True)
# 3. Sunburst performance
hier = df_m.groupby(["Offre","Type_Pret","Statut"]).size().reset_index(name="n")
fig = px.sunburst(hier, path=["Offre","Type_Pret","Statut"], values="n", color="Statut",
color_discrete_map={"ACTIF":"#58a6ff","TERMINE":"#2ecc71",
"EN_RETARD":"#f39c12","UPDATED":"#5a6a7a",
"DEFAUT":"#c0392b","LITIGE":"#c0392b"})
fig.update_traces(textfont=dict(family=_FONT,size=11),
hovertemplate="<b>%{label}</b><br>Prets : %{value}<br>Part : %{percentParent:.1%}<extra></extra>")
fig.update_layout(**_L(title="RADAR DE PERFORMANCE Offre > Type > Statut", height=370))
st.plotly_chart(fig, use_container_width=True)
c3, c4 = st.columns(2)
# 4. Scatter anomalie de duree
with c3:
if "EX_Duration_Anomaly_Detection" in df_m.columns and "Type_Pret" in df_m.columns:
fig = go.Figure()
for i,tp in enumerate(df_m["Type_Pret"].dropna().unique()):
sub = df_m[df_m["Type_Pret"]==tp]
anom = sub["EX_Duration_Anomaly_Detection"].fillna(0).abs()
fig.add_trace(go.Scatter(
x=sub["Duree_Semaines"], y=sub["EX_Duration_Anomaly_Detection"].fillna(0),
mode="markers", name=str(tp),
marker=dict(size=(anom*6+8).clip(upper=20).tolist(),
color=_PAL[i%len(_PAL)], line=dict(width=1,color=_C["bg"])),
text=sub["ID_Pret"].astype(str),
hovertemplate="<b>%{text}</b><br>Duree : %{x} sem.<br>Z-score : %{y:.2f}<extra></extra>"))
fig.add_hline(y=0, line_dash="dot", line_color=_C["neutral"], line_width=1)
fig.update_layout(**_L(title="ANOMALIE DE DUREE PAR TYPE (z-score)",height=290,
xaxis_title="Duree (semaines)",yaxis_title="Z-score"))
st.plotly_chart(fig, use_container_width=True)
# 5. Bar rendement effectif
with c4:
if "EX_Effective_Yield_Realization" in df_m.columns:
df_m2 = df_m.dropna(subset=["EX_Effective_Yield_Realization"])
if not df_m2.empty:
fig = go.Figure(go.Bar(
x=df_m2["ID_Pret"].astype(str),
y=df_m2["EX_Effective_Yield_Realization"],
marker_color=[_C["success"] if v>=0.9 else _C["warning"] if v>=0.5 else _C["critical"]
for v in df_m2["EX_Effective_Yield_Realization"]],
text=[f"{v:.1%}" for v in df_m2["EX_Effective_Yield_Realization"]],
textposition="outside"))
fig.add_hline(y=1.0, line_dash="dot", line_color=_C["neutral"])
fig.update_layout(**_L(title="RENDEMENT EFFECTIF REALISE (1.0 = 100% interets encaisses)",
height=290, showlegend=False, yaxis=dict(tickformat=".0%")))
st.plotly_chart(fig, use_container_width=True)
# 6. Scatter capital vs pers a charge
if "EX_Loan_Size_vs_Family_Size" in df_m.columns:
fig = go.Figure()
for i,of in enumerate(df_m.get("Offre",pd.Series()).dropna().unique()):
sub = df_m[df_m["Offre"]==of]
pch = _safe(cli,"Pers_Charge") if not cli.empty else pd.Series(0,index=sub.index)
pch_vals = sub.get("_pch_tmp", sub["ID_Client"].map(
cli.set_index("ID_Client")["Pers_Charge"].to_dict() if not cli.empty and "ID_Client" in cli.columns else {}
).fillna(0)) if not cli.empty and "ID_Client" in cli.columns and "Pers_Charge" in cli.columns else pd.Series(1,index=sub.index)
fig.add_trace(go.Scatter(
x=pch_vals, y=sub["Montant_Capital"],
mode="markers", name=str(of),
marker=dict(size=10, color=_PAL[i%len(_PAL)], line=dict(width=1,color=_C["bg"])),
text=sub["ID_Pret"].astype(str),
hovertemplate="<b>%{text}</b><br>Pers. : %{x}<br>Capital : %{y:,}<extra></extra>"))
fig.update_layout(**_L(title="CAPITAL vs TAILLE MENAGE",height=260,
xaxis_title="Personnes a charge",yaxis_title="Montant Capital"))
st.plotly_chart(fig, use_container_width=True)
# PCA 2D Prets
st.divider()
_ctx("CLUSTER MAP PCA 2D β€” Positionnement des prets dans l'espace contractuel")
prt_feats_pca = [
"Montant_Capital","Taux_Hebdo","Taux_Endettement","Duree_Semaines",
"Montant_Versement","Montant_Total","Cout_Credit","Nb_Versements",
"DX_Interest_Yield_Factor","DX_Debt_to_Income_Pressure",
"DX_Motif_Risk_Weight","DX_Collateral_Coverage_Ratio",
"EX_Endettement_Critical_Threshold","EX_Duration_Anomaly_Detection",
"EX_Effective_Yield_Realization",
]
df_pca_prt = df_m.copy()
df_pca_prt["_prt_risk"] = (
df_pca_prt.get("DX_Motif_Risk_Weight", pd.Series(0.5, index=df_pca_prt.index)).fillna(0.5) * 0.5
+ (df_pca_prt["Taux_Endettement"].fillna(0) / 100).clip(0, 1) * 0.5
).round(3)
p_pca1, p_pca2 = st.columns(2)
with p_pca1:
fig = _pca_ex(df_pca_prt, prt_feats_pca, "ID_Pret", "_prt_risk",
"Risque contrat", "PRETS (couleur = Motif Risk + Endettement)")
st.plotly_chart(fig, use_container_width=True)
with p_pca2:
fig = _pca_ex(df_pca_prt, prt_feats_pca, "Nom_Complet", "Taux_Endettement",
"Endettement %", "PRETS (couleur = Taux Endettement)",
colorscale=[[0,"#2ecc71"],[0.4,"#f39c12"],[1,"#c0392b"]])
st.plotly_chart(fig, use_container_width=True)
def _explore_prets_update(df: pd.DataFrame, sheets: dict):
if df.empty:
st.info("Table Prets_Update vide."); return
_ctx("SOURCE : Prets_Update | AXEE : amplitude des modifications, "
"derive de montant par version, patterns de renegociation")
prt = sheets.get("Prets_Master", pd.DataFrame())
c1, c2 = st.columns(2)
# 1. Version drift par pret
with c1:
if not prt.empty and "ID_Pret" in prt.columns and "Montant_Total" in prt.columns and "Montant_Total" in df.columns:
mt_orig = prt.set_index("ID_Pret")["Montant_Total"].to_dict()
drift = df.copy()
drift["MT_Orig"] = _safe(drift,"ID_Pret").map(mt_orig)
drift["Drift"] = _safe(drift,"Montant_Total") - drift["MT_Orig"].fillna(0)
fig = go.Figure()
for pid in drift["ID_Pret"].dropna().unique():
sub = drift[drift["ID_Pret"]==pid].sort_values("Version")
col = _C["critical"] if sub["Drift"].iloc[-1]>0 else _C["success"]
fig.add_trace(go.Scatter(x=sub["Version"],y=sub["Drift"],mode="lines+markers",
name=str(pid),line=dict(color=col,width=1.5),marker=dict(size=7),
hovertemplate=f"<b>{pid}</b><br>V%{{x}}<br>Derive : %{{y:+,.0f}}<extra></extra>"))
fig.add_hline(y=0,line_dash="dot",line_color=_C["neutral"],line_width=1)
fig.update_layout(**_L(title="DERIVE MONTANT TOTAL PAR VERSION (rouge = hausse)",
height=290,xaxis_title="Version",yaxis_title="Ecart vs original"))
st.plotly_chart(fig, use_container_width=True)
# 2. Amplitude modifications
with c2:
if "EX_Modification_Pattern_Clustering" in df.columns:
fig = go.Figure(go.Bar(
x=df["ID_Pret_Updated"].astype(str) if "ID_Pret_Updated" in df.columns else df.index.astype(str),
y=df["EX_Modification_Pattern_Clustering"],
marker_color=[_C["critical"] if v>=3 else _C["warning"] if v>=2 else _C["accent"]
for v in df["EX_Modification_Pattern_Clustering"]],
text=df["EX_Modification_Pattern_Clustering"],textposition="outside"))
fig.update_layout(**_L(title="AMPLITUDE DES MODIFICATIONS (rouge = 3+ champs)",
height=290,showlegend=False,yaxis_title="Champs modifies"))
st.plotly_chart(fig, use_container_width=True)
c3, c4 = st.columns(2)
# 3. Scatter delta vs velocity
with c3:
if "DX_Contract_Volatility_Delta" in df.columns and "DX_Update_Velocity" in df.columns:
fig = go.Figure(go.Scatter(
x=df["DX_Update_Velocity"],y=df["DX_Contract_Volatility_Delta"],mode="markers",
marker=dict(size=12,color=[_C["critical"] if v>0 else _C["success"] for v in df["DX_Contract_Volatility_Delta"]],
line=dict(width=1.5,color=_C["bg"])),
text=df["ID_Pret"].astype(str) if "ID_Pret" in df.columns else df.index.astype(str),
hovertemplate="<b>%{text}</b><br>Velocity : %{x}j<br>Delta : %{y:+,.0f}<extra></extra>"))
fig.add_hline(y=0,line_dash="dot",line_color=_C["neutral"],line_width=1)
fig.update_layout(**_L(title="DELTA CONTRAT vs VELOCITY",height=280,
xaxis_title="Jours depuis creation",yaxis_title="Delta Montant Total",showlegend=False))
st.plotly_chart(fig, use_container_width=True)
# 4. Gravite renegociation
with c4:
if "DX_Renegotiation_Gravity_Score" in df.columns:
fig = go.Figure(go.Bar(
x=df["ID_Pret_Updated"].astype(str) if "ID_Pret_Updated" in df.columns else df.index.astype(str),
y=df["DX_Renegotiation_Gravity_Score"],
marker_color=[_C["critical"] if v>0.10 else _C["accent"] for v in df["DX_Renegotiation_Gravity_Score"]],
text=df["DX_Renegotiation_Gravity_Score"].round(3),textposition="outside"))
fig.update_layout(**_L(title="GRAVITE RENEGOCIATION (score keywords crise)",
height=280,showlegend=False))
st.plotly_chart(fig, use_container_width=True)
c5, c6 = st.columns(2)
# 5. Evolution taux par version
with c5:
if not prt.empty and "Taux_Hebdo" in prt.columns and "Taux_Hebdo" in df.columns:
taux_orig = prt.set_index("ID_Pret")["Taux_Hebdo"].to_dict()
fig = go.Figure()
for pid in df["ID_Pret"].dropna().unique():
sub = df[df["ID_Pret"]==pid].sort_values("Version")
orig = taux_orig.get(pid, np.nan)
if not (isinstance(orig,float) and np.isnan(orig)):
vals = [orig] + sub["Taux_Hebdo"].tolist()
vers = ["V1"] + [f"V{v}" for v in sub["Version"]]
col = _C["critical"] if vals[-1]>orig else _C["success"]
fig.add_trace(go.Scatter(x=vers,y=vals,mode="lines+markers",name=str(pid),
line=dict(color=col,width=1.5),marker=dict(size=6),
hovertemplate=f"<b>{pid}</b> %{{x}}<br>Taux : %{{y:.2%}}<extra></extra>"))
fig.update_layout(**_L(title="EVOLUTION TAUX HEBDO PAR VERSION",height=280,
yaxis=dict(tickformat=".2%")))
st.plotly_chart(fig, use_container_width=True)
# 6. Fragilite structurelle
with c6:
if "DX_Structural_Fragility_Index" in df.columns:
fig = go.Figure(go.Bar(
x=df["ID_Pret"].astype(str) if "ID_Pret" in df.columns else df.index.astype(str),
y=df["DX_Structural_Fragility_Index"],
marker_color=[_C["critical"] if v>0.15 else _C["warning"] if v>0.08 else _C["accent"]
for v in df["DX_Structural_Fragility_Index"]],
text=df["DX_Structural_Fragility_Index"].round(4),textposition="outside"))
fig.update_layout(**_L(title="FRAGILITE STRUCTURELLE (modifications/semaines duree)",
height=280,showlegend=False))
st.plotly_chart(fig, use_container_width=True)
# 7. PCA 2D Prets Update
st.divider()
_ctx("CLUSTER MAP PCA 2D β€” Similarite des modifications contractuelles")
upd_feats_pca = [
"Version","Montant_Capital","Taux_Hebdo","Taux_Endettement",
"Duree_Semaines","Montant_Total","Cout_Credit",
"DX_Contract_Volatility_Delta","DX_Update_Velocity",
"DX_Renegotiation_Gravity_Score","DX_Structural_Fragility_Index",
"EX_Modification_Pattern_Clustering","EX_Version_Drift_Analysis",
]
df_upd_pca = df.copy()
df_upd_pca["_grav"] = df_upd_pca.get("DX_Renegotiation_Gravity_Score",
pd.Series(0, index=df_upd_pca.index)).fillna(0)
raw_drift = df_upd_pca.get("EX_Version_Drift_Analysis", pd.Series(0, index=df_upd_pca.index)).fillna(0)
mx_drift = raw_drift.abs().max()
df_upd_pca["_drift_n"] = (raw_drift / mx_drift).round(3) if mx_drift > 0 else pd.Series(0.0, index=df_upd_pca.index)
u_pca1, u_pca2 = st.columns(2)
with u_pca1:
fig = _pca_ex(df_upd_pca, upd_feats_pca, "ID_Pret", "_grav",
"Gravite reneg.", "UPDATES (couleur = Gravite Renegociation)")
st.plotly_chart(fig, use_container_width=True)
with u_pca2:
fig = _pca_ex(df_upd_pca, upd_feats_pca, "ID_Pret", "_drift_n",
"Derive montant", "UPDATES (couleur = Derive Montant Total)",
colorscale=[[0,"#58a6ff"],[0.5,"#f39c12"],[1,"#c0392b"]])
st.plotly_chart(fig, use_container_width=True)
def _explore_remboursements(df: pd.DataFrame, sheets: dict):
if df.empty:
st.info("Table Remboursements vide."); return
_ctx("SOURCE : Remboursements | AXEE : comportement temporel, "
"correlation montant/retard, fenetres horaires d'encaissement")
c1, c2 = st.columns(2)
# 1. Scatter delai vs montant
with c1:
sp_c = {"EN_RETARD":_C["critical"],"PONCTUEL":_C["success"],
"ANTICIPE":_C["accent"],"PAYE":_C["success"],"PARTIEL":_C["warning"]}
anomaly = df.get("EX_Timestamp_Anomaly_Log", pd.Series(0, index=df.index)).fillna(0)
fig = go.Figure()
for sp in df["Statut_Paiement"].dropna().unique():
mask = df["Statut_Paiement"]==sp; sub = df[mask]; a = anomaly[mask]
fig.add_trace(go.Scatter(x=sub["Jours_Retard"],y=sub["Montant_Verse"],mode="markers",name=str(sp),
marker=dict(size=[14 if x else 9 for x in a],
color=sp_c.get(str(sp).upper(),_C["neutral"]),
symbol=["x" if x else "circle" for x in a],
line=dict(width=1.5,color=_C["bg"])),
text=sub["ID_Pret"].astype(str),
hovertemplate=f"<b>{sp}</b><br>Retard : %{{x}}j<br>Montant : %{{y:,}}<extra></extra>"))
fig.add_vline(x=0,line_dash="dot",line_color=_C["neutral"],line_width=1)
fig.update_layout(**_L(title="DELAI vs MONTANT (X = heure atypique)",height=310,
xaxis_title="Jours Retard",yaxis_title="Montant Verse"))
st.plotly_chart(fig, use_container_width=True)
# 2. Heatmap temporelle
with c2:
ts = pd.to_datetime(df.get("Timestamp", pd.Series(dtype=str)), errors="coerce")
df2 = df.copy(); df2["Heure"] = ts.dt.hour
jmap = {"Monday":"Lundi","Tuesday":"Mardi","Wednesday":"Mercredi",
"Thursday":"Jeudi","Friday":"Vendredi","Saturday":"Samedi","Sunday":"Dimanche"}
df2["Jour"] = ts.dt.day_name().map(jmap)
jord = ["Lundi","Mardi","Mercredi","Jeudi","Vendredi","Samedi","Dimanche"]
df2v = df2.dropna(subset=["Heure","Jour"])
if not df2v.empty:
pivot = df2v.groupby(["Jour","Heure"]).size().reset_index(name="n")
fig = go.Figure(go.Heatmap(
x=pivot["Heure"],y=pivot["Jour"],z=pivot["n"],
colorscale=[[0,_C["card"]],[0.4,"#1a3a5c"],[1,_C["ex"]]],
text=pivot["n"],texttemplate="%{text}",
hovertemplate="<b>%{y}</b> %{x}h00 : %{z} paiements<extra></extra>",
colorbar=dict(title="Paiements",tickfont=dict(size=9,family=_FONT)),
xgap=2,ygap=2))
fig.update_layout(**_L(title="FENETRES D'ENCAISSEMENT (jour x heure)",height=310,
xaxis=dict(title="Heure",dtick=2),
yaxis=dict(categoryorder="array",categoryarray=jord)))
st.plotly_chart(fig, use_container_width=True)
else:
st.plotly_chart(_ef("Timestamps non parsables.",310), use_container_width=True)
c3, c4 = st.columns(2)
# 3. Bar propension paiement anticipe par client
with c3:
if "ID_Client" in df.columns and "EX_Early_Payment_Propensity" in df.columns:
prop = df.groupby("ID_Client")["EX_Early_Payment_Propensity"].mean().sort_values(ascending=False)
fig = go.Figure(go.Bar(x=prop.index.astype(str),y=prop.values,
marker_color=[_C["success"] if v>0.5 else _C["warning"] if v>0 else _C["critical"] for v in prop.values],
text=[f"{v:.0%}" for v in prop.values],textposition="outside"))
fig.update_layout(**_L(title="PROPENSION PAIEMENT ANTICIPE PAR CLIENT",
height=280,showlegend=False,yaxis=dict(tickformat=".0%")))
st.plotly_chart(fig, use_container_width=True)
# 4. Scatter montant vs solde avant
with c4:
if "Solde_Avant" in df.columns:
fig = go.Figure()
sp_c2 = {"EN_RETARD":_C["critical"],"PONCTUEL":_C["success"],"ANTICIPE":_C["accent"]}
for sp in df["Statut_Paiement"].dropna().unique():
mask = df["Statut_Paiement"]==sp; sub = df[mask]
fig.add_trace(go.Scatter(x=sub["Solde_Avant"],y=sub["Montant_Verse"],mode="markers",name=str(sp),
marker=dict(size=10,color=sp_c2.get(str(sp).upper(),_C["neutral"]),line=dict(width=1,color=_C["bg"])),
hovertemplate=f"<b>{sp}</b><br>Solde : %{{x:,}}<br>Verse : %{{y:,}}<extra></extra>"))
mx = max(df["Solde_Avant"].max(),df["Montant_Verse"].max())*1.05
fig.add_trace(go.Scatter(x=[0,mx],y=[0,mx],mode="lines",
line=dict(color=_C["subtext"],dash="dot",width=1),showlegend=False,hoverinfo="skip"))
fig.update_layout(**_L(title="MONTANT VERSE vs SOLDE INITIAL",height=280,
xaxis_title="Solde Avant",yaxis_title="Montant Verse"))
st.plotly_chart(fig, use_container_width=True)
c5, c6 = st.columns(2)
# 5. Bar coherence du paiement par client
with c5:
if "ID_Client" in df.columns and "EX_Payment_Consistency_Score" in df.columns:
cons = df.groupby("ID_Client")["EX_Payment_Consistency_Score"].mean().sort_values()
fig = go.Figure(go.Bar(x=cons.index.astype(str),y=cons.values,
marker_color=[_C["critical"] if v>5000 else _C["warning"] if v>2000 else _C["success"] for v in cons.values],
text=cons.round(0).astype(int).tolist(),textposition="outside"))
fig.update_layout(**_L(title="ECART-TYPE MONTANT VERSE PAR CLIENT (coherence)",
height=280,showlegend=False,yaxis_title="Ecart-type"))
st.plotly_chart(fig, use_container_width=True)
# 6. Timeline chronologique
with c6:
d2 = df.copy(); d2["Date_Paiement"] = pd.to_datetime(d2.get("Date_Paiement"),errors="coerce")
d2 = d2.dropna(subset=["Date_Paiement"])
if not d2.empty:
sp_c3 = {"EN_RETARD":_C["critical"],"PONCTUEL":_C["success"],"ANTICIPE":_C["accent"]}
fig = go.Figure()
for sp in d2["Statut_Paiement"].dropna().unique():
mask = d2["Statut_Paiement"]==sp; sub = d2[mask]
fig.add_trace(go.Scatter(x=sub["Date_Paiement"],y=sub["Montant_Verse"],mode="markers",name=str(sp),
marker=dict(size=10,color=sp_c3.get(str(sp).upper(),_C["neutral"]),line=dict(width=1.5,color=_C["bg"])),
text=sub["ID_Pret"].astype(str),
hovertemplate="<b>%{text}</b><br>%{x|%d/%m/%Y}<br>%{y:,}<extra></extra>"))
fig.update_layout(**_L(title="TIMELINE CHRONOLOGIQUE DES PAIEMENTS",height=280))
st.plotly_chart(fig, use_container_width=True)
# 7. PCA 2D Remboursements
st.divider()
_ctx("CLUSTER MAP PCA 2D β€” Comportements de paiement regroupes")
rem_feats_pca = [
"Montant_Verse","Montant_Principal","Montant_Interets","Penalites_Retard",
"Solde_Avant","Solde_Apres","Jours_Retard",
"DX_Profitability_Realization_Rate","DX_Overpayment_Tendency",
"DX_Channel_Risk_Correlation","DX_Temporal_Payment_Behavior",
"EX_Early_Payment_Propensity","EX_Payment_Consistency_Score",
"EX_Collection_Cost_Efficiency",
]
df_rem_pca = df.copy()
# Score risque remboursement : retard positif normalise
jr = df_rem_pca["Jours_Retard"].clip(lower=0)
jr_max = jr.max()
df_rem_pca["_rem_risk"] = (jr / jr_max).round(3) if jr_max > 0 else pd.Series(0.0, index=df_rem_pca.index)
r_pca1, r_pca2 = st.columns(2)
with r_pca1:
fig = _pca_ex(df_rem_pca, rem_feats_pca, "ID_Pret", "_rem_risk",
"Risque retard", "REMBOURSEMENTS (couleur = Jours Retard norm.)")
st.plotly_chart(fig, use_container_width=True)
with r_pca2:
fig = _pca_ex(df_rem_pca, rem_feats_pca, "ID_Pret", "Montant_Verse",
"Montant", "REMBOURSEMENTS (couleur = Montant Verse)",
colorscale=[[0,"#c0392b"],[0.5,"#f39c12"],[1,"#2ecc71"]])
st.plotly_chart(fig, use_container_width=True)
def _explore_ajustements(df: pd.DataFrame, sheets: dict):
if df.empty:
st.markdown(
f'<div style="background:{_C["card"]};border:1px solid {_C["border"]};'
f'border-left:3px solid {_C["neutral"]};padding:16px 20px;font-family:{_FONT};">'
f'<div style="font-size:0.50rem;letter-spacing:2px;color:{_C["subtext"]};'
f'text-transform:uppercase;margin-bottom:6px;">STATUS OPERATIONNEL</div>'
f'<div style="font-size:1rem;color:{_C["subtext"]};">TABLE VIDE EN ATTENTE D\'ALIMENTATION</div>'
f'<div style="font-size:0.58rem;color:{_C["subtext"]};margin-top:4px;">'
f'Analyses causales actives des le premier ajustement enregistre.</div>'
f'</div>', unsafe_allow_html=True)
return
_ctx("SOURCE : Ajustements_Echeances + Prets_Master | AXEE : "
"frequence, impact financier, correlation defauts futurs")
c1, c2 = st.columns(2)
with c1:
cnt = df["Raison"].value_counts()
r2d = df.groupby("Raison")["EX_Reason_to_Default_Bridge"].mean() if "EX_Reason_to_Default_Bridge" in df.columns else pd.Series(0,index=cnt.index)
bc = [_C["critical"] if r2d.get(r,0)>0.3 else _C["warning"] if r2d.get(r,0)>0.1 else _C["accent"] for r in cnt.index]
fig = go.Figure(go.Bar(x=cnt.values,y=cnt.index.astype(str),orientation="h",
marker_color=bc,text=cnt.values,textposition="outside"))
fig.update_layout(**_L(title="RAISONS (couleur = pont defaut)",height=300,showlegend=False))
st.plotly_chart(fig, use_container_width=True)
with c2:
if "EX_Timing_of_Adjustment" in df.columns and "EX_Amount_Expansion_Factor" in df.columns:
fig = go.Figure(go.Scatter(
x=df["EX_Timing_of_Adjustment"].fillna(0),
y=df["EX_Amount_Expansion_Factor"].fillna(0),
mode="markers",
marker=dict(size=12,color=_C["ex"],line=dict(width=1.5,color=_C["bg"])),
text=df["ID_Pret"].astype(str) if "ID_Pret" in df.columns else df.index.astype(str),
hovertemplate="<b>%{text}</b><br>Timing : %{x}j<br>Expansion : %{y:.2%}<extra></extra>"))
fig.update_layout(**_L(title="TIMING vs EXPANSION MONTANT",height=300,showlegend=False,
xaxis_title="Jours avant fin",yaxis_title="Facteur expansion"))
st.plotly_chart(fig, use_container_width=True)
# ── Routeur public ────────────────────────────────────────────────────────────
_MAP = {
"CLIENTS": ("Clients_KYC", _explore_clients),
"GARANTS": ("Garants_KYC", _explore_garants),
"PRETS": ("Prets_Master", _explore_prets),
"PRETS_UPDATE": ("Prets_Update", _explore_prets_update),
"PRETS UPDATE": ("Prets_Update", _explore_prets_update),
"REMBOURSEMENTS": ("Remboursements", _explore_remboursements),
"AJUSTEMENTS": ("Ajustements_Echeances", _explore_ajustements),
}
def render_exploration(sheets: dict, section: str):
section_up = section.upper().replace("_"," ").strip()
for key in [section, section_up, section.upper()]:
if key in _MAP:
tab_key, fn = _MAP[key]
fn(sheets.get(tab_key, pd.DataFrame()), sheets)
return
st.warning(f"Section inconnue : {section}")