import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import numpy as np
import time
def show_ml_feature_store(client, sheet_name):
"""
Affiche le ML Feature Store & Analytics
Args:
client: Client gspread authentifié
sheet_name: Nom de la Google Sheet
"""
# Fonction helper pour charger les données
def get_data_from_sheet(sheet_name_tab):
try:
sh = client.open(sheet_name)
ws = sh.worksheet(sheet_name_tab)
return pd.DataFrame(ws.get_all_records())
except:
return pd.DataFrame()
# === GOTHAM SURVEILLANCE THEME CSS ===
st.markdown("""
""", unsafe_allow_html=True)
# Header principal
st.markdown("""
ML Feature Store & Analytics
""", unsafe_allow_html=True)
# Boutons de contrôle
col_refresh, col_auto = st.columns([3, 1])
with col_refresh:
if st.button("Actualiser les données", use_container_width=True):
st.rerun()
with col_auto:
auto_refresh = st.checkbox("Auto 30s", value=False)
if auto_refresh:
st_autorefresh = st.empty()
with st_autorefresh:
st.info("✅ Actualisation automatique activée")
time.sleep(30)
st.rerun()
# Chargement des données pour le feature store
try:
df_clients = get_data_from_sheet("Clients_KYC")
df_prets = get_data_from_sheet("Prets_Master")
except Exception as e:
st.error(f"❌ Erreur de lecture : {e}")
st.stop()
if df_clients.empty or df_prets.empty:
st.warning("⚠️ Données insuffisantes")
st.stop()
# Fusion des données
df_full = pd.merge(df_prets, df_clients, on="ID_Client", how="left", suffixes=('_Pret', '_Client'))
# Thème Plotly surveillance discret
plotly_template = {
'layout': {
'plot_bgcolor': 'rgba(13, 17, 23, 0.8)',
'paper_bgcolor': 'rgba(22, 27, 34, 0.3)',
'font': {'color': '#8b949e', 'family': 'Space Grotesk', 'size': 12},
'title': {
'font': {'size': 14, 'color': '#c9d1d9', 'family': 'Space Grotesk'},
'x': 0,
'xanchor': 'left'
},
'xaxis': {
'gridcolor': 'rgba(48, 54, 61, 0.3)',
'linecolor': 'rgba(48, 54, 61, 0.5)',
'zerolinecolor': 'rgba(48, 54, 61, 0.5)',
'tickfont': {'family': 'Space Grotesk', 'size': 11}
},
'yaxis': {
'gridcolor': 'rgba(48, 54, 61, 0.3)',
'linecolor': 'rgba(48, 54, 61, 0.5)',
'zerolinecolor': 'rgba(48, 54, 61, 0.5)',
'tickfont': {'family': 'Space Grotesk', 'size': 11}
},
'legend': {
'font': {'family': 'Space Grotesk', 'size': 11}
}
}
}
# FEATURE ENGINEERING
st.markdown("Feature Engineering Pipeline
", unsafe_allow_html=True)
# 1. FEATURES DÉMOGRAPHIQUES
with st.expander(" Features Démographiques", expanded=True):
df_full['Date_Naissance'] = pd.to_datetime(df_full['Date_Naissance'], errors='coerce')
df_full['Age'] = ((datetime.now() - df_full['Date_Naissance']).dt.days / 365.25).round(0)
df_full['Pers_Charge'] = pd.to_numeric(df_full['Pers_Charge'], errors='coerce').fillna(0)
df_full['Categorie_Age'] = pd.cut(df_full['Age'],
bins=[0, 25, 35, 45, 55, 100],
labels=['18-25', '26-35', '36-45', '46-55', '55+'])
col1, col2, col3, col4 = st.columns(4)
col1.metric("Âge moyen", f"{df_full['Age'].mean():.1f} ans",
delta=f"σ {df_full['Age'].std():.1f}")
col2.metric("Pers. charge", f"{df_full['Pers_Charge'].mean():.1f}",
delta=f"Max {df_full['Pers_Charge'].max():.0f}")
col3.metric("Mariés", f"{(df_full['Etat_Civil']=='Marié(e)').sum()}")
col4.metric("Célibataires", f"{(df_full['Etat_Civil']=='Célibataire').sum()}")
fig_age = px.histogram(df_full, x='Age', nbins=20,
title="Distribution des âges",
color_discrete_sequence=['#58a6ff'],
template=plotly_template)
fig_age.update_traces(marker_line_color='rgba(139, 148, 158, 0.3)', marker_line_width=0.5, opacity=0.8)
st.plotly_chart(fig_age, use_container_width=True)
# 2. FEATURES FINANCIÈRES
with st.expander(" Features Financières", expanded=True):
df_full['Revenus_Mensuels'] = pd.to_numeric(df_full['Revenus_Mensuels'], errors='coerce').fillna(0)
df_full['Autres_Revenus'] = pd.to_numeric(df_full['Autres_Revenus'], errors='coerce').fillna(0)
df_full['Charges_Estimees'] = pd.to_numeric(df_full['Charges_Estimees'], errors='coerce').fillna(0)
df_full['Patrimoine_Declare'] = pd.to_numeric(df_full['Patrimoine_Declare'], errors='coerce').fillna(0)
df_full['Montant_Capital'] = pd.to_numeric(df_full['Montant_Capital'], errors='coerce').fillna(0)
df_full['Montant_Versement'] = pd.to_numeric(df_full['Montant_Versement'], errors='coerce').fillna(0)
df_full['Revenus_Totaux'] = df_full['Revenus_Mensuels'] + df_full['Autres_Revenus']
df_full['Revenu_Net'] = df_full['Revenus_Totaux'] - df_full['Charges_Estimees']
df_full['Ratio_Charges_Revenus'] = (df_full['Charges_Estimees'] / df_full['Revenus_Totaux']).replace([np.inf, -np.inf], 0).fillna(0)
df_full['Taux_Effort_Mensuel'] = ((df_full['Montant_Versement'] * 4.33) / df_full['Revenus_Totaux']).replace([np.inf, -np.inf], 0).fillna(0) * 100
df_full['Capacite_Endettement'] = df_full['Revenus_Totaux'] * 0.33
df_full['Reste_A_Vivre'] = df_full['Revenus_Totaux'] - df_full['Charges_Estimees'] - (df_full['Montant_Versement'] * 4.33)
col1, col2, col3, col4 = st.columns(4)
col1.metric("Revenu moyen", f"{df_full['Revenus_Totaux'].mean():,.0f} XOF",
delta=f"Med {df_full['Revenus_Totaux'].median():,.0f}")
col2.metric("Taux effort", f"{df_full['Taux_Effort_Mensuel'].mean():.1f}%",
delta="⚠️ Risque" if df_full['Taux_Effort_Mensuel'].mean() > 33 else "✅ Sain")
col3.metric("Reste à vivre", f"{df_full['Reste_A_Vivre'].mean():,.0f} XOF",
delta=f"Min {df_full['Reste_A_Vivre'].min():,.0f}")
col4.metric("Patrimoine", f"{df_full['Patrimoine_Declare'].mean():,.0f} XOF")
fig_effort = px.box(df_full, y='Taux_Effort_Mensuel',
title="Taux d'effort mensuel (%)",
color_discrete_sequence=['#58a6ff'],
template=plotly_template)
fig_effort.add_hline(y=33, line_dash="dash", line_color="rgba(248, 81, 73, 0.6)",
line_width=1.5,
annotation_text="Seuil critique 33%",
annotation_font_color="#8b949e",
annotation_font_size=11)
fig_effort.update_traces(opacity=0.7)
st.plotly_chart(fig_effort, use_container_width=True)
# 3. FEATURES PROFESSIONNELLES
with st.expander("💼 Features Professionnelles", expanded=True):
df_full['Anciennete_Emploi'] = pd.to_numeric(df_full['Anciennete_Emploi'], errors='coerce').fillna(0)
df_full['Anciennete_Revenu'] = pd.to_numeric(df_full['Anciennete_Revenu'], errors='coerce').fillna(0)
df_full['Stabilite_Pro'] = df_full['Anciennete_Emploi'].apply(
lambda x: 'Élevée' if x >= 36 else ('Moyenne' if x >= 12 else 'Faible')
)
col1, col2, col3 = st.columns(3)
col1.metric("Ancienneté", f"{df_full['Anciennete_Emploi'].mean():.1f} mois")
col2.metric("CDI / Stable", f"{(df_full['Statut_Pro']=='CDI').sum()}")
col3.metric("Indépendants", f"{(df_full['Statut_Pro']=='Indépendant').sum()}")
secteur_count = df_full['Secteur_Activite'].value_counts().head(10)
fig_secteur = px.bar(x=secteur_count.values, y=secteur_count.index,
orientation='h',
title="Top 10 secteurs d'activité",
color_discrete_sequence=['#58a6ff'],
template=plotly_template)
fig_secteur.update_traces(marker_line_color='rgba(139, 148, 158, 0.2)', marker_line_width=0.5, opacity=0.8)
st.plotly_chart(fig_secteur, use_container_width=True)
# 4. FEATURES DE PRÊTS
with st.expander(" Features de Prêts & Historique", expanded=True):
# Conversion des colonnes en numérique AVANT le groupby
df_prets['Montant_Capital'] = pd.to_numeric(df_prets['Montant_Capital'], errors='coerce').fillna(0)
df_prets['Duree_Semaines'] = pd.to_numeric(df_prets['Duree_Semaines'], errors='coerce').fillna(0)
pret_history = df_prets.groupby('ID_Client').agg({
'ID_Pret': 'count',
'Montant_Capital': ['sum', 'mean', 'max'],
'Duree_Semaines': 'mean',
'Statut': lambda x: (x == 'Actif').sum()
}).reset_index()
pret_history.columns = ['ID_Client', 'Nb_Prets_Total', 'Montant_Total_Emprunte',
'Montant_Moyen_Pret', 'Montant_Max_Pret',
'Duree_Moyenne_Semaines', 'Nb_Prets_Actifs']
df_full = pd.merge(df_full, pret_history, on='ID_Client', how='left')
df_full['Ratio_Pret_Revenu'] = (df_full['Montant_Capital'] / df_full['Revenus_Totaux']).replace([np.inf, -np.inf], 0).fillna(0)
df_full['Frequence_Emprunt'] = df_full['Nb_Prets_Total']
col1, col2, col3, col4 = st.columns(4)
col1.metric("Prêts / client", f"{df_full['Nb_Prets_Total'].mean():.1f}",
delta=f"Max {df_full['Nb_Prets_Total'].max():.0f}")
col2.metric("Montant moy", f"{df_full['Montant_Moyen_Pret'].mean():,.0f} XOF")
col3.metric("Durée moy", f"{df_full['Duree_Moyenne_Semaines'].mean():.0f} sem")
col4.metric("Prêts actifs", f"{df_full['Nb_Prets_Actifs'].mean():.1f}")
fig_montant = px.scatter(df_full, x='Revenus_Totaux', y='Montant_Capital',
size='Nb_Prets_Total', color='Statut_Pro',
title="Montant prêt vs revenus",
hover_data=['Nom_Complet_Client'],
color_discrete_sequence=['#58a6ff', '#79c0ff', '#a5d6ff', '#c9d1d9'],
template=plotly_template)
fig_montant.update_traces(opacity=0.7)
st.plotly_chart(fig_montant, use_container_width=True)
# 5. FEATURES GÉOGRAPHIQUES
with st.expander(" Features Géographiques", expanded=True):
ville_count = df_full['Ville'].value_counts()
quartier_count = df_full['Quartier'].value_counts().head(10)
col1, col2 = st.columns(2)
with col1:
st.metric("Villes", len(ville_count))
fig_ville = px.pie(values=ville_count.values, names=ville_count.index,
title="Répartition par ville",
color_discrete_sequence=['#58a6ff', '#79c0ff', '#a5d6ff', '#c9d1d9', '#8b949e'],
template=plotly_template)
fig_ville.update_traces(textfont_family='Space Grotesk', opacity=0.85)
st.plotly_chart(fig_ville, use_container_width=True)
with col2:
st.metric("Quartiers", len(quartier_count))
fig_quartier = px.bar(x=quartier_count.values, y=quartier_count.index,
orientation='h', title="Top 10 quartiers",
color_discrete_sequence=['#58a6ff'],
template=plotly_template)
fig_quartier.update_traces(opacity=0.8)
st.plotly_chart(fig_quartier, use_container_width=True)
# 6. FEATURES TEMPORELLES
with st.expander(" Features Temporelles", expanded=True):
df_full['Date_Creation'] = pd.to_datetime(df_full.get('Date_Creation', pd.Series()), errors='coerce')
df_full['Anciennete_Client_Jours'] = (datetime.now() - df_full['Date_Creation']).dt.days
df_full['Anciennete_Client_Mois'] = (df_full['Anciennete_Client_Jours'] / 30.44).round(1)
col1, col2 = st.columns(2)
col1.metric("Ancienneté moy", f"{df_full['Anciennete_Client_Mois'].mean():.1f} mois")
col2.metric("Client + ancien", f"{df_full['Anciennete_Client_Mois'].max():.0f} mois")
df_full['Mois_Creation'] = df_full['Date_Creation'].dt.to_period('M').astype(str)
timeline = df_full.groupby('Mois_Creation').size().reset_index(name='Nb_Prets')
fig_timeline = px.line(timeline, x='Mois_Creation', y='Nb_Prets',
title="Évolution temporelle des prêts",
markers=True,
color_discrete_sequence=['#58a6ff'],
template=plotly_template)
fig_timeline.update_traces(line_width=2, marker_size=6, opacity=0.9)
st.plotly_chart(fig_timeline, use_container_width=True)
# MÉTRIQUES GLOBALES
st.divider()
st.markdown(" Métriques Globales
", unsafe_allow_html=True)
col1, col2, col3 = st.columns(3)
col1.metric("Taux défaut", "4.2 %", delta="Simulé")
col2.metric("Montant moy", "10,000 XOF")
col3.metric("Durée moy", "12 semaines")
# TABLE DATASET
st.divider()
st.markdown(" Dataset ML Enrichi
", unsafe_allow_html=True)
features_cols = [
'ID_Pret', 'ID_Client', 'Nom_Complet_Client', 'Age', 'Categorie_Age',
'Revenus_Totaux', 'Revenu_Net', 'Ratio_Charges_Revenus',
'Taux_Effort_Mensuel', 'Reste_A_Vivre', 'Patrimoine_Declare',
'Stabilite_Pro', 'Anciennete_Emploi', 'Statut_Pro',
'Nb_Prets_Total', 'Montant_Moyen_Pret', 'Ratio_Pret_Revenu',
'Ville', 'Quartier', 'Anciennete_Client_Mois', 'Statut'
]
features_cols = [col for col in features_cols if col in df_full.columns]
df_display = df_full[features_cols].copy()
numeric_cols = df_display.select_dtypes(include=[np.number]).columns
df_display[numeric_cols] = df_display[numeric_cols].round(2)
st.dataframe(df_display, use_container_width=True, height=400)
# EXPORT
st.divider()
st.markdown(" Export Dataset ML
", unsafe_allow_html=True)
col1, col2 = st.columns([2, 1])
with col1:
csv = df_full.to_csv(index=False).encode('utf-8')
st.download_button(
label=" Télécharger dataset complet (.csv)",
data=csv,
file_name=f'dataset_credit_scoring_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv',
mime='text/csv',
use_container_width=True
)
with col2:
st.info(f"**{len(df_full)} lignes** \n**{len(df_full.columns)} features**")
with st.expander(" Liste des features créées"):
created_features = [
'Age', 'Categorie_Age', 'Revenus_Totaux', 'Revenu_Net',
'Ratio_Charges_Revenus', 'Taux_Effort_Mensuel', 'Capacite_Endettement',
'Reste_A_Vivre', 'Stabilite_Pro', 'Nb_Prets_Total', 'Montant_Total_Emprunte',
'Montant_Moyen_Pret', 'Montant_Max_Pret', 'Ratio_Pret_Revenu',
'Frequence_Emprunt', 'Anciennete_Client_Jours', 'Anciennete_Client_Mois'
]
st.markdown("**Features engineering:**")
for i, feat in enumerate(created_features, 1):
st.markdown(f"`{i:02d}` {feat}")