oc_mlops_projet_2_dashboard / streamlit_app.py
GitLab CI
Déploiement Dashboard depuis GitLab CI - 2026-06-01 09:35:01
b8ad762
import os
import time
import tempfile
import threading
import requests
import pandas as pd
import json
import re
import pickle
import matplotlib.pyplot as plt
import shap
import plotly.graph_objects as go
import streamlit as st
from streamlit.components.v1 import html as st_html
from evidently import Report
from evidently.metrics import ValueDrift, DriftedColumnsCount
from evidently.tests import lte
from huggingface_hub import hf_hub_download
from functions import most_important_features_min_max
# Per-feature settings for the most important features; the single-request form below reads
# the "min"/"max" entries (and the "femme"/"homme" codes for CODE_GENDER) to configure its widgets.
features_min_max = most_important_features_min_max()
# -----------------------------------------------------------------------
# Global configuration – URLs of the HF Spaces services
# -----------------------------------------------------------------------
# Base URL of the prediction API; the API_URL environment variable overrides the default.
API_URL = os.getenv("API_URL", "https://cedm-oc-mlops-projet-2.hf.space")
DASHBOARD_P3_URL = "https://huggingface.co/spaces/CedM/oc_mlops_projet_3"
# Pause between two keep-alive rounds, in seconds (5 minutes): free-CPU HF Spaces
# go to sleep after roughly 10-15 minutes of inactivity.
KEEP_ALIVE_INTERVAL = 300
# -----------------------------------------------------------------------
# Keep-alive: keeps the HF Spaces containers in the "running" state.
# @st.cache_resource is used so that the thread is only started once.
# -----------------------------------------------------------------------
@st.cache_resource(show_spinner=False)
def start_keep_alive():
    """
    Start a daemon thread that pings the API health endpoint and the
    project 3 dashboard, then sleeps KEEP_ALIVE_INTERVAL seconds, forever.

    The first round of pings is sent as soon as the thread starts. Any
    exception raised by a ping is printed and does not stop the loop.

    Returns:
        The started ``threading.Thread``.
    """
    targets = (
        ("API", f"{API_URL}/health"),
        ("Projet 3", DASHBOARD_P3_URL),
    )

    def _ping_forever():
        while True:
            # Ping every target first, then wait before the next round.
            for label, url in targets:
                try:
                    reply = requests.get(url, timeout=30)
                    print(f"[keep-alive] Ping {label} → HTTP {reply.status_code}")
                except Exception as err:
                    print(f"[keep-alive] Ping {label} échoué: {err}")
            time.sleep(KEEP_ALIVE_INTERVAL)

    worker = threading.Thread(target=_ping_forever, daemon=True, name="keep-alive-api")
    worker.start()
    return worker


start_keep_alive()
# -----------------------------------------------------------------------
@st.cache_resource(show_spinner=False)
def load_shap_explainer():
    """Load the SHAP explainer from ``./hgb_shap_explainer.pkl``.

    The result is cached with ``st.cache_resource`` so the pickle is read and
    deserialized once instead of on every prediction click; the same explainer
    object is then shared by all reruns.

    Returns:
        The unpickled explainer object.

    Raises:
        OSError: if the pickle file cannot be opened.
        pickle.UnpicklingError: if the file content is not a valid pickle.
    """
    # NOTE(security): pickle.load can execute arbitrary code. This is only acceptable
    # because the file is an artifact shipped with the application, never user input.
    with open("./hgb_shap_explainer.pkl", "rb") as f:
        return pickle.load(f)
@st.cache_data(show_spinner=False)
def load_model_columns():
    """Return the ordered model column names read from the reference CSV header.

    Only the first data row of ``./train_data_sp2_subsample_1.csv`` is parsed;
    the ``SK_ID_CURR`` and ``TARGET`` columns are left out of the result.
    """
    excluded = {"SK_ID_CURR", "TARGET"}
    header_df = pd.read_csv("./train_data_sp2_subsample_1.csv", sep=";", encoding="utf-8", nrows=1)
    return [name for name in header_df.columns if name not in excluded]
# -----------------------------------------------------------------------
# Page configuration
# -----------------------------------------------------------------------
st.set_page_config(page_title="Prêt à Dépenser", layout="wide", page_icon="💳")
# -----------------------------------------------------------------------
# Global CSS – professional design & mobile-responsive layout
# The stylesheet is injected as raw HTML, hence unsafe_allow_html=True.
# -----------------------------------------------------------------------
st.markdown("""
<style>
/* ── Palette ── */
:root {
--primary: #1a3a5c;
--accent: #2e86de;
--success: #27ae60;
--danger: #e74c3c;
--bg-card: #ffffff;
--bg-page: #f0f4f8;
--text-main: #1a1a2e;
--text-muted:#6c757d;
--radius: 12px;
--shadow: 0 2px 12px rgba(0,0,0,.08);
}
/* ── Fond de page ── */
.stApp { background-color: var(--bg-page); }
/* ── Sidebar ── */
section[data-testid="stSidebar"] {
background: linear-gradient(180deg, var(--primary) 0%, #0d2137 100%);
color: #fff;
}
section[data-testid="stSidebar"] * { color: #fff !important; }
section[data-testid="stSidebar"] .stRadio label {
font-size: 1rem;
padding: 6px 0;
cursor: pointer;
}
section[data-testid="stSidebar"] hr {
border-color: rgba(255,255,255,.2) !important;
}
section[data-testid="stSidebar"] img {
border-radius: 8px;
margin-bottom: 8px;
}
/* ── Titres ── */
h1, h2, h3 { color: var(--primary) !important; font-weight: 700 !important; }
h1 { font-size: clamp(1.4rem, 3vw, 2rem) !important; }
/* ── Cartes ── */
.card {
background: var(--bg-card);
border-radius: var(--radius);
box-shadow: var(--shadow);
padding: 1.2rem 1.5rem;
margin-bottom: 1rem;
}
/* ── Boutons primaires ── */
.stButton > button[kind="primary"] {
background: linear-gradient(90deg, var(--accent), #1565c0) !important;
color: #fff !important;
border: none !important;
border-radius: 8px !important;
padding: 0.55rem 1.4rem !important;
font-weight: 600 !important;
letter-spacing: .3px;
transition: opacity .2s;
width: 100%;
}
.stButton > button[kind="primary"]:hover { opacity: .88; }
/* ── Metrics ── */
div[data-testid="metric-container"] {
background: var(--bg-card);
border-radius: var(--radius);
box-shadow: var(--shadow);
padding: .9rem 1rem;
}
/* ── Inputs ── */
.stNumberInput input, .stSelectbox select {
border-radius: 8px !important;
border: 1.5px solid #d1d9e0 !important;
}
/* ── Divider ── */
hr { border-color: #e0e6ed !important; }
/* ── Responsive mobile : empiler les colonnes ── */
@media (max-width: 768px) {
[data-testid="column"] {
width: 100% !important;
flex: 1 1 100% !important;
min-width: 100% !important;
}
h1 { font-size: 1.3rem !important; }
.stButton > button { font-size: .9rem !important; }
}
/* ── Badges navigation ── */
.nav-badge {
display: inline-block;
background: rgba(255,255,255,.15);
border-radius: 20px;
padding: 3px 10px;
font-size: .78rem;
margin-left: 6px;
}
</style>
""", unsafe_allow_html=True)
# -----------------------------------------------------------------------
# SIDEBAR – logo, navigation, information
# -----------------------------------------------------------------------
with st.sidebar:
    # Branding
    st.image("Logo_Pret_a_Depenser.png", use_container_width=True)
    st.markdown("*Outil de Scoring des demandes de crédit*")
    st.divider()

    # Page selector: the label stored in `page` is what the page sections compare against.
    nav_pages = [
        "❓ Demande simple",
        "📋 Demande en lot",
        "📊 Dérive des données",
        "⚡ Latence & Erreurs API",
    ]
    page = st.radio("Navigation", options=nav_pages, label_visibility="collapsed")
    st.divider()

    # Short description of the tool
    about_text = """
**À propos de cet outil :**
- Prédiction rapide (10 variables)
- Prédiction en lot (fichier CSV)
- Surveillance de la dérive des données
- Monitoring de l'API (latence / erreurs)
"""
    st.markdown(about_text)

    # Shows which API endpoint the dashboard is talking to
    api_badge = f'<p style="color:#a8c8f0; font-size:.82rem; word-break:break-all;">🔗 API : <code style="color:#a8c8f0; background:rgba(255,255,255,.1); padding:2px 5px; border-radius:4px;">{API_URL}</code></p>'
    st.markdown(api_badge, unsafe_allow_html=True)
# -----------------------------------------------------------------------
# Main header (content area)
# -----------------------------------------------------------------------
# Static HTML banner: the markup contains no placeholders, so a plain string is enough.
banner_html = """
<div style="background:linear-gradient(90deg,#1a3a5c,#2e86de);
border-radius:12px; padding:1rem 1.5rem; margin-bottom:1.2rem;
color:#fff; display:flex; align-items:center; gap:1rem;">
<div>
<p style="color:#fff; margin:0; font-size:clamp(1.2rem,2.5vw,1.7rem); font-weight:700; line-height:1.2;">
Outil de Scoring des demandes de crédit
</p>
<p style="margin:4px 0 0; opacity:.85; font-size:.95rem;">
Prédiction par machine learning · Interprétabilité SHAP · Surveillance des données
</p>
</div>
</div>
"""
st.markdown(banner_html, unsafe_allow_html=True)
# =====================================================================================================================
# PAGE 1 – Single prediction
# =====================================================================================================================
# Form fields in display order. Each entry is
# (feature name, also used as widget label; widget key; default value; step; help text).
# DAYS_EMPLOYED uses an int default/step so that its widget stays integer-typed.
# CODE_GENDER is rendered as a select box and therefore has no default/step.
_SINGLE_REQUEST_FIELDS = (
    ("EXT_SOURCE_3", "ext_source_3", 0.0, 0.01,
     "Score normalisé provenant d'une source de données externe"),
    ("EXT_SOURCE_2", "ext_source_2", 0.0, 0.01,
     "Score normalisé provenant d'une source de données externe"),
    ("EXT_SOURCE_1", "ext_source_1", 0.0, 0.01,
     "Score normalisé provenant d'une source de données externe"),
    ("DAYS_EMPLOYED", "days_employed", 0, 1,
     "Nombre de jours avant la demande où la personne a commencé son emploi actuel (chiffre négatif)"),
    ("PAYMENT_RATE", "payment_rate", 0.0, 0.01,
     "PAYMENT_RATE = AMT_ANNUITY / AMT_CREDIT"),
    ("INSTAL_DPD_MEAN", "instal_dpd_mean", 0.0, 1.0,
     "Moyenne des jours de retard sur les paiements des crédits précédents (si négatif, mettre 0)"),
    ("PREV_CNT_PAYMENT_MEAN", "prev_cnt_payment_mean", 0.0, 1.0,
     "Moyenne des durées des crédits précédents"),
    ("AMT_ANNUITY", "amt_annuity", 0.0, 1.0,
     "Annuité du prêt"),
    ("CODE_GENDER", "code_gender", None, None,
     "Sexe du client (0:Femme, 1:Homme)"),
    ("PREV_NAME_PRODUCT_TYPE_walk-in_MEAN", "prev_name_product_type_walk_in_mean", 0.0, 0.01,
     "Ratio de demandes précédentes faites en agence (walk-in)"),
)
# Number of widgets laid out on each row of the form.
_FIELDS_PER_ROW = 5


def _render_feature_input(label, key, default, step, help_text):
    """Render the form widget of one feature and return the value entered by the user.

    Args:
        label: Feature name; used as widget label and as key into ``features_min_max``.
        key: Streamlit widget key.
        default: Initial value of the numeric widget (ignored for CODE_GENDER).
        step: Increment of the numeric widget (ignored for CODE_GENDER).
        help_text: Tooltip shown next to the widget.

    Returns:
        The current widget value.
    """
    settings = features_min_max[label]
    if label == "CODE_GENDER":
        return st.selectbox(label, (settings["femme"], settings["homme"]), index=0,
                            help=help_text, key=key)
    low, high = settings["min"], settings["max"]
    # st.number_input rejects an initial value outside [min_value, max_value]; clamp the
    # default so a feature whose range does not contain 0 cannot break the whole form.
    initial = default
    if low is not None and initial < low:
        initial = low
    elif high is not None and initial > high:
        initial = high
    return st.number_input(label, min_value=low, max_value=high, value=initial, step=step,
                           help=help_text, key=key)


if page == "❓ Demande simple":
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.subheader("🔢 Paramètres de la demande")
    st.caption("Les variables sont classées par ordre d'importance (SHAP). Remplissez les champs puis lancez la prédiction.")
    st.markdown('</div>', unsafe_allow_html=True)

    # Payload sent to the API: feature name -> widget value, in field order.
    features = {}
    with st.container():
        for row_start in range(0, len(_SINGLE_REQUEST_FIELDS), _FIELDS_PER_ROW):
            row_fields = _SINGLE_REQUEST_FIELDS[row_start:row_start + _FIELDS_PER_ROW]
            for column, field in zip(st.columns(_FIELDS_PER_ROW), row_fields):
                with column:
                    features[field[0]] = _render_feature_input(*field)
    st.divider()

    if st.button("🔮 Lancer la prédiction", type="primary"):
        st.subheader("Résultat de la prédiction", divider="blue")
        try:
            with st.spinner("Analyse en cours..."):
                response = requests.post(f"{API_URL}/predict", json={"features": features}, timeout=30)
            if response.status_code == 200:
                result = response.json()
                prediction = result.get("prediction")
                # The API returns fractions; they are displayed as percentages.
                proba_rejet = result.get("probability", 0) * 100
                seuil = result.get("threshold", 0.474) * 100
                # The API's own decision is the single source of truth, so that the banner
                # and the rule summary can never contradict each other.
                rejected = prediction == 1
                col_result1, col_result2 = st.columns([1, 2])
                with col_result1:
                    if rejected:
                        st.error("⚠️ **DOSSIER REJETÉ**")
                        st.metric(label="Probabilité de défaut", value=f"{proba_rejet:.1f}%",
                                  delta=f"+{proba_rejet - seuil:.1f}% au-dessus du seuil", delta_color="inverse")
                    else:
                        st.success("✅ **DOSSIER ACCEPTÉ**")
                        st.metric(label="Probabilité de défaut", value=f"{proba_rejet:.1f}%",
                                  delta=f"{proba_rejet - seuil:.1f}% sous le seuil", delta_color="normal")
                with col_result2:
                    # The threshold is formatted explicitly: printing the raw float product
                    # can expose binary rounding artefacts.
                    st.info(f"**Règle de décision :**\n- Seuil de rejet : **{seuil:.1f}%**\n"
                            f"- Probabilité de défaut : **{proba_rejet:.2f}%**\n"
                            f"- Décision : **{'REJET' if rejected else 'ACCEPTÉ'}**")
            else:
                st.error(f"❌ Erreur API: {response.status_code} - {response.text}")
        except requests.exceptions.ConnectionError:
            st.error("❌ Impossible de se connecter à l'API.")
        except requests.exceptions.Timeout:
            st.error("❌ Timeout : l'API met trop de temps à répondre.")
        except Exception as e:
            st.error(f"❌ Erreur inattendue : {str(e)}")

        # The SHAP views are rendered even when the API call failed: they only need the form values.
        st.subheader("Interprétabilité SHAP", divider="gray")
        col20, col21 = st.columns(2)
        with col20:
            with st.container(border=True):
                st.markdown("**🌍 Vue globale du modèle**")
                st.image("./hgb_shap_global.png", use_container_width=True)
        with col21:
            with st.container(border=True):
                st.markdown("**🔍 Vue locale – cette demande**")
                fig_shap = None
                try:
                    explainer = load_shap_explainer()
                    model_columns = load_model_columns()
                    features_df = pd.DataFrame([features], columns=list(features.keys()))
                    # Align on the model's column order; features absent from the form become NaN.
                    features_df = features_df.reindex(columns=model_columns)
                    shap_values = explainer(features_df)
                    fig_shap, _ = plt.subplots()
                    shap.plots.waterfall(shap_values[0], max_display=10, show=False)
                    st.pyplot(fig_shap, bbox_inches='tight')
                except Exception as e:
                    st.warning(f"⚠️ Impossible d'afficher le graphique SHAP local : {str(e)}")
                finally:
                    # Always release the figure, including when the SHAP computation fails.
                    if fig_shap is not None:
                        plt.close(fig_shap)
# =====================================================================================================================
# PAGE 2 – Prédiction en lot
# =====================================================================================================================
elif page == "📋 Demande en lot":
st.markdown('<div class="card">', unsafe_allow_html=True)
st.subheader("📂 Import du fichier de demandes")
st.caption("Formats acceptés : CSV avec séparateur `;`, encodage UTF-8, colonne `SK_ID_CURR`. Maximum 1 000 lignes traitées.")
st.markdown('</div>', unsafe_allow_html=True)
uploaded_file = st.file_uploader(
"Sélectionnez un fichier CSV pré-traité",
type=["csv"],
help="Le fichier doit contenir toutes les variables nécessaires (sep=';', encodage UTF-8)."
)
if uploaded_file is not None:
try:
dataframe = pd.read_csv(uploaded_file, sep=';', encoding='utf-8', index_col='SK_ID_CURR', nrows=1000)
st.subheader("Aperçu des données chargées", divider="gray")
st.dataframe(dataframe, use_container_width=True)
except Exception as e:
st.error(f"❌ Erreur lors de la lecture du fichier CSV : {e}")
st.stop()
st.divider()
if st.button("🔮 Lancer les prédictions en lot", type="primary", key="predict_batch"):
try:
with st.spinner("Analyse en cours..."):
uploaded_file.seek(0)
response = requests.post(
f"{API_URL}/predict/file",
files={"file": (uploaded_file.name, uploaded_file, "text/csv")},
timeout=60
)
if response.status_code == 200:
result = response.json()
predictions = result.get("predictions", [])
probabilities = result.get("probabilities", [])
seuil = result.get("threshold", 0.474) * 100
count = result.get("count", 0)
st.success(f"✅ **{count} prédictions effectuées avec succès !**")
col_stats1, col_stats2, col_stats3 = st.columns(3)
nb_acceptes = predictions.count(0)
nb_rejetes = predictions.count(1)
with col_stats1:
st.metric("Total des demandes", count)
with col_stats2:
st.metric("Dossiers acceptés ✅", nb_acceptes,
delta=f"{nb_acceptes/count*100:.1f}%" if count > 0 else "0%")
with col_stats3:
st.metric("Dossiers rejetés ⚠️", nb_rejetes,
delta=f"{nb_rejetes/count*100:.1f}%" if count > 0 else "0%", delta_color="inverse")
dataframe_result = dataframe.copy()
dataframe_result['Probabilite_defaut'] = [round(p * 100, 2) for p in probabilities]
dataframe_result['Prediction'] = predictions
st.subheader("Détail des prédictions", divider="gray")
st.dataframe(
dataframe_result[['Probabilite_defaut', 'Prediction']].style.map(
lambda x: 'background-color: #ffcccc' if x == 1 else 'background-color: #ccffcc',
subset=['Prediction']
),
use_container_width=True
)
csv_result = dataframe_result.to_csv(index=True, sep=';', encoding='utf-8')
st.download_button(
label="📥 Télécharger les résultats (CSV)",
data=csv_result,
file_name="predictions_resultats.csv",
mime="text/csv"
)
st.info(f"**Seuil de décision appliqué : {seuil:.1f}%** — Les dossiers dont la probabilité dépasse ce seuil sont rejetés.")
else:
st.error(f"❌ Erreur API: {response.status_code} - {response.text}")
except requests.exceptions.ConnectionError:
st.error("❌ Impossible de se connecter à l'API.")
except requests.exceptions.Timeout:
st.error("❌ Timeout : l'API met trop de temps à répondre.")
except Exception as e:
st.error(f"❌ Erreur inattendue : {str(e)}")
# =====================================================================================================================
# PAGE 3 – Dérive des données
# =====================================================================================================================
elif page == "📊 Dérive des données":
st.markdown('<div class="card">', unsafe_allow_html=True)
st.subheader("📊 Analyse de la dérive des données")
st.markdown(
"🚧 La génération des rapports est disponible dès que le dataset actuel contient **au moins 100 lignes**. "
"L'analyse porte sur les **1 000 dernières lignes** pour garantir fiabilité et performance."
)
st.markdown('</div>', unsafe_allow_html=True)
@st.cache_data(show_spinner=False, ttl=3600)
def build_evidently_html(current_df, reference_df, report: int) -> str:
if report == 0:
report = Report([
ValueDrift(column=important_features[0], method="ks"),
ValueDrift(column=important_features[1], method="ks"),
ValueDrift(column=important_features[2], method="ks"),
ValueDrift(column=important_features[3], method="ks"),
ValueDrift(column=important_features[4], method="ks"),
ValueDrift(column=important_features[5], method="ks"),
ValueDrift(column=important_features[6], method="ks"),
ValueDrift(column=important_features[7], method="ks"),
ValueDrift(column=important_features[8], method="chisquare"),
ValueDrift(column=important_features[9], method="ks"),
])
else:
report = Report([DriftedColumnsCount(share_tests=[lte(threshold_drift)])])
my_eval = report.run(current_df, reference_df)
tmp = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
tmp.close()
my_eval.save_html(tmp.name)
with open(tmp.name, "r", encoding="utf-8") as f:
html_content = f.read()
os.remove(tmp.name)
return html_content
def drop_unnecessary_columns(df: pd.DataFrame):
return df.drop(columns=[col for col in df.columns if col in ['SK_ID_CURR', 'TARGET', '_prediction', '_timestamp']])
@st.cache_data(show_spinner=False, ttl=3600)
def load_reference_data():
return pd.read_pickle("./train_data_sp1.pkl", compression="gzip")
reference_df = load_reference_data()
reference_df = drop_unnecessary_columns(reference_df)
try:
current_file_path = hf_hub_download(repo_id="CedM/oc_mlops_projet_2", filename="data_io.csv", repo_type="dataset")
current_df = pd.read_csv(current_file_path, encoding="utf-8", sep=";", index_col='SK_ID_CURR').tail(1000)
current_df = drop_unnecessary_columns(current_df)
except FileNotFoundError:
st.error("❌ Fichier 'data_io.csv' introuvable sur le dépôt Hugging Face.")
st.stop()
except Exception as e:
st.error(f"❌ Erreur lors du téléchargement : {str(e)}")
st.stop()
important_features = [
"EXT_SOURCE_3", "EXT_SOURCE_2", "EXT_SOURCE_1", "DAYS_EMPLOYED",
"PAYMENT_RATE", "INSTAL_DPD_MEAN", "PREV_CNT_PAYMENT_MEAN",
"AMT_ANNUITY", "CODE_GENDER", "PREV_NAME_PRODUCT_TYPE_walk-in_MEAN"
]
threshold_drift = 0.50
current_df_count = current_df.shape[0]
st.info(f"📂 **Dataset actuel :** {current_df_count} lignes &nbsp;|&nbsp; 📂 **Dataset de référence :** {reference_df.shape[0]} lignes")
st.divider()
if current_df_count < 100:
st.warning("⚠️ Le dataset actuel contient moins de 100 lignes. L'analyse sera disponible dès 100 lignes.")
else:
report_type = st.selectbox(
"Type de rapport",
options=[
("Rapport détaillé – 10 variables les plus importantes (SHAP)", 0),
("Rapport synthétique – toutes les autres variables", 1)
],
format_func=lambda x: x[0],
key="report_type"
)
if st.button("🔮 Lancer l'analyse de dérive", type="primary", key="run_data_drift",
help="Compare le dataset actuel au dataset de référence."):
try:
with st.spinner("Génération du rapport Evidently en cours..."):
ref_df_filtered = reference_df.copy()
cur_df_filtered = current_df.copy()
if report_type[1] == 0:
ref_df_filtered = ref_df_filtered[[c for c in ref_df_filtered.columns if c in important_features]]
cur_df_filtered = cur_df_filtered[[c for c in cur_df_filtered.columns if c in important_features]]
else:
ref_df_filtered = ref_df_filtered.drop(columns=[c for c in important_features if c in ref_df_filtered.columns])
cur_df_filtered = cur_df_filtered.drop(columns=[c for c in important_features if c in cur_df_filtered.columns])
html_content = build_evidently_html(cur_df_filtered, ref_df_filtered, report=report_type[1])
col_i1, col_i2 = st.columns(2)
col_i1.metric("Variables dans le dataset de référence", ref_df_filtered.shape[1])
col_i2.metric("Variables dans le dataset actuel", cur_df_filtered.shape[1])
st.subheader("Rapport Evidently", divider="gray")
st_html(html_content, height=900, scrolling=True)
except Exception as e:
st.error(f"❌ Erreur inattendue : {str(e)}")
# =====================================================================================================================
# PAGE 4 – Latence & Erreurs API
# =====================================================================================================================
elif page == "⚡ Latence & Erreurs API":
st.markdown('<div class="card">', unsafe_allow_html=True)
st.subheader("⚡ Monitoring de l'API – Latence & Erreurs")
st.caption("Fenêtre d'analyse : **72 dernières heures**. Les logs sont mis à jour après chaque appel aux endpoints `/predict` et `/predict/file`.")
st.markdown('</div>', unsafe_allow_html=True)
def load_api_logs():
logs_data = []
errors_list = []
timestamps_all = []
try:
log_file_path = hf_hub_download(
repo_id="CedM/oc_mlops_projet_2", filename="api_log.jsonl",
repo_type="dataset", force_download=True
)
except Exception:
log_file_path = "../api_log.jsonl"
try:
with open(log_file_path, 'r', encoding='utf-8') as f:
for line in f:
try:
log_entry = json.loads(line.strip())
message = log_entry.get("message", "")
ts = pd.to_datetime(log_entry.get("timestamp"), utc=True, errors="coerce")
if pd.isna(ts):
continue
timestamps_all.append(ts)
match_single = re.search(r"Prédiction effectuée avec succès:.*temps=([\d.]+)s\)", message)
match_batch = re.search(r"Prédictions effectuées avec succès: (\d+) résultats \(temps d'exécution: ([\d.]+)s\)", message)
if match_single:
logs_data.append({"timestamp": ts, "endpoint": "/predict", "latency": float(match_single.group(1)), "prediction_count": 1})
elif match_batch:
logs_data.append({"timestamp": ts, "endpoint": "/predict/file", "latency": float(match_batch.group(2)), "prediction_count": int(match_batch.group(1))})
level = str(log_entry.get("level", "")).upper()
if level in ("ERROR", "WARNING"):
errors_list.append({"timestamp": ts, "level": level, "message": message, "module": log_entry.get("module", "")})
except json.JSONDecodeError:
continue
except FileNotFoundError:
st.error("❌ Fichier 'api_log.jsonl' introuvable.")
return {"df": pd.DataFrame(), "errors_df": pd.DataFrame(), "error_counts": {}}
df = pd.DataFrame(logs_data)
df_err = pd.DataFrame(errors_list)
last_ts = max(timestamps_all) if timestamps_all else pd.Timestamp.now(tz='UTC')
cutoff_time = last_ts - pd.Timedelta(hours=72)
if not df.empty:
df["timestamp"] = pd.to_datetime(df["timestamp"], utc=True, errors="coerce")
df = df.dropna(subset=["timestamp"])
df = df[df["timestamp"] >= cutoff_time]
if not df_err.empty:
df_err["timestamp"] = pd.to_datetime(df_err["timestamp"], utc=True, errors="coerce")
df_err = df_err.dropna(subset=["timestamp"])
df_err = df_err[df_err["timestamp"] >= cutoff_time]
error_counts = {
"ERROR": int(df_err[df_err["level"] == "ERROR"].shape[0]) if not df_err.empty else 0,
"WARNING": int(df_err[df_err["level"] == "WARNING"].shape[0]) if not df_err.empty else 0,
"TOTAL": int(df_err.shape[0]) if not df_err.empty else 0
}
return {"df": df, "errors_df": df_err, "error_counts": error_counts}
if st.button("🔄 Rafraîchir les données", type="primary", key="refresh_logs",
help="Recharge les logs depuis le HF Dataset."):
st.session_state['refresh_logs_triggered'] = True
with st.spinner("Chargement des logs..."):
result = load_api_logs()
df_logs = result.get('df', pd.DataFrame())
df_errors = result.get('errors_df', pd.DataFrame())
error_counts = result.get('error_counts', {})
if df_logs.empty and df_errors.empty:
st.warning("⚠️ Aucune donnée de latence ou d'erreur disponible dans les logs de l'API.")
else:
if not df_logs.empty:
st.info(f"📊 **{len(df_logs)} appels de prédiction** enregistrés sur les 72 dernières heures.")
st.subheader("Latence au fil du temps", divider="gray")
fig = go.Figure()
colors = {"/predict": "#2e86de", "/predict/file": "#e67e22"}
for endpoint in df_logs["endpoint"].unique():
df_ep = df_logs[df_logs["endpoint"] == endpoint].sort_values("timestamp")
if len(df_ep) > 0:
fig.add_trace(go.Scatter(
x=df_ep["timestamp"], y=df_ep["latency"],
mode='lines+markers', name=endpoint,
marker=dict(size=8, color=colors.get(endpoint, "#333"), line=dict(width=1, color='DarkSlateGrey')),
line=dict(color=colors.get(endpoint, "#333"), width=2),
hovertemplate="<b>%{x}</b><br>Latence: %{y:.4f}s<br>Nb prédictions: %{customdata}<extra></extra>",
customdata=df_ep["prediction_count"]
))
if len(df_ep) >= 2:
mean_l = df_ep["latency"].mean()
std_l = df_ep["latency"].std()
fig.add_hline(y=mean_l, line_dash="dash", line_color=colors.get(endpoint, "#333"),
annotation_text=f"Moy. {endpoint}: {mean_l:.4f}s", annotation_position="right", annotation_font_size=10)
fig.add_hrect(y0=max(0, mean_l - std_l), y1=mean_l + std_l,
fillcolor=colors.get(endpoint, "#333"), opacity=0.1, line_width=0,
annotation_text=f"±σ {endpoint}: {std_l:.4f}s", annotation_position="right", annotation_font_size=9)
fig.update_layout(
title="Latence de prédiction de l'API (fenêtre 72h)",
xaxis_title="Date et heure (UTC)", yaxis_title="Temps d'exécution (s)",
hovermode="x unified",
legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
height=480,
plot_bgcolor="#f8fafc",
paper_bgcolor="#f8fafc",
)
st.plotly_chart(fig, use_container_width=True)
st.subheader("Erreurs & Warnings (72h)", divider="gray")
col_err, col_warn, col_total = st.columns(3)
with col_err:
st.metric("🔴 Erreurs (ERROR)", error_counts.get('ERROR', 0))
with col_warn:
st.metric("🟡 Warnings (WARNING)", error_counts.get('WARNING', 0))
with col_total:
st.metric("📋 Total événements", error_counts.get('TOTAL', 0))
if not df_errors.empty:
st.subheader("Détail des événements", divider="gray")
st.dataframe(df_errors.sort_values('timestamp', ascending=False).reset_index(drop=True), use_container_width=True)