| import os |
| import time |
| import tempfile |
| import threading |
| import requests |
| import pandas as pd |
| import json |
| import re |
| import pickle |
| import matplotlib.pyplot as plt |
| import shap |
|
|
| import plotly.graph_objects as go |
|
|
| import streamlit as st |
| from streamlit.components.v1 import html as st_html |
|
|
| from evidently import Report |
| from evidently.metrics import ValueDrift, DriftedColumnsCount |
| from evidently.tests import lte |
|
|
| from huggingface_hub import hf_hub_download |
|
|
| from functions import most_important_features_min_max |
# Per-feature bounds ("min"/"max") and category codes for the most important
# features; read by the single-request form to constrain its input widgets.
features_min_max = most_important_features_min_max()
|
|
| |
| |
| |
# Base URL of the scoring API. The API_URL environment variable, when set,
# takes precedence over the built-in default.
_DEFAULT_API_URL = "https://cedm-oc-mlops-projet-2.hf.space"
API_URL = os.getenv("API_URL", _DEFAULT_API_URL)

# Page of the project 3 dashboard that the keep-alive thread requests.
DASHBOARD_P3_URL = "https://huggingface.co/spaces/CedM/oc_mlops_projet_3"

# Pause between two keep-alive rounds, in seconds (5 minutes).
KEEP_ALIVE_INTERVAL = 60 * 5
|
|
|
|
| |
| |
| |
| |
@st.cache_resource(show_spinner=False)
def start_keep_alive():
    """
    Start the background keep-alive thread and return it.

    The daemon thread requests the API ``/health`` endpoint and the project 3
    dashboard page, sleeps ``KEEP_ALIVE_INTERVAL`` seconds, then starts over.
    The first round of pings is sent as soon as the thread starts. Failures
    are only printed; they never stop the loop.
    """
    def _ping(target_name, url):
        # Best effort: any failure is reported on stdout and swallowed.
        try:
            reply = requests.get(url, timeout=30)
            print(f"[keep-alive] Ping {target_name} → HTTP {reply.status_code}")
        except Exception as exc:
            print(f"[keep-alive] Ping {target_name} échoué: {exc}")

    def _loop():
        targets = (
            ("API", f"{API_URL}/health"),
            ("Projet 3", DASHBOARD_P3_URL),
        )
        while True:
            for target_name, url in targets:
                _ping(target_name, url)
            time.sleep(KEEP_ALIVE_INTERVAL)

    worker = threading.Thread(target=_loop, name="keep-alive-api", daemon=True)
    worker.start()
    return worker


# Executed on every script run; the st.cache_resource decorator above is what
# keeps the thread from being created again on each rerun.
start_keep_alive()
|
|
| |
@st.cache_resource(show_spinner=False)
def load_shap_explainer():
    """Load the SHAP explainer from ``./hgb_shap_explainer.pkl``.

    The result is cached with ``st.cache_resource`` so the pickle is read and
    deserialised once instead of on every prediction request. The same
    explainer object is then handed back to every caller.

    Raises:
        OSError: if the pickle file cannot be opened.
        pickle.UnpicklingError: if the file content is not a valid pickle.
    """
    # NOTE(security): pickle.load can execute arbitrary code. This is only
    # acceptable because the file ships with the application; never point it
    # at user-supplied data.
    with open("./hgb_shap_explainer.pkl", "rb") as f:
        return pickle.load(f)
|
|
@st.cache_data(show_spinner=False)
def load_model_columns():
    """Return the model's feature names in the order of the reference CSV.

    Only the first data row of ``./train_data_sp2_subsample_1.csv`` is read,
    since just the header is needed. ``SK_ID_CURR`` and ``TARGET`` are left
    out of the returned list.
    """
    excluded = ("SK_ID_CURR", "TARGET")
    header_df = pd.read_csv(
        "./train_data_sp2_subsample_1.csv",
        sep=";",
        encoding="utf-8",
        nrows=1,
    )
    return [name for name in header_df.columns if name not in excluded]
|
|
| |
| |
| |
# Page-level configuration (browser tab title, wide layout, tab icon).
st.set_page_config(
    page_title="Prêt à Dépenser",
    layout="wide",
    page_icon="💳",
)

# Global stylesheet injected once per run: colour palette, sidebar, cards,
# primary buttons, metrics, inputs and the mobile breakpoint.
_CUSTOM_CSS = """
<style>
/* ── Palette ── */
:root {
--primary: #1a3a5c;
--accent: #2e86de;
--success: #27ae60;
--danger: #e74c3c;
--bg-card: #ffffff;
--bg-page: #f0f4f8;
--text-main: #1a1a2e;
--text-muted:#6c757d;
--radius: 12px;
--shadow: 0 2px 12px rgba(0,0,0,.08);
}

/* ── Fond de page ── */
.stApp { background-color: var(--bg-page); }

/* ── Sidebar ── */
section[data-testid="stSidebar"] {
background: linear-gradient(180deg, var(--primary) 0%, #0d2137 100%);
color: #fff;
}
section[data-testid="stSidebar"] * { color: #fff !important; }
section[data-testid="stSidebar"] .stRadio label {
font-size: 1rem;
padding: 6px 0;
cursor: pointer;
}
section[data-testid="stSidebar"] hr {
border-color: rgba(255,255,255,.2) !important;
}
section[data-testid="stSidebar"] img {
border-radius: 8px;
margin-bottom: 8px;
}

/* ── Titres ── */
h1, h2, h3 { color: var(--primary) !important; font-weight: 700 !important; }
h1 { font-size: clamp(1.4rem, 3vw, 2rem) !important; }

/* ── Cartes ── */
.card {
background: var(--bg-card);
border-radius: var(--radius);
box-shadow: var(--shadow);
padding: 1.2rem 1.5rem;
margin-bottom: 1rem;
}

/* ── Boutons primaires ── */
.stButton > button[kind="primary"] {
background: linear-gradient(90deg, var(--accent), #1565c0) !important;
color: #fff !important;
border: none !important;
border-radius: 8px !important;
padding: 0.55rem 1.4rem !important;
font-weight: 600 !important;
letter-spacing: .3px;
transition: opacity .2s;
width: 100%;
}
.stButton > button[kind="primary"]:hover { opacity: .88; }

/* ── Metrics ── */
div[data-testid="metric-container"] {
background: var(--bg-card);
border-radius: var(--radius);
box-shadow: var(--shadow);
padding: .9rem 1rem;
}

/* ── Inputs ── */
.stNumberInput input, .stSelectbox select {
border-radius: 8px !important;
border: 1.5px solid #d1d9e0 !important;
}

/* ── Divider ── */
hr { border-color: #e0e6ed !important; }

/* ── Responsive mobile : empiler les colonnes ── */
@media (max-width: 768px) {
[data-testid="column"] {
width: 100% !important;
flex: 1 1 100% !important;
min-width: 100% !important;
}
h1 { font-size: 1.3rem !important; }
.stButton > button { font-size: .9rem !important; }
}

/* ── Badges navigation ── */
.nav-badge {
display: inline-block;
background: rgba(255,255,255,.15);
border-radius: 20px;
padding: 3px 10px;
font-size: .78rem;
margin-left: 6px;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
|
| |
| |
| |
# Sidebar: logo, tagline, page selector and a short "about" section.
with st.sidebar:
    st.image("Logo_Pret_a_Depenser.png", use_container_width=True)
    st.markdown("*Outil de Scoring des demandes de crédit*")
    st.divider()

    # The selected label is stored in `page` and drives the page dispatch
    # further down the script.
    nav_pages = [
        "❓ Demande simple",
        "📋 Demande en lot",
        "📊 Dérive des données",
        "⚡ Latence & Erreurs API",
    ]
    page = st.radio("Navigation", options=nav_pages, label_visibility="collapsed")

    st.divider()
    about_md = (
        "\n"
        "**À propos de cet outil :**\n"
        "- Prédiction rapide (10 variables)\n"
        "- Prédiction en lot (fichier CSV)\n"
        "- Surveillance de la dérive des données\n"
        "- Monitoring de l'API (latence / erreurs)\n"
    )
    st.markdown(about_md)

    # Show which API instance the dashboard is talking to.
    code_css = "color:#a8c8f0; background:rgba(255,255,255,.1); padding:2px 5px; border-radius:4px;"
    st.markdown(
        '<p style="color:#a8c8f0; font-size:.82rem; word-break:break-all;">'
        f'🔗 API : <code style="{code_css}">{API_URL}</code></p>',
        unsafe_allow_html=True,
    )
|
|
| |
| |
| |
# Gradient banner shown at the top of every page (static markup, so a plain
# string is enough).
_BANNER_HTML = """
<div style="background:linear-gradient(90deg,#1a3a5c,#2e86de);
border-radius:12px; padding:1rem 1.5rem; margin-bottom:1.2rem;
color:#fff; display:flex; align-items:center; gap:1rem;">
<div>
<p style="color:#fff; margin:0; font-size:clamp(1.2rem,2.5vw,1.7rem); font-weight:700; line-height:1.2;">
Outil de Scoring des demandes de crédit
</p>
<p style="margin:4px 0 0; opacity:.85; font-size:.95rem;">
Prédiction par machine learning · Interprétabilité SHAP · Surveillance des données
</p>
</div>
</div>
"""
st.markdown(_BANNER_HTML, unsafe_allow_html=True)
|
|
| |
| |
| |
# ── Page 1: single credit request ──
# Collects the ten most important features, posts them to the API /predict
# endpoint, shows the decision, then the global and local SHAP views.
if page == "❓ Demande simple":
    st.markdown('<div class="card">', unsafe_allow_html=True)
    st.subheader("🔢 Paramètres de la demande")
    st.caption("Les variables sont classées par ordre d'importance (SHAP). Remplissez les champs puis lancez la prédiction.")
    st.markdown('</div>', unsafe_allow_html=True)

    with st.container():
        # First row of inputs. Bounds come from features_min_max.
        col1, col2, col3, col4, col5 = st.columns(5)
        with col1:
            label = "EXT_SOURCE_3"
            ext_source_3 = st.number_input(
                label, min_value=features_min_max[label]["min"], max_value=features_min_max[label]["max"],
                value=0.0, step=0.01,
                help="Score normalisé provenant d'une source de données externe", key='ext_source_3')
        with col2:
            label = "EXT_SOURCE_2"
            ext_source_2 = st.number_input(
                label, min_value=features_min_max[label]["min"], max_value=features_min_max[label]["max"],
                value=0.0, step=0.01,
                help="Score normalisé provenant d'une source de données externe", key='ext_source_2')
        with col3:
            label = "EXT_SOURCE_1"
            ext_source_1 = st.number_input(
                label, min_value=features_min_max[label]["min"], max_value=features_min_max[label]["max"],
                value=0.0, step=0.01,
                help="Score normalisé provenant d'une source de données externe", key='ext_source_1')
        with col4:
            label = "DAYS_EMPLOYED"
            days_employed = st.number_input(
                label, min_value=features_min_max[label]["min"], max_value=features_min_max[label]["max"],
                value=0, step=1,
                help="Nombre de jours avant la demande où la personne a commencé son emploi actuel (chiffre négatif)", key='days_employed')
        with col5:
            label = "PAYMENT_RATE"
            payment_rate = st.number_input(
                label, min_value=features_min_max[label]["min"], max_value=features_min_max[label]["max"],
                value=0.0, step=0.01,
                help="PAYMENT_RATE = AMT_ANNUITY / AMT_CREDIT", key='payment_rate')

        # Second row of inputs.
        col6, col7, col8, col9, col10 = st.columns(5)
        with col6:
            label = "INSTAL_DPD_MEAN"
            instal_dpd_mean = st.number_input(
                label, min_value=features_min_max[label]["min"], max_value=features_min_max[label]["max"],
                value=0.0, step=1.0,
                help="Moyenne des jours de retard sur les paiements des crédits précédents (si négatif, mettre 0)", key='instal_dpd_mean')
        with col7:
            label = "PREV_CNT_PAYMENT_MEAN"
            prev_cnt_payment_mean = st.number_input(
                label, min_value=features_min_max[label]["min"], max_value=features_min_max[label]["max"],
                value=0.0, step=1.0,
                help="Moyenne des durées des crédits précédents", key='prev_cnt_payment_mean')
        with col8:
            label = "AMT_ANNUITY"
            amt_annuity = st.number_input(
                label, min_value=features_min_max[label]["min"], max_value=features_min_max[label]["max"],
                value=0.0, step=1.0,
                help="Annuité du prêt", key='amt_annuity')
        with col9:
            label = "CODE_GENDER"
            code_gender = st.selectbox(
                label, (features_min_max[label]["femme"], features_min_max[label]["homme"]), index=0,
                help="Sexe du client (0:Femme, 1:Homme)", key='code_gender')
        with col10:
            label = "PREV_NAME_PRODUCT_TYPE_walk-in_MEAN"
            prev_name_product_type_walk_in_mean = st.number_input(
                label, min_value=features_min_max[label]["min"], max_value=features_min_max[label]["max"],
                value=0.0, step=0.01,
                help="Ratio de demandes précédentes faites en agence (walk-in)", key='prev_name_product_type_walk_in_mean')

    st.divider()
    if st.button("🔮 Lancer la prédiction", type="primary"):
        st.subheader("Résultat de la prédiction", divider="blue")

        # Payload sent to the API; keys are the model feature names.
        features = {
            "EXT_SOURCE_3": ext_source_3,
            "EXT_SOURCE_2": ext_source_2,
            "EXT_SOURCE_1": ext_source_1,
            "DAYS_EMPLOYED": days_employed,
            "PAYMENT_RATE": payment_rate,
            "INSTAL_DPD_MEAN": instal_dpd_mean,
            "PREV_CNT_PAYMENT_MEAN": prev_cnt_payment_mean,
            "AMT_ANNUITY": amt_annuity,
            "CODE_GENDER": code_gender,
            "PREV_NAME_PRODUCT_TYPE_walk-in_MEAN": prev_name_product_type_walk_in_mean,
        }

        try:
            with st.spinner("Analyse en cours..."):
                response = requests.post(f"{API_URL}/predict", json={"features": features}, timeout=30)

            if response.status_code == 200:
                result = response.json()
                prediction = result.get("prediction")
                # Both values are converted to percentages for display.
                proba_rejet = result.get("probability", 0) * 100
                seuil = result.get("threshold", 0.474) * 100

                col_result1, col_result2 = st.columns([1, 2])
                with col_result1:
                    if prediction == 1:
                        st.error("⚠️ **DOSSIER REJETÉ**")
                        st.metric(label="Probabilité de défaut", value=f"{proba_rejet:.1f}%",
                                  delta=f"+{proba_rejet - seuil:.1f}% au-dessus du seuil", delta_color="inverse")
                    else:
                        st.success("✅ **DOSSIER ACCEPTÉ**")
                        st.metric(label="Probabilité de défaut", value=f"{proba_rejet:.1f}%",
                                  delta=f"{proba_rejet - seuil:.1f}% sous le seuil", delta_color="normal")

                with col_result2:
                    # The threshold is formatted like every other percentage on
                    # the page; printing the raw float could expose rounding
                    # noise from the multiplication by 100.
                    st.info(f"**Règle de décision :**\n- Seuil de rejet : **{seuil:.1f}%**\n"
                            f"- Probabilité de défaut : **{proba_rejet:.2f}%**\n"
                            f"- Décision : **{'REJET' if proba_rejet >= seuil else 'ACCEPTÉ'}**")
            else:
                st.error(f"❌ Erreur API: {response.status_code} - {response.text}")

        except requests.exceptions.ConnectionError:
            st.error("❌ Impossible de se connecter à l'API.")
        except requests.exceptions.Timeout:
            st.error("❌ Timeout : l'API met trop de temps à répondre.")
        except Exception as e:
            st.error(f"❌ Erreur inattendue : {str(e)}")

        st.subheader("Interprétabilité SHAP", divider="gray")
        col20, col21 = st.columns(2)
        with col20:
            with st.container(border=True):
                st.markdown("**🌍 Vue globale du modèle**")
                st.image("./hgb_shap_global.png", use_container_width=True)

        with col21:
            with st.container(border=True):
                st.markdown("**🔍 Vue locale – cette demande**")
                try:
                    explainer = load_shap_explainer()
                    model_columns = load_model_columns()
                    features_df = pd.DataFrame([features], columns=list(features.keys()))
                    # Align on the model's column order; columns that were not
                    # entered in the form are created by reindex as missing.
                    features_df = features_df.reindex(columns=model_columns)
                    shap_values = explainer(features_df)
                    fig_shap, _ = plt.subplots()
                    try:
                        shap.plots.waterfall(shap_values[0], max_display=10, show=False)
                        st.pyplot(fig_shap, bbox_inches='tight')
                    finally:
                        # Always release the figure, even when plotting fails,
                        # so failed requests do not accumulate open figures.
                        plt.close(fig_shap)
                except Exception as e:
                    st.warning(f"⚠️ Impossible d'afficher le graphique SHAP local : {str(e)}")
|
|
| |
| |
| |
| elif page == "📋 Demande en lot": |
| st.markdown('<div class="card">', unsafe_allow_html=True) |
| st.subheader("📂 Import du fichier de demandes") |
| st.caption("Formats acceptés : CSV avec séparateur `;`, encodage UTF-8, colonne `SK_ID_CURR`. Maximum 1 000 lignes traitées.") |
| st.markdown('</div>', unsafe_allow_html=True) |
|
|
| uploaded_file = st.file_uploader( |
| "Sélectionnez un fichier CSV pré-traité", |
| type=["csv"], |
| help="Le fichier doit contenir toutes les variables nécessaires (sep=';', encodage UTF-8)." |
| ) |
|
|
| if uploaded_file is not None: |
| try: |
| dataframe = pd.read_csv(uploaded_file, sep=';', encoding='utf-8', index_col='SK_ID_CURR', nrows=1000) |
| st.subheader("Aperçu des données chargées", divider="gray") |
| st.dataframe(dataframe, use_container_width=True) |
| except Exception as e: |
| st.error(f"❌ Erreur lors de la lecture du fichier CSV : {e}") |
| st.stop() |
|
|
| st.divider() |
| if st.button("🔮 Lancer les prédictions en lot", type="primary", key="predict_batch"): |
| try: |
| with st.spinner("Analyse en cours..."): |
| uploaded_file.seek(0) |
| response = requests.post( |
| f"{API_URL}/predict/file", |
| files={"file": (uploaded_file.name, uploaded_file, "text/csv")}, |
| timeout=60 |
| ) |
|
|
| if response.status_code == 200: |
| result = response.json() |
| predictions = result.get("predictions", []) |
| probabilities = result.get("probabilities", []) |
| seuil = result.get("threshold", 0.474) * 100 |
| count = result.get("count", 0) |
|
|
| st.success(f"✅ **{count} prédictions effectuées avec succès !**") |
|
|
| col_stats1, col_stats2, col_stats3 = st.columns(3) |
| nb_acceptes = predictions.count(0) |
| nb_rejetes = predictions.count(1) |
| with col_stats1: |
| st.metric("Total des demandes", count) |
| with col_stats2: |
| st.metric("Dossiers acceptés ✅", nb_acceptes, |
| delta=f"{nb_acceptes/count*100:.1f}%" if count > 0 else "0%") |
| with col_stats3: |
| st.metric("Dossiers rejetés ⚠️", nb_rejetes, |
| delta=f"{nb_rejetes/count*100:.1f}%" if count > 0 else "0%", delta_color="inverse") |
|
|
| dataframe_result = dataframe.copy() |
| dataframe_result['Probabilite_defaut'] = [round(p * 100, 2) for p in probabilities] |
| dataframe_result['Prediction'] = predictions |
|
|
| st.subheader("Détail des prédictions", divider="gray") |
| st.dataframe( |
| dataframe_result[['Probabilite_defaut', 'Prediction']].style.map( |
| lambda x: 'background-color: #ffcccc' if x == 1 else 'background-color: #ccffcc', |
| subset=['Prediction'] |
| ), |
| use_container_width=True |
| ) |
|
|
| csv_result = dataframe_result.to_csv(index=True, sep=';', encoding='utf-8') |
| st.download_button( |
| label="📥 Télécharger les résultats (CSV)", |
| data=csv_result, |
| file_name="predictions_resultats.csv", |
| mime="text/csv" |
| ) |
| st.info(f"**Seuil de décision appliqué : {seuil:.1f}%** — Les dossiers dont la probabilité dépasse ce seuil sont rejetés.") |
| else: |
| st.error(f"❌ Erreur API: {response.status_code} - {response.text}") |
|
|
| except requests.exceptions.ConnectionError: |
| st.error("❌ Impossible de se connecter à l'API.") |
| except requests.exceptions.Timeout: |
| st.error("❌ Timeout : l'API met trop de temps à répondre.") |
| except Exception as e: |
| st.error(f"❌ Erreur inattendue : {str(e)}") |
|
|
| |
| |
| |
| elif page == "📊 Dérive des données": |
| st.markdown('<div class="card">', unsafe_allow_html=True) |
| st.subheader("📊 Analyse de la dérive des données") |
| st.markdown( |
| "🚧 La génération des rapports est disponible dès que le dataset actuel contient **au moins 100 lignes**. " |
| "L'analyse porte sur les **1 000 dernières lignes** pour garantir fiabilité et performance." |
| ) |
| st.markdown('</div>', unsafe_allow_html=True) |
|
|
| @st.cache_data(show_spinner=False, ttl=3600) |
| def build_evidently_html(current_df, reference_df, report: int) -> str: |
| if report == 0: |
| report = Report([ |
| ValueDrift(column=important_features[0], method="ks"), |
| ValueDrift(column=important_features[1], method="ks"), |
| ValueDrift(column=important_features[2], method="ks"), |
| ValueDrift(column=important_features[3], method="ks"), |
| ValueDrift(column=important_features[4], method="ks"), |
| ValueDrift(column=important_features[5], method="ks"), |
| ValueDrift(column=important_features[6], method="ks"), |
| ValueDrift(column=important_features[7], method="ks"), |
| ValueDrift(column=important_features[8], method="chisquare"), |
| ValueDrift(column=important_features[9], method="ks"), |
| ]) |
| else: |
| report = Report([DriftedColumnsCount(share_tests=[lte(threshold_drift)])]) |
|
|
| my_eval = report.run(current_df, reference_df) |
| tmp = tempfile.NamedTemporaryFile(suffix=".html", delete=False) |
| tmp.close() |
| my_eval.save_html(tmp.name) |
| with open(tmp.name, "r", encoding="utf-8") as f: |
| html_content = f.read() |
| os.remove(tmp.name) |
| return html_content |
|
|
| def drop_unnecessary_columns(df: pd.DataFrame): |
| return df.drop(columns=[col for col in df.columns if col in ['SK_ID_CURR', 'TARGET', '_prediction', '_timestamp']]) |
|
|
| @st.cache_data(show_spinner=False, ttl=3600) |
| def load_reference_data(): |
| return pd.read_pickle("./train_data_sp1.pkl", compression="gzip") |
|
|
| reference_df = load_reference_data() |
| reference_df = drop_unnecessary_columns(reference_df) |
|
|
| try: |
| current_file_path = hf_hub_download(repo_id="CedM/oc_mlops_projet_2", filename="data_io.csv", repo_type="dataset") |
| current_df = pd.read_csv(current_file_path, encoding="utf-8", sep=";", index_col='SK_ID_CURR').tail(1000) |
| current_df = drop_unnecessary_columns(current_df) |
| except FileNotFoundError: |
| st.error("❌ Fichier 'data_io.csv' introuvable sur le dépôt Hugging Face.") |
| st.stop() |
| except Exception as e: |
| st.error(f"❌ Erreur lors du téléchargement : {str(e)}") |
| st.stop() |
|
|
| important_features = [ |
| "EXT_SOURCE_3", "EXT_SOURCE_2", "EXT_SOURCE_1", "DAYS_EMPLOYED", |
| "PAYMENT_RATE", "INSTAL_DPD_MEAN", "PREV_CNT_PAYMENT_MEAN", |
| "AMT_ANNUITY", "CODE_GENDER", "PREV_NAME_PRODUCT_TYPE_walk-in_MEAN" |
| ] |
| threshold_drift = 0.50 |
|
|
| current_df_count = current_df.shape[0] |
| st.info(f"📂 **Dataset actuel :** {current_df_count} lignes | 📂 **Dataset de référence :** {reference_df.shape[0]} lignes") |
| st.divider() |
|
|
| if current_df_count < 100: |
| st.warning("⚠️ Le dataset actuel contient moins de 100 lignes. L'analyse sera disponible dès 100 lignes.") |
| else: |
| report_type = st.selectbox( |
| "Type de rapport", |
| options=[ |
| ("Rapport détaillé – 10 variables les plus importantes (SHAP)", 0), |
| ("Rapport synthétique – toutes les autres variables", 1) |
| ], |
| format_func=lambda x: x[0], |
| key="report_type" |
| ) |
|
|
| if st.button("🔮 Lancer l'analyse de dérive", type="primary", key="run_data_drift", |
| help="Compare le dataset actuel au dataset de référence."): |
| try: |
| with st.spinner("Génération du rapport Evidently en cours..."): |
| ref_df_filtered = reference_df.copy() |
| cur_df_filtered = current_df.copy() |
| if report_type[1] == 0: |
| ref_df_filtered = ref_df_filtered[[c for c in ref_df_filtered.columns if c in important_features]] |
| cur_df_filtered = cur_df_filtered[[c for c in cur_df_filtered.columns if c in important_features]] |
| else: |
| ref_df_filtered = ref_df_filtered.drop(columns=[c for c in important_features if c in ref_df_filtered.columns]) |
| cur_df_filtered = cur_df_filtered.drop(columns=[c for c in important_features if c in cur_df_filtered.columns]) |
|
|
| html_content = build_evidently_html(cur_df_filtered, ref_df_filtered, report=report_type[1]) |
|
|
| col_i1, col_i2 = st.columns(2) |
| col_i1.metric("Variables dans le dataset de référence", ref_df_filtered.shape[1]) |
| col_i2.metric("Variables dans le dataset actuel", cur_df_filtered.shape[1]) |
|
|
| st.subheader("Rapport Evidently", divider="gray") |
| st_html(html_content, height=900, scrolling=True) |
| except Exception as e: |
| st.error(f"❌ Erreur inattendue : {str(e)}") |
|
|
| |
| |
| |
| elif page == "⚡ Latence & Erreurs API": |
| st.markdown('<div class="card">', unsafe_allow_html=True) |
| st.subheader("⚡ Monitoring de l'API – Latence & Erreurs") |
| st.caption("Fenêtre d'analyse : **72 dernières heures**. Les logs sont mis à jour après chaque appel aux endpoints `/predict` et `/predict/file`.") |
| st.markdown('</div>', unsafe_allow_html=True) |
|
|
| def load_api_logs(): |
| logs_data = [] |
| errors_list = [] |
| timestamps_all = [] |
|
|
| try: |
| log_file_path = hf_hub_download( |
| repo_id="CedM/oc_mlops_projet_2", filename="api_log.jsonl", |
| repo_type="dataset", force_download=True |
| ) |
| except Exception: |
| log_file_path = "../api_log.jsonl" |
|
|
| try: |
| with open(log_file_path, 'r', encoding='utf-8') as f: |
| for line in f: |
| try: |
| log_entry = json.loads(line.strip()) |
| message = log_entry.get("message", "") |
| ts = pd.to_datetime(log_entry.get("timestamp"), utc=True, errors="coerce") |
| if pd.isna(ts): |
| continue |
| timestamps_all.append(ts) |
|
|
| match_single = re.search(r"Prédiction effectuée avec succès:.*temps=([\d.]+)s\)", message) |
| match_batch = re.search(r"Prédictions effectuées avec succès: (\d+) résultats \(temps d'exécution: ([\d.]+)s\)", message) |
|
|
| if match_single: |
| logs_data.append({"timestamp": ts, "endpoint": "/predict", "latency": float(match_single.group(1)), "prediction_count": 1}) |
| elif match_batch: |
| logs_data.append({"timestamp": ts, "endpoint": "/predict/file", "latency": float(match_batch.group(2)), "prediction_count": int(match_batch.group(1))}) |
|
|
| level = str(log_entry.get("level", "")).upper() |
| if level in ("ERROR", "WARNING"): |
| errors_list.append({"timestamp": ts, "level": level, "message": message, "module": log_entry.get("module", "")}) |
|
|
| except json.JSONDecodeError: |
| continue |
| except FileNotFoundError: |
| st.error("❌ Fichier 'api_log.jsonl' introuvable.") |
| return {"df": pd.DataFrame(), "errors_df": pd.DataFrame(), "error_counts": {}} |
|
|
| df = pd.DataFrame(logs_data) |
| df_err = pd.DataFrame(errors_list) |
|
|
| last_ts = max(timestamps_all) if timestamps_all else pd.Timestamp.now(tz='UTC') |
| cutoff_time = last_ts - pd.Timedelta(hours=72) |
|
|
| if not df.empty: |
| df["timestamp"] = pd.to_datetime(df["timestamp"], utc=True, errors="coerce") |
| df = df.dropna(subset=["timestamp"]) |
| df = df[df["timestamp"] >= cutoff_time] |
|
|
| if not df_err.empty: |
| df_err["timestamp"] = pd.to_datetime(df_err["timestamp"], utc=True, errors="coerce") |
| df_err = df_err.dropna(subset=["timestamp"]) |
| df_err = df_err[df_err["timestamp"] >= cutoff_time] |
|
|
| error_counts = { |
| "ERROR": int(df_err[df_err["level"] == "ERROR"].shape[0]) if not df_err.empty else 0, |
| "WARNING": int(df_err[df_err["level"] == "WARNING"].shape[0]) if not df_err.empty else 0, |
| "TOTAL": int(df_err.shape[0]) if not df_err.empty else 0 |
| } |
| return {"df": df, "errors_df": df_err, "error_counts": error_counts} |
|
|
| if st.button("🔄 Rafraîchir les données", type="primary", key="refresh_logs", |
| help="Recharge les logs depuis le HF Dataset."): |
| st.session_state['refresh_logs_triggered'] = True |
| with st.spinner("Chargement des logs..."): |
| result = load_api_logs() |
|
|
| df_logs = result.get('df', pd.DataFrame()) |
| df_errors = result.get('errors_df', pd.DataFrame()) |
| error_counts = result.get('error_counts', {}) |
|
|
| if df_logs.empty and df_errors.empty: |
| st.warning("⚠️ Aucune donnée de latence ou d'erreur disponible dans les logs de l'API.") |
| else: |
| if not df_logs.empty: |
| st.info(f"📊 **{len(df_logs)} appels de prédiction** enregistrés sur les 72 dernières heures.") |
| st.subheader("Latence au fil du temps", divider="gray") |
|
|
| fig = go.Figure() |
| colors = {"/predict": "#2e86de", "/predict/file": "#e67e22"} |
|
|
| for endpoint in df_logs["endpoint"].unique(): |
| df_ep = df_logs[df_logs["endpoint"] == endpoint].sort_values("timestamp") |
| if len(df_ep) > 0: |
| fig.add_trace(go.Scatter( |
| x=df_ep["timestamp"], y=df_ep["latency"], |
| mode='lines+markers', name=endpoint, |
| marker=dict(size=8, color=colors.get(endpoint, "#333"), line=dict(width=1, color='DarkSlateGrey')), |
| line=dict(color=colors.get(endpoint, "#333"), width=2), |
| hovertemplate="<b>%{x}</b><br>Latence: %{y:.4f}s<br>Nb prédictions: %{customdata}<extra></extra>", |
| customdata=df_ep["prediction_count"] |
| )) |
| if len(df_ep) >= 2: |
| mean_l = df_ep["latency"].mean() |
| std_l = df_ep["latency"].std() |
| fig.add_hline(y=mean_l, line_dash="dash", line_color=colors.get(endpoint, "#333"), |
| annotation_text=f"Moy. {endpoint}: {mean_l:.4f}s", annotation_position="right", annotation_font_size=10) |
| fig.add_hrect(y0=max(0, mean_l - std_l), y1=mean_l + std_l, |
| fillcolor=colors.get(endpoint, "#333"), opacity=0.1, line_width=0, |
| annotation_text=f"±σ {endpoint}: {std_l:.4f}s", annotation_position="right", annotation_font_size=9) |
|
|
| fig.update_layout( |
| title="Latence de prédiction de l'API (fenêtre 72h)", |
| xaxis_title="Date et heure (UTC)", yaxis_title="Temps d'exécution (s)", |
| hovermode="x unified", |
| legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01), |
| height=480, |
| plot_bgcolor="#f8fafc", |
| paper_bgcolor="#f8fafc", |
| ) |
| st.plotly_chart(fig, use_container_width=True) |
|
|
| st.subheader("Erreurs & Warnings (72h)", divider="gray") |
| col_err, col_warn, col_total = st.columns(3) |
| with col_err: |
| st.metric("🔴 Erreurs (ERROR)", error_counts.get('ERROR', 0)) |
| with col_warn: |
| st.metric("🟡 Warnings (WARNING)", error_counts.get('WARNING', 0)) |
| with col_total: |
| st.metric("📋 Total événements", error_counts.get('TOTAL', 0)) |
|
|
| if not df_errors.empty: |
| st.subheader("Détail des événements", divider="gray") |
| st.dataframe(df_errors.sort_values('timestamp', ascending=False).reset_index(drop=True), use_container_width=True) |
|
|
|
|