Spaces:

CedM
/

oc_mlops_projet_2_dashboard

Running

oc_mlops_projet_2_dashboard / streamlit_app.py

GitLab CI

Déploiement Dashboard depuis GitLab CI - 2026-06-01 09:35:01

b8ad762 5 days ago

33.8 kB

	import os
	import time
	import tempfile
	import threading
	import requests
	import pandas as pd
	import json
	import re
	import pickle
	import matplotlib.pyplot as plt
	import shap

	import plotly.graph_objects as go

	import streamlit as st
	from streamlit.components.v1 import html as st_html

	from evidently import Report
	from evidently.metrics import ValueDrift, DriftedColumnsCount
	from evidently.tests import lte

	from huggingface_hub import hf_hub_download

	from functions import most_important_features_min_max
	features_min_max = most_important_features_min_max()

	# -----------------------------------------------------------------------
	# Global configuration – URLs of the Hugging Face Spaces services
	# -----------------------------------------------------------------------
	# Base URL of the scoring API; the API_URL environment variable overrides
	# the default.
	API_URL = os.getenv("API_URL", "https://cedm-oc-mlops-projet-2.hf.space")
	# NOTE(review): this is the Hub page of the project-3 Space, not its
	# *.hf.space application URL — confirm that requesting it is enough to keep
	# that container awake.
	DASHBOARD_P3_URL = "https://huggingface.co/spaces/CedM/oc_mlops_projet_3"
	# Seconds between two keep-alive rounds (5 minutes). Per the original author,
	# free CPU Spaces go to sleep after roughly 10-15 minutes of inactivity.
	KEEP_ALIVE_INTERVAL = 300


	# -----------------------------------------------------------------------
	# Keep-alive : maintien des containers HF Spaces en état "running"
	# Utilise @st.cache_resource pour ne démarrer le thread qu'une seule fois.
	# -----------------------------------------------------------------------
	@st.cache_resource(show_spinner=False)
	def start_keep_alive():
	    """Start the background keep-alive thread and return it.

	    The daemon thread loops forever: it requests ``{API_URL}/health`` and
	    then ``DASHBOARD_P3_URL`` (30 s timeout each), prints the outcome of
	    each request, and sleeps ``KEEP_ALIVE_INTERVAL`` seconds before the
	    next round. The first round is sent as soon as the thread starts.
	    Request failures are printed and never leave the loop.

	    The function is wrapped in ``st.cache_resource`` so that script reruns
	    reuse the cached thread instead of spawning a new one.
	    """
	    def _ping_api():
	        # Health endpoint of the scoring API.
	        try:
	            reply = requests.get(f"{API_URL}/health", timeout=30)
	            print(f"[keep-alive] Ping API → HTTP {reply.status_code}")
	        except Exception as exc:
	            print(f"[keep-alive] Ping API échoué: {exc}")

	    def _ping_project_3():
	        # Project-3 dashboard page.
	        try:
	            reply = requests.get(DASHBOARD_P3_URL, timeout=30)
	            print(f"[keep-alive] Ping Projet 3 → HTTP {reply.status_code}")
	        except Exception as exc:
	            print(f"[keep-alive] Ping Projet 3 échoué: {exc}")

	    def _loop():
	        # Ping first, wait afterwards: the first round is immediate.
	        while True:
	            _ping_api()
	            _ping_project_3()
	            time.sleep(KEEP_ALIVE_INTERVAL)

	    worker = threading.Thread(target=_loop, name="keep-alive-api", daemon=True)
	    worker.start()
	    return worker


	start_keep_alive()

	# -----------------------------------------------------------------------
	def load_shap_explainer():
	    """Return the SHAP explainer unpickled from ``./hgb_shap_explainer.pkl``.

	    The path is relative to the current working directory. The file is
	    read and unpickled on every call.
	    """
	    # NOTE: unpickling executes code embedded in the file, so this file must
	    # come from a trusted source.
	    with open("./hgb_shap_explainer.pkl", "rb") as pickle_file:
	        explainer = pickle.load(pickle_file)
	    return explainer

	@st.cache_data(show_spinner=False)
	def load_model_columns():
	    """Return the model's column names in the order of the reference file.

	    Only the header and one data row of
	    ``./train_data_sp2_subsample_1.csv`` (``;``-separated, UTF-8) are read.
	    ``SK_ID_CURR`` and ``TARGET`` are left out of the returned list.
	    """
	    excluded = ("SK_ID_CURR", "TARGET")
	    header_df = pd.read_csv("./train_data_sp2_subsample_1.csv", sep=";", encoding="utf-8", nrows=1)
	    return [name for name in header_df.columns if name not in excluded]

	# -----------------------------------------------------------------------
	# Page configuration
	# -----------------------------------------------------------------------
	st.set_page_config(page_title="Prêt à Dépenser", layout="wide", page_icon="💳")

	# -----------------------------------------------------------------------
	# Global CSS – professional look & mobile-responsive layout
	# The stylesheet is injected as raw HTML, hence unsafe_allow_html=True.
	# -----------------------------------------------------------------------
	st.markdown("""
	<style>
	/* ── Palette ── */
	:root {
	--primary: #1a3a5c;
	--accent: #2e86de;
	--success: #27ae60;
	--danger: #e74c3c;
	--bg-card: #ffffff;
	--bg-page: #f0f4f8;
	--text-main: #1a1a2e;
	--text-muted:#6c757d;
	--radius: 12px;
	--shadow: 0 2px 12px rgba(0,0,0,.08);
	}

	/* ── Fond de page ── */
	.stApp { background-color: var(--bg-page); }

	/* ── Sidebar ── */
	section[data-testid="stSidebar"] {
	background: linear-gradient(180deg, var(--primary) 0%, #0d2137 100%);
	color: #fff;
	}
	section[data-testid="stSidebar"] * { color: #fff !important; }
	section[data-testid="stSidebar"] .stRadio label {
	font-size: 1rem;
	padding: 6px 0;
	cursor: pointer;
	}
	section[data-testid="stSidebar"] hr {
	border-color: rgba(255,255,255,.2) !important;
	}
	section[data-testid="stSidebar"] img {
	border-radius: 8px;
	margin-bottom: 8px;
	}

	/* ── Titres ── */
	h1, h2, h3 { color: var(--primary) !important; font-weight: 700 !important; }
	h1 { font-size: clamp(1.4rem, 3vw, 2rem) !important; }

	/* ── Cartes ── */
	.card {
	background: var(--bg-card);
	border-radius: var(--radius);
	box-shadow: var(--shadow);
	padding: 1.2rem 1.5rem;
	margin-bottom: 1rem;
	}

	/* ── Boutons primaires ── */
	.stButton > button[kind="primary"] {
	background: linear-gradient(90deg, var(--accent), #1565c0) !important;
	color: #fff !important;
	border: none !important;
	border-radius: 8px !important;
	padding: 0.55rem 1.4rem !important;
	font-weight: 600 !important;
	letter-spacing: .3px;
	transition: opacity .2s;
	width: 100%;
	}
	.stButton > button[kind="primary"]:hover { opacity: .88; }

	/* ── Metrics ── */
	div[data-testid="metric-container"] {
	background: var(--bg-card);
	border-radius: var(--radius);
	box-shadow: var(--shadow);
	padding: .9rem 1rem;
	}

	/* ── Inputs ── */
	.stNumberInput input, .stSelectbox select {
	border-radius: 8px !important;
	border: 1.5px solid #d1d9e0 !important;
	}

	/* ── Divider ── */
	hr { border-color: #e0e6ed !important; }

	/* ── Responsive mobile : empiler les colonnes ── */
	@media (max-width: 768px) {
	[data-testid="column"] {
	width: 100% !important;
	flex: 1 1 100% !important;
	min-width: 100% !important;
	}
	h1 { font-size: 1.3rem !important; }
	.stButton > button { font-size: .9rem !important; }
	}

	/* ── Badges navigation ── */
	.nav-badge {
	display: inline-block;
	background: rgba(255,255,255,.15);
	border-radius: 20px;
	padding: 3px 10px;
	font-size: .78rem;
	margin-left: 6px;
	}
	</style>
	""", unsafe_allow_html=True)

	# -----------------------------------------------------------------------
	# SIDEBAR – logo, navigation, "about" text
	# -----------------------------------------------------------------------
	# Page labels offered in the sidebar; the selected label ends up in `page`
	# and drives the page dispatch further down the script.
	nav_pages = [
	    "❓ Demande simple",
	    "📋 Demande en lot",
	    "📊 Dérive des données",
	    "⚡ Latence & Erreurs API",
	]
	api_badge_html = f'<p style="color:#a8c8f0; font-size:.82rem; word-break:break-all;">🔗 API : <code style="color:#a8c8f0; background:rgba(255,255,255,.1); padding:2px 5px; border-radius:4px;">{API_URL}</code></p>'

	with st.sidebar:
	    st.image("Logo_Pret_a_Depenser.png", use_container_width=True)
	    st.markdown("Outil de Scoring des demandes de crédit")
	    st.divider()

	    page = st.radio("Navigation", options=nav_pages, label_visibility="collapsed")

	    st.divider()
	    st.markdown("""
	À propos de cet outil :
	- Prédiction rapide (10 variables)
	- Prédiction en lot (fichier CSV)
	- Surveillance de la dérive des données
	- Monitoring de l'API (latence / erreurs)
	""")
	    # Shows which API instance the dashboard is talking to.
	    st.markdown(api_badge_html, unsafe_allow_html=True)

	# -----------------------------------------------------------------------
	# Main header banner (content area)
	# -----------------------------------------------------------------------
	header_html = """
	<div style="background:linear-gradient(90deg,#1a3a5c,#2e86de);
	border-radius:12px; padding:1rem 1.5rem; margin-bottom:1.2rem;
	color:#fff; display:flex; align-items:center; gap:1rem;">
	<div>
	<p style="color:#fff; margin:0; font-size:clamp(1.2rem,2.5vw,1.7rem); font-weight:700; line-height:1.2;">
	Outil de Scoring des demandes de crédit
	</p>
	<p style="margin:4px 0 0; opacity:.85; font-size:.95rem;">
	Prédiction par machine learning · Interprétabilité SHAP · Surveillance des données
	</p>
	</div>
	</div>
	"""
	st.markdown(header_html, unsafe_allow_html=True)

	# =====================================================================================================================
	# PAGE 1 – Prédiction simple
	# =====================================================================================================================
	if page == "❓ Demande simple":
	    # Single-request page: ten feature inputs, one call to the API's
	    # /predict endpoint, then the global and local SHAP views.
	    st.markdown('<div class="card">', unsafe_allow_html=True)
	    st.subheader("🔢 Paramètres de la demande")
	    st.caption("Les variables sont classées par ordre d'importance (SHAP). Remplissez les champs puis lancez la prédiction.")
	    st.markdown('</div>', unsafe_allow_html=True)

	    ext_source_help = "Score normalisé provenant d'une source de données externe"
	    # One tuple per widget: (feature label, default value, step, help text),
	    # grouped by display row (5 widgets per row). CODE_GENDER is rendered as
	    # a select box, so its default/step slots are unused.
	    input_rows = [
	        [
	            ("EXT_SOURCE_3", 0.0, 0.01, ext_source_help),
	            ("EXT_SOURCE_2", 0.0, 0.01, ext_source_help),
	            ("EXT_SOURCE_1", 0.0, 0.01, ext_source_help),
	            ("DAYS_EMPLOYED", 0, 1,
	             "Nombre de jours avant la demande où la personne a commencé son emploi actuel (chiffre négatif)"),
	            ("PAYMENT_RATE", 0.0, 0.01, "PAYMENT_RATE = AMT_ANNUITY / AMT_CREDIT"),
	        ],
	        [
	            ("INSTAL_DPD_MEAN", 0.0, 1.0,
	             "Moyenne des jours de retard sur les paiements des crédits précédents (si négatif, mettre 0)"),
	            ("PREV_CNT_PAYMENT_MEAN", 0.0, 1.0, "Moyenne des durées des crédits précédents"),
	            ("AMT_ANNUITY", 0.0, 1.0, "Annuité du prêt"),
	            ("CODE_GENDER", None, None, "Sexe du client (0:Femme, 1:Homme)"),
	            ("PREV_NAME_PRODUCT_TYPE_walk-in_MEAN", 0.0, 0.01,
	             "Ratio de demandes précédentes faites en agence (walk-in)"),
	        ],
	    ]

	    # Feature name -> value entered by the user, filled in display order.
	    features = {}
	    with st.container():
	        for row in input_rows:
	            for column, (label, default, step, help_text) in zip(st.columns(5), row):
	                bounds = features_min_max[label]
	                # Widget key = lower-cased label with '-' turned into '_'.
	                widget_key = label.lower().replace("-", "_")
	                with column:
	                    if label == "CODE_GENDER":
	                        features[label] = st.selectbox(
	                            label, (bounds["femme"], bounds["homme"]), index=0,
	                            help=help_text, key=widget_key)
	                    else:
	                        features[label] = st.number_input(
	                            label, min_value=bounds["min"], max_value=bounds["max"],
	                            value=default, step=step, help=help_text, key=widget_key)

	    st.divider()
	    if st.button("🔮 Lancer la prédiction", type="primary"):
	        st.subheader("Résultat de la prédiction", divider="blue")

	        try:
	            with st.spinner("Analyse en cours..."):
	                response = requests.post(f"{API_URL}/predict", json={"features": features}, timeout=30)

	            if response.status_code == 200:
	                result = response.json()
	                prediction = result.get("prediction")
	                probability = result.get("probability")

	                if prediction not in (0, 1) or probability is None:
	                    # Never present an incomplete answer as an accepted file.
	                    st.error("❌ Réponse inattendue de l'API : champ 'prediction' ou 'probability' manquant ou invalide.")
	                else:
	                    proba_rejet = probability * 100
	                    seuil = result.get("threshold", 0.474) * 100
	                    ecart = proba_rejet - seuil

	                    col_result1, col_result2 = st.columns([1, 2])
	                    with col_result1:
	                        if prediction == 1:
	                            st.error("⚠️ DOSSIER REJETÉ")
	                            st.metric(label="Probabilité de défaut", value=f"{proba_rejet:.1f}%",
	                                      delta=f"+{ecart:.1f}% au-dessus du seuil", delta_color="inverse")
	                        else:
	                            st.success("✅ DOSSIER ACCEPTÉ")
	                            st.metric(label="Probabilité de défaut", value=f"{proba_rejet:.1f}%",
	                                      delta=f"{ecart:.1f}% sous le seuil", delta_color="normal")

	                    with col_result2:
	                        # The decision shown here is the API's own prediction,
	                        # so it always agrees with the banner on the left.
	                        st.info(f"Règle de décision :\n- Seuil de rejet : {seuil:.1f}%\n"
	                                f"- Probabilité de défaut : {proba_rejet:.2f}%\n"
	                                f"- Décision : {'REJET' if prediction == 1 else 'ACCEPTÉ'}")
	            else:
	                st.error(f"❌ Erreur API: {response.status_code} - {response.text}")

	        except requests.exceptions.ConnectionError:
	            st.error("❌ Impossible de se connecter à l'API.")
	        except requests.exceptions.Timeout:
	            st.error("❌ Timeout : l'API met trop de temps à répondre.")
	        except Exception as e:
	            st.error(f"❌ Erreur inattendue : {str(e)}")

	        st.subheader("Interprétabilité SHAP", divider="gray")
	        col20, col21 = st.columns(2)
	        with col20:
	            with st.container(border=True):
	                st.markdown("🌍 Vue globale du modèle")
	                st.image("./hgb_shap_global.png", use_container_width=True)

	        with col21:
	            with st.container(border=True):
	                st.markdown("🔍 Vue locale – cette demande")
	                try:
	                    explainer = load_shap_explainer()
	                    model_columns = load_model_columns()
	                    features_df = pd.DataFrame([features], columns=list(features.keys()))
	                    # Align on the model's column order; columns the form does
	                    # not provide become NaN.
	                    features_df = features_df.reindex(columns=model_columns)
	                    shap_values = explainer(features_df)
	                    # New current figure for the waterfall plot to draw on.
	                    fig_shap, _ = plt.subplots()
	                    try:
	                        shap.plots.waterfall(shap_values[0], max_display=10, show=False)
	                        st.pyplot(fig_shap, bbox_inches='tight')
	                    finally:
	                        # Release the figure even when plotting fails.
	                        plt.close(fig_shap)
	                except Exception as e:
	                    st.warning(f"⚠️ Impossible d'afficher le graphique SHAP local : {str(e)}")

	# =====================================================================================================================
	# PAGE 2 – Prédiction en lot
	# =====================================================================================================================
	elif page == "📋 Demande en lot":
	st.markdown('<div class="card">', unsafe_allow_html=True)
	st.subheader("📂 Import du fichier de demandes")
	st.caption("Formats acceptés : CSV avec séparateur `;`, encodage UTF-8, colonne `SK_ID_CURR`. Maximum 1 000 lignes traitées.")
	st.markdown('</div>', unsafe_allow_html=True)

	uploaded_file = st.file_uploader(
	"Sélectionnez un fichier CSV pré-traité",
	type=["csv"],
	help="Le fichier doit contenir toutes les variables nécessaires (sep=';', encodage UTF-8)."
	)

	if uploaded_file is not None:
	try:
	dataframe = pd.read_csv(uploaded_file, sep=';', encoding='utf-8', index_col='SK_ID_CURR', nrows=1000)
	st.subheader("Aperçu des données chargées", divider="gray")
	st.dataframe(dataframe, use_container_width=True)
	except Exception as e:
	st.error(f"❌ Erreur lors de la lecture du fichier CSV : {e}")
	st.stop()

	st.divider()
	if st.button("🔮 Lancer les prédictions en lot", type="primary", key="predict_batch"):
	try:
	with st.spinner("Analyse en cours..."):
	uploaded_file.seek(0)
	response = requests.post(
	f"{API_URL}/predict/file",
	files={"file": (uploaded_file.name, uploaded_file, "text/csv")},
	timeout=60
	)

	if response.status_code == 200:
	result = response.json()
	predictions = result.get("predictions", [])
	probabilities = result.get("probabilities", [])
	seuil = result.get("threshold", 0.474) * 100
	count = result.get("count", 0)

	st.success(f"✅ {count} prédictions effectuées avec succès !")

	col_stats1, col_stats2, col_stats3 = st.columns(3)
	nb_acceptes = predictions.count(0)
	nb_rejetes = predictions.count(1)
	with col_stats1:
	st.metric("Total des demandes", count)
	with col_stats2:
	st.metric("Dossiers acceptés ✅", nb_acceptes,
	delta=f"{nb_acceptes/count*100:.1f}%" if count > 0 else "0%")
	with col_stats3:
	st.metric("Dossiers rejetés ⚠️", nb_rejetes,
	delta=f"{nb_rejetes/count*100:.1f}%" if count > 0 else "0%", delta_color="inverse")

	dataframe_result = dataframe.copy()
	dataframe_result['Probabilite_defaut'] = [round(p * 100, 2) for p in probabilities]
	dataframe_result['Prediction'] = predictions

	st.subheader("Détail des prédictions", divider="gray")
	st.dataframe(
	dataframe_result[['Probabilite_defaut', 'Prediction']].style.map(
	lambda x: 'background-color: #ffcccc' if x == 1 else 'background-color: #ccffcc',
	subset=['Prediction']
	),
	use_container_width=True
	)

	csv_result = dataframe_result.to_csv(index=True, sep=';', encoding='utf-8')
	st.download_button(
	label="📥 Télécharger les résultats (CSV)",
	data=csv_result,
	file_name="predictions_resultats.csv",
	mime="text/csv"
	)
	st.info(f"Seuil de décision appliqué : {seuil:.1f}% — Les dossiers dont la probabilité dépasse ce seuil sont rejetés.")
	else:
	st.error(f"❌ Erreur API: {response.status_code} - {response.text}")

	except requests.exceptions.ConnectionError:
	st.error("❌ Impossible de se connecter à l'API.")
	except requests.exceptions.Timeout:
	st.error("❌ Timeout : l'API met trop de temps à répondre.")
	except Exception as e:
	st.error(f"❌ Erreur inattendue : {str(e)}")

	# =====================================================================================================================
	# PAGE 3 – Dérive des données
	# =====================================================================================================================
	elif page == "📊 Dérive des données":
	st.markdown('<div class="card">', unsafe_allow_html=True)
	st.subheader("📊 Analyse de la dérive des données")
	st.markdown(
	"🚧 La génération des rapports est disponible dès que le dataset actuel contient au moins 100 lignes. "
	"L'analyse porte sur les 1 000 dernières lignes pour garantir fiabilité et performance."
	)
	st.markdown('</div>', unsafe_allow_html=True)

	@st.cache_data(show_spinner=False, ttl=3600)
	def build_evidently_html(current_df, reference_df, report: int) -> str:
	    """Run an Evidently drift report and return it as an HTML string.

	    Args:
	        current_df: data to check, passed first to ``Report.run``.
	        reference_df: baseline data, passed second to ``Report.run``.
	        report: ``0`` builds one ``ValueDrift`` metric for each of the first
	            ten entries of ``important_features``; any other value builds a
	            single ``DriftedColumnsCount`` metric tested with
	            ``lte(threshold_drift)``.

	    Returns:
	        The content of the HTML file written by ``save_html``.

	    ``important_features`` and ``threshold_drift`` are read from the
	    enclosing script at call time. Results are cached by ``st.cache_data``
	    for one hour.
	    """
	    if report == 0:
	        # The feature at position 8 is tested with chi-square, every other
	        # one with Kolmogorov-Smirnov.
	        metrics = [
	            ValueDrift(column=name, method="chisquare" if position == 8 else "ks")
	            for position, name in enumerate(important_features[:10])
	        ]
	    else:
	        metrics = [DriftedColumnsCount(share_tests=[lte(threshold_drift)])]

	    my_eval = Report(metrics).run(current_df, reference_df)

	    # save_html() writes to a path, so a temporary file name is reserved
	    # first. delete=False keeps the file after close(); the finally clause
	    # removes it whether or not saving/reading succeeds.
	    tmp = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
	    tmp.close()
	    try:
	        my_eval.save_html(tmp.name)
	        with open(tmp.name, "r", encoding="utf-8") as f:
	            return f.read()
	    finally:
	        os.remove(tmp.name)

	def drop_unnecessary_columns(df: pd.DataFrame):
	    """Return a copy of *df* without its bookkeeping columns.

	    Removes whichever of ``SK_ID_CURR``, ``TARGET``, ``_prediction`` and
	    ``_timestamp`` are present among the columns; *df* itself is left
	    unchanged.
	    """
	    bookkeeping = ['SK_ID_CURR', 'TARGET', '_prediction', '_timestamp']
	    to_remove = [name for name in df.columns if name in bookkeeping]
	    return df.drop(columns=to_remove)

	@st.cache_data(show_spinner=False, ttl=3600)
	def load_reference_data():
	    """Return the reference dataset read from ``./train_data_sp1.pkl``.

	    The file is a gzip-compressed pickle read with ``pd.read_pickle``; the
	    result is cached by ``st.cache_data`` for one hour.
	    """
	    # NOTE: unpickling executes code embedded in the file, so this file must
	    # come from a trusted source.
	    reference_data = pd.read_pickle("./train_data_sp1.pkl", compression="gzip")
	    return reference_data

	# Reference data without its bookkeeping columns.
	reference_df = drop_unnecessary_columns(load_reference_data())

	# Current data: last 1 000 rows of data_io.csv from the HF dataset repo.
	try:
	    current_file_path = hf_hub_download(repo_id="CedM/oc_mlops_projet_2", filename="data_io.csv", repo_type="dataset")
	    current_df = pd.read_csv(current_file_path, encoding="utf-8", sep=";", index_col='SK_ID_CURR').tail(1000)
	    current_df = drop_unnecessary_columns(current_df)
	except FileNotFoundError:
	    st.error("❌ Fichier 'data_io.csv' introuvable sur le dépôt Hugging Face.")
	    st.stop()
	except Exception as e:
	    st.error(f"❌ Erreur lors du téléchargement : {str(e)}")
	    st.stop()

	# The ten features covered by the detailed report, in display order.
	important_features = [
	    "EXT_SOURCE_3", "EXT_SOURCE_2", "EXT_SOURCE_1", "DAYS_EMPLOYED",
	    "PAYMENT_RATE", "INSTAL_DPD_MEAN", "PREV_CNT_PAYMENT_MEAN",
	    "AMT_ANNUITY", "CODE_GENDER", "PREV_NAME_PRODUCT_TYPE_walk-in_MEAN"
	]
	# Threshold handed to lte() for the share of drifted columns.
	threshold_drift = 0.50

	current_df_count = current_df.shape[0]
	# "\\|" keeps the backslash-escaped pipe in the displayed text without using
	# an invalid escape sequence in the Python literal.
	st.info(f"📂 Dataset actuel : {current_df_count} lignes  \\|  📂 Dataset de référence : {reference_df.shape[0]} lignes")
	st.divider()

	if current_df_count < 100:
	    st.warning("⚠️ Le dataset actuel contient moins de 100 lignes. L'analyse sera disponible dès 100 lignes.")
	else:
	    # Each option is a (label, report code) pair; only the label is shown.
	    report_type = st.selectbox(
	        "Type de rapport",
	        options=[
	            ("Rapport détaillé – 10 variables les plus importantes (SHAP)", 0),
	            ("Rapport synthétique – toutes les autres variables", 1)
	        ],
	        format_func=lambda x: x[0],
	        key="report_type"
	    )

	    if st.button("🔮 Lancer l'analyse de dérive", type="primary", key="run_data_drift",
	                 help="Compare le dataset actuel au dataset de référence."):
	        try:
	            with st.spinner("Génération du rapport Evidently en cours..."):
	                if report_type[1] == 0:
	                    # Detailed report: keep only the important features.
	                    ref_df_filtered = reference_df[[c for c in reference_df.columns if c in important_features]]
	                    cur_df_filtered = current_df[[c for c in current_df.columns if c in important_features]]
	                else:
	                    # Summary report: every column except the important ones.
	                    ref_df_filtered = reference_df.drop(columns=[c for c in important_features if c in reference_df.columns])
	                    cur_df_filtered = current_df.drop(columns=[c for c in important_features if c in current_df.columns])

	                html_content = build_evidently_html(cur_df_filtered, ref_df_filtered, report=report_type[1])

	            col_i1, col_i2 = st.columns(2)
	            col_i1.metric("Variables dans le dataset de référence", ref_df_filtered.shape[1])
	            col_i2.metric("Variables dans le dataset actuel", cur_df_filtered.shape[1])

	            st.subheader("Rapport Evidently", divider="gray")
	            st_html(html_content, height=900, scrolling=True)
	        except Exception as e:
	            st.error(f"❌ Erreur inattendue : {str(e)}")

	# =====================================================================================================================
	# PAGE 4 – Latence & Erreurs API
	# =====================================================================================================================
	elif page == "⚡ Latence & Erreurs API":
	st.markdown('<div class="card">', unsafe_allow_html=True)
	st.subheader("⚡ Monitoring de l'API – Latence & Erreurs")
	st.caption("Fenêtre d'analyse : 72 dernières heures. Les logs sont mis à jour après chaque appel aux endpoints `/predict` et `/predict/file`.")
	st.markdown('</div>', unsafe_allow_html=True)

	def load_api_logs():
	    """Download and parse the API log, keeping the last 72 hours.

	    ``api_log.jsonl`` is fetched from the ``CedM/oc_mlops_projet_2`` dataset
	    repo (forced re-download); if that fails for any reason the local file
	    ``../api_log.jsonl`` is tried instead. Each line is expected to be a
	    JSON object with ``timestamp``, ``message``, ``level`` and ``module``
	    keys; lines that are not valid JSON objects or have no parsable
	    timestamp are skipped.

	    Returns:
	        A dict with:
	        - ``"df"``: one row per successful prediction call (``timestamp``,
	          ``endpoint``, ``latency``, ``prediction_count``);
	        - ``"errors_df"``: one row per ERROR/WARNING entry (``timestamp``,
	          ``level``, ``message``, ``module``);
	        - ``"error_counts"``: ``ERROR``, ``WARNING`` and ``TOTAL`` counts.
	        When the log file cannot be found, an error is displayed and both
	        frames are empty with ``error_counts`` set to ``{}``.

	    The 72-hour window ends at the most recent timestamp found in the log
	    (or at the current UTC time when the log has none).
	    """
	    # Compiled once here instead of once per log line.
	    single_pattern = re.compile(r"Prédiction effectuée avec succès:.*temps=([\d.]+)s\)")
	    batch_pattern = re.compile(r"Prédictions effectuées avec succès: (\d+) résultats \(temps d'exécution: ([\d.]+)s\)")

	    def _within_window(frame, cutoff):
	        # Normalise the timestamp column and keep rows at or after `cutoff`.
	        if frame.empty:
	            return frame
	        frame["timestamp"] = pd.to_datetime(frame["timestamp"], utc=True, errors="coerce")
	        frame = frame.dropna(subset=["timestamp"])
	        return frame[frame["timestamp"] >= cutoff]

	    try:
	        log_file_path = hf_hub_download(
	            repo_id="CedM/oc_mlops_projet_2", filename="api_log.jsonl",
	            repo_type="dataset", force_download=True
	        )
	    except Exception:
	        # Deliberate best effort: fall back to a local copy of the log.
	        log_file_path = "../api_log.jsonl"

	    logs_data = []
	    errors_list = []
	    timestamps_all = []

	    try:
	        with open(log_file_path, 'r', encoding='utf-8') as f:
	            for line in f:
	                try:
	                    log_entry = json.loads(line.strip())
	                except json.JSONDecodeError:
	                    continue
	                # Valid JSON that is not an object (number, list, ...) has
	                # no fields to read.
	                if not isinstance(log_entry, dict):
	                    continue

	                ts = pd.to_datetime(log_entry.get("timestamp"), utc=True, errors="coerce")
	                if pd.isna(ts):
	                    continue
	                timestamps_all.append(ts)

	                # A null or non-string message must not break the regex search.
	                raw_message = log_entry.get("message", "")
	                message = raw_message if isinstance(raw_message, str) else str(raw_message or "")

	                match_single = single_pattern.search(message)
	                match_batch = batch_pattern.search(message)
	                if match_single:
	                    logs_data.append({"timestamp": ts, "endpoint": "/predict",
	                                      "latency": float(match_single.group(1)), "prediction_count": 1})
	                elif match_batch:
	                    logs_data.append({"timestamp": ts, "endpoint": "/predict/file",
	                                      "latency": float(match_batch.group(2)),
	                                      "prediction_count": int(match_batch.group(1))})

	                level = str(log_entry.get("level", "")).upper()
	                if level in ("ERROR", "WARNING"):
	                    errors_list.append({"timestamp": ts, "level": level, "message": message,
	                                        "module": log_entry.get("module", "")})
	    except FileNotFoundError:
	        st.error("❌ Fichier 'api_log.jsonl' introuvable.")
	        return {"df": pd.DataFrame(), "errors_df": pd.DataFrame(), "error_counts": {}}

	    last_ts = max(timestamps_all) if timestamps_all else pd.Timestamp.now(tz='UTC')
	    cutoff_time = last_ts - pd.Timedelta(hours=72)

	    df = _within_window(pd.DataFrame(logs_data), cutoff_time)
	    df_err = _within_window(pd.DataFrame(errors_list), cutoff_time)

	    if df_err.empty:
	        error_counts = {"ERROR": 0, "WARNING": 0, "TOTAL": 0}
	    else:
	        error_counts = {
	            "ERROR": int((df_err["level"] == "ERROR").sum()),
	            "WARNING": int((df_err["level"] == "WARNING").sum()),
	            "TOTAL": int(df_err.shape[0]),
	        }
	    return {"df": df, "errors_df": df_err, "error_counts": error_counts}

	# NOTE(review): the nesting below is reconstructed — only the session flag
	# is placed under the button, the logs are loaded on every run, and the
	# error section sits in the "data available" branch. Confirm against the
	# deployed file.
	if st.button("🔄 Rafraîchir les données", type="primary", key="refresh_logs",
	             help="Recharge les logs depuis le HF Dataset."):
	    st.session_state['refresh_logs_triggered'] = True

	with st.spinner("Chargement des logs..."):
	    result = load_api_logs()

	df_logs = result.get('df', pd.DataFrame())
	df_errors = result.get('errors_df', pd.DataFrame())
	error_counts = result.get('error_counts', {})

	nothing_to_show = df_logs.empty and df_errors.empty
	if nothing_to_show:
	    st.warning("⚠️ Aucune donnée de latence ou d'erreur disponible dans les logs de l'API.")
	else:
	    if not df_logs.empty:
	        st.info(f"📊 {len(df_logs)} appels de prédiction enregistrés sur les 72 dernières heures.")
	        st.subheader("Latence au fil du temps", divider="gray")

	        fig = go.Figure()
	        colors = {"/predict": "#2e86de", "/predict/file": "#e67e22"}

	        # One trace per endpoint, in order of first appearance in the logs.
	        for endpoint, df_ep in df_logs.groupby("endpoint", sort=False):
	            df_ep = df_ep.sort_values("timestamp")
	            tint = colors.get(endpoint, "#333")
	            fig.add_trace(go.Scatter(
	                x=df_ep["timestamp"], y=df_ep["latency"],
	                mode='lines+markers', name=endpoint,
	                marker=dict(size=8, color=tint, line=dict(width=1, color='DarkSlateGrey')),
	                line=dict(color=tint, width=2),
	                hovertemplate="<b>%{x}</b><br>Latence: %{y:.4f}s<br>Nb prédictions: %{customdata}<extra></extra>",
	                customdata=df_ep["prediction_count"]
	            ))
	            if len(df_ep) < 2:
	                # Mean line and ±σ band are only drawn from two points on.
	                continue
	            mean_l = df_ep["latency"].mean()
	            std_l = df_ep["latency"].std()
	            fig.add_hline(y=mean_l, line_dash="dash", line_color=tint,
	                          annotation_text=f"Moy. {endpoint}: {mean_l:.4f}s", annotation_position="right", annotation_font_size=10)
	            fig.add_hrect(y0=max(0, mean_l - std_l), y1=mean_l + std_l,
	                          fillcolor=tint, opacity=0.1, line_width=0,
	                          annotation_text=f"±σ {endpoint}: {std_l:.4f}s", annotation_position="right", annotation_font_size=9)

	        fig.update_layout(
	            title="Latence de prédiction de l'API (fenêtre 72h)",
	            xaxis_title="Date et heure (UTC)", yaxis_title="Temps d'exécution (s)",
	            hovermode="x unified",
	            legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
	            height=480,
	            plot_bgcolor="#f8fafc",
	            paper_bgcolor="#f8fafc",
	        )
	        st.plotly_chart(fig, use_container_width=True)

	    st.subheader("Erreurs & Warnings (72h)", divider="gray")
	    # (metric title, key in error_counts), left to right.
	    metric_specs = (
	        ("🔴 Erreurs (ERROR)", 'ERROR'),
	        ("🟡 Warnings (WARNING)", 'WARNING'),
	        ("📋 Total événements", 'TOTAL'),
	    )
	    for slot, (title, count_key) in zip(st.columns(3), metric_specs):
	        with slot:
	            st.metric(title, error_counts.get(count_key, 0))

	    if not df_errors.empty:
	        st.subheader("Détail des événements", divider="gray")
	        latest_first = df_errors.sort_values('timestamp', ascending=False).reset_index(drop=True)
	        st.dataframe(latest_first, use_container_width=True)