Spaces:

ericjedha
/

fraud-detection-streamlit

Sleeping

App Files Files Community

fraud-detection-streamlit / app.py

ericjedha

Update app.py

462278f verified 2 months ago

raw

history blame contribute delete

33.2 kB

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from sqlalchemy import create_engine, text
	from datetime import datetime, timedelta
	import os

	# ========================== CONFIGURATION ==========================
	st.set_page_config(
	page_title="Fraud Detection Dashboard",
	page_icon="🕵🏻",
	layout="wide",
	initial_sidebar_state="expanded"
	)

	# Couleurs (variables pour personnalisation facile)
	COLOR_FRAUD = "#FF4B4B" # Rouge
	COLOR_NO_FRAUD = "#00CC66" # Vert
	COLOR_SAVED = "#FFD700" # Or pour l'argent économisé

	# ========================== CONNEXION BASE DE DONNÉES ==========================
	@st.cache_resource
	def get_db_connection():
	"""Connexion directe à Neon DB (non-pooler, stable pour petit volume)"""
	try:
	database_url = os.environ.get("NEON_DB_FRAUD_URL")
	if not database_url:
	st.error("❌ Variable NEON_DB_FRAUD_URL non trouvée dans les secrets Hugging Face")
	st.stop()

	# Connexion directe sans pooler → autorise les options PostgreSQL
	engine = create_engine(
	database_url,
	pool_pre_ping=True,
	pool_recycle=3600,
	connect_args={
	"connect_timeout": 10,
	"options": "-c statement_timeout=30000"
	}
	)
	return engine
	except Exception as e:
	st.error(f"❌ Erreur de connexion à la base de données: {e}")
	st.stop()


	# ========================== CHARGEMENT DES DONNÉES ==========================
	@st.cache_data(ttl=3600)
	def load_csv_data():
	"""Charge le fichier CSV pour l'EDA"""
	try:
	df = pd.read_csv("fraudTest.csv")
	return df
	except Exception as e:
	st.error(f"❌ Erreur lors du chargement du fichier CSV: {e}")
	return pd.DataFrame()


	# ========================== REQUÊTES SQL OPTIMISÉES ==========================
	def load_all_data():
	"""Charge toutes les transactions - APPELÉ SEULEMENT APRÈS CLIC SUR REFRESH"""
	engine = get_db_connection()
	query = text("""
	SELECT
	trans_num, merchant, category, amt, gender, city, zip, city_pop, job,
	hour, day, month, year, pred_is_fraud, is_fraud_ground_truth,
	transaction_time, created_at
	FROM fraud_predictions
	ORDER BY created_at DESC
	LIMIT 10000
	""")

	try:
	with engine.connect() as conn:
	df = pd.read_sql(query, conn)
	df['created_at'] = pd.to_datetime(df['created_at'])
	return df
	except Exception as e:
	st.error(f"❌ Erreur lors du chargement des données: {e}")
	return pd.DataFrame()

	def load_last_24h_data():
	"""Charge les transactions des dernières 24h"""
	engine = get_db_connection()
	query = text("""
	SELECT trans_num, merchant, category, amt, gender, city, pred_is_fraud, created_at
	FROM fraud_predictions
	WHERE created_at >= NOW() - INTERVAL '24 HOURS'
	ORDER BY created_at DESC
	""")

	try:
	with engine.connect() as conn:
	df = pd.read_sql(query, conn)
	df['created_at'] = pd.to_datetime(df['created_at'])
	return df
	except Exception as e:
	st.error(f"❌ Erreur lors du chargement des données 24h: {e}")
	return pd.DataFrame()

	def load_last_7_days_stats():
	"""Charge les stats agrégées des 7 derniers jours"""
	engine = get_db_connection()
	query = text("""
	SELECT DATE(created_at) as date,
	SUM(CASE WHEN pred_is_fraud = 1 THEN 1 ELSE 0 END) as frauds,
	SUM(CASE WHEN pred_is_fraud = 0 THEN 1 ELSE 0 END) as no_frauds
	FROM fraud_predictions
	WHERE created_at >= NOW() - INTERVAL '30 DAYS'
	GROUP BY DATE(created_at)
	ORDER BY date ASC
	""")

	try:
	with engine.connect() as conn:
	df = pd.read_sql(query, conn)
	return df
	except Exception as e:
	st.error(f"❌ Erreur lors du chargement des stats 7 jours: {e}")
	return pd.DataFrame()

	def load_dashboard_summary():
	"""Charge les métriques pour le dashboard (robuste à toutes versions SQLAlchemy)"""
	engine = get_db_connection()
	query = text("""
	SELECT
	COUNT(*) as total_transactions,
	COALESCE(SUM(CASE WHEN pred_is_fraud = 1 THEN 1 ELSE 0 END), 0) as total_frauds,
	COALESCE(SUM(CASE WHEN pred_is_fraud = 0 THEN 1 ELSE 0 END), 0) as total_no_frauds,
	COALESCE(SUM(CASE WHEN pred_is_fraud = 1 THEN amt ELSE 0 END), 0) as total_fraud_amount
	FROM fraud_predictions;
	""")

	try:
	with engine.connect() as conn:
	# mappins().first() renvoie un dict-like (compatible SQLAlchemy 1.x/2.x)
	result = conn.execute(query).mappings().first()

	if not result:
	return {'total_frauds': 0, 'total_no_frauds': 0, 'total_fraud_amount': 0.0}

	# Convertir explicitement en float pour éviter Decimal * float errors
	total_fraud_amount = result.get('total_fraud_amount', 0) or 0
	try:
	total_fraud_amount = float(total_fraud_amount)
	except (TypeError, ValueError):
	total_fraud_amount = 0.0

	return {
	'total_frauds': int(result.get('total_frauds', 0) or 0),
	'total_no_frauds': int(result.get('total_no_frauds', 0) or 0),
	'total_fraud_amount': total_fraud_amount
	}

	except Exception as e:
	st.error(f"❌ Erreur lors du chargement du résumé: {e}")
	return {'total_frauds': 0, 'total_no_frauds': 0, 'total_fraud_amount': 0.0}



	# ========================== PAGE: DASHBOARD ==========================
	def page_dashboard():
	st.title("🕵🏻 Fraud Detection Dashboard")

	# Message d'instruction
	st.info("👇 Cliquez sur Refresh Data pour charger les données")

	# Bouton refresh qui contrôle le chargement
	if st.button("🔄 Refresh Data", type="primary", key="dashboard_refresh"):
	st.session_state.dashboard_loaded = True

	# Ne charger que si le bouton a été cliqué
	if not st.session_state.get('dashboard_loaded', False):
	st.warning("⚠️ Cliquez sur 'Refresh Data' pour afficher le dashboard")
	return

	with st.spinner("Chargement des données..."):
	# Charger d'abord le résumé (rapide)
	summary = load_dashboard_summary()
	df_7days = load_last_7_days_stats()

	# ========================== MÉTRIQUES ==========================
	total_frauds = summary['total_frauds']
	total_no_frauds = summary['total_no_frauds']
	saved_amount = int(summary['total_fraud_amount'] * 1.5)

	col1, col2, col3 = st.columns(3)
	with col1:
	st.markdown(f"""
	<div style="background-color: {COLOR_FRAUD}; padding: 20px; border-radius: 10px; text-align: center;">
	<h3 style="color: white; margin: 0;">🚨 Frauds</h3>
	<h1 style="color: white; margin: 10px 0;">{total_frauds}</h1>
	</div>
	""", unsafe_allow_html=True)
	with col2:
	st.markdown(f"""
	<div style="background-color: {COLOR_NO_FRAUD}; padding: 20px; border-radius: 10px; text-align: center;">
	<h3 style="color: white; margin: 0;">✅ No Frauds</h3>
	<h1 style="color: white; margin: 10px 0;">{total_no_frauds}</h1>
	</div>
	""", unsafe_allow_html=True)
	with col3:
	st.markdown(f"""
	<div style="background-color: {COLOR_SAVED}; padding: 20px; border-radius: 10px; text-align: center;">
	<h3 style="color: white; margin: 0;">💰 Saved Amount</h3>
	<h1 style="color: white; margin: 10px 0;">${saved_amount}</h1>
	</div>
	""", unsafe_allow_html=True)

	st.markdown("<br>", unsafe_allow_html=True)

	# ========================== GRAPHIQUES ==========================
	col_pie, col_saved_detail = st.columns([1, 1])

	with col_pie:
	fig_pie = go.Figure(data=[go.Pie(
	labels=['Frauds', 'No Frauds'],
	values=[total_frauds, total_no_frauds],
	marker=dict(colors=[COLOR_FRAUD, COLOR_NO_FRAUD]),
	hole=0.4,
	textinfo='label+percent',
	textfont_size=14
	)])
	fig_pie.update_layout(title="Distribution Fraud vs No Fraud", showlegend=True, height=400)
	st.plotly_chart(fig_pie, use_container_width=True)

	with col_saved_detail:
	total_fraud_amount = summary['total_fraud_amount']
	additional_costs = total_fraud_amount * 0.5
	st.markdown("### 💵 Breakdown of Saved Amount")
	st.markdown(f"""
	- Total Fraud Amounts: ${total_fraud_amount:,.2f}
	- Estimated Additional Costs (chargebacks, fees): ${additional_costs:,.2f}
	- Total Saved: ${saved_amount:,.2f}
	""")
	fig_breakdown = go.Figure(data=[
	go.Bar(name='Fraud Amount', x=['Saved'], y=[total_fraud_amount], marker_color=COLOR_FRAUD),
	go.Bar(name='Additional Costs', x=['Saved'], y=[additional_costs], marker_color=COLOR_SAVED)
	])
	fig_breakdown.update_layout(barmode='stack', showlegend=True, height=250, yaxis_title="Amount ($)")
	st.plotly_chart(fig_breakdown, use_container_width=True)

	# ========================== GRAPHIQUE 7 JOURS ==========================
	st.markdown("### 📊 Fraud Trend - Last 30 Days")
	if not df_7days.empty:
	fig_trend = go.Figure()
	fig_trend.add_trace(go.Bar(name='Frauds', x=df_7days['date'], y=df_7days['frauds'], marker_color=COLOR_FRAUD))
	fig_trend.add_trace(go.Bar(name='No Frauds', x=df_7days['date'], y=df_7days['no_frauds'], marker_color=COLOR_NO_FRAUD))
	fig_trend.update_layout(barmode='stack', xaxis_title="Date", yaxis_title="Number of Transactions", height=400, showlegend=True, hovermode='x unified')
	st.plotly_chart(fig_trend, use_container_width=True)
	else:
	st.info("Pas encore de données sur 7 jours")


	# ========================== PAGE: EDA ==========================
	def page_eda():
	st.title("📊 Exploratory Data Analysis")

	st.info("👇 Cliquez sur Load Data pour charger les données EDA")

	if st.button("🔄 Load Data", type="primary", key="eda_refresh"):
	st.session_state.eda_loaded = True

	if not st.session_state.get('eda_loaded', False):
	st.warning("⚠️ Cliquez sur 'Load Data' pour afficher l'analyse")
	return

	with st.spinner("Chargement des données..."):
	df = load_csv_data()

	if df.empty:
	st.error("Impossible de charger les données")
	return

	# ========================== 1. RÉSUMÉ DU DATASET ==========================
	st.markdown("## 📋 Résumé du Dataset")

	# Informations générales
	col1, col2, col3, col4 = st.columns(4)
	with col1:
	st.metric("📊 Nombre de lignes", f"{len(df):,}")
	with col2:
	st.metric("📋 Nombre de colonnes", f"{len(df.columns)}")
	with col3:
	st.metric("💾 Taille mémoire", f"{df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
	with col4:
	duplicates = df.duplicated().sum()
	st.metric("🔄 Doublons", f"{duplicates:,}")

	# Valeurs manquantes
	st.markdown("### 🔍 Valeurs manquantes")
	missing = df.isnull().sum()
	missing_pct = (missing / len(df) * 100).round(2)
	missing_df = pd.DataFrame({
	'Colonne': missing.index,
	'Manquantes': missing.values,
	'Pourcentage': missing_pct.values
	})
	missing_df = missing_df[missing_df['Manquantes'] > 0].sort_values('Manquantes', ascending=False)

	if not missing_df.empty:
	fig_missing = px.bar(
	missing_df,
	x='Colonne',
	y='Pourcentage',
	title='Pourcentage de valeurs manquantes par colonne',
	color='Pourcentage',
	color_continuous_scale='Reds',
	text=missing_df['Pourcentage'].apply(lambda x: f"{x:.1f}%")
	)
	fig_missing.update_layout(showlegend=False, height=400)
	st.plotly_chart(fig_missing, use_container_width=True)
	else:
	st.success("✅ Aucune valeur manquante dans le dataset !")

	# Statistiques descriptives
	st.markdown("### 📊 Statistiques descriptives (Variables numériques)")

	# Sélecteur de colonnes numériques
	numeric_cols_all = df.select_dtypes(include=['float64', 'int64']).columns.tolist()
	selected_stats_cols = st.multiselect(
	"Choisissez les colonnes à analyser",
	numeric_cols_all,
	default=numeric_cols_all[:5]
	)

	if selected_stats_cols:
	stats_df = df[selected_stats_cols].describe().T
	stats_df['missing'] = df[selected_stats_cols].isnull().sum().values
	stats_df['missing_pct'] = (stats_df['missing'] / len(df) * 100).round(2)

	# Formater pour l'affichage
	display_stats = stats_df[['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max', 'missing', 'missing_pct']]
	display_stats.columns = ['Count', 'Moyenne', 'Écart-type', 'Min', 'Q1', 'Médiane', 'Q3', 'Max', 'Manquantes', 'Manquantes (%)']

	st.dataframe(
	display_stats.style.format({
	'Moyenne': '{:.2f}',
	'Écart-type': '{:.2f}',
	'Min': '{:.2f}',
	'Q1': '{:.2f}',
	'Médiane': '{:.2f}',
	'Q3': '{:.2f}',
	'Max': '{:.2f}',
	'Manquantes (%)': '{:.2f}'
	}),
	use_container_width=True
	)

	# Distribution des variables numériques
	st.markdown("### 📈 Distributions des variables numériques")
	selected_dist = st.selectbox("Choisissez une variable à visualiser", selected_stats_cols)

	col_hist, col_box = st.columns(2)

	with col_hist:
	fig_hist = px.histogram(
	df,
	x=selected_dist,
	nbins=50,
	title=f"Distribution de {selected_dist}",
	color_discrete_sequence=['#636EFA']
	)
	fig_hist.update_layout(showlegend=False, height=350)
	st.plotly_chart(fig_hist, use_container_width=True)

	with col_box:
	fig_box = px.box(
	df,
	y=selected_dist,
	title=f"Box plot de {selected_dist}",
	color_discrete_sequence=['#636EFA']
	)
	fig_box.update_layout(showlegend=False, height=350)
	st.plotly_chart(fig_box, use_container_width=True)

	# Variables catégorielles
	st.markdown("### 🏷️ Variables catégorielles")
	categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()

	if categorical_cols:
	selected_cat = st.selectbox("Choisissez une variable catégorielle", categorical_cols)

	value_counts = df[selected_cat].value_counts().head(15)

	col_bar, col_info = st.columns([2, 1])

	with col_bar:
	fig_cat = px.bar(
	x=value_counts.index,
	y=value_counts.values,
	title=f"Top 15 valeurs de {selected_cat}",
	labels={'x': selected_cat, 'y': 'Count'},
	color=value_counts.values,
	color_continuous_scale='Blues'
	)
	fig_cat.update_layout(showlegend=False, height=400)
	st.plotly_chart(fig_cat, use_container_width=True)

	with col_info:
	st.markdown("#### Statistiques")
	st.metric("Valeurs uniques", df[selected_cat].nunique())
	st.metric("Valeur la plus fréquente", value_counts.index[0])
	st.metric("Fréquence max", f"{value_counts.values[0]:,}")
	st.metric("% de la plus fréquente", f"{(value_counts.values[0] / len(df) * 100):.1f}%")
	else:
	st.info("Aucune variable catégorielle détectée")

	st.markdown("---")

	# ========================== 2. DISTRIBUTION FRAUDE vs NON-FRAUDE ==========================
	st.markdown("## 🥧 Distribution des transactions")

	fraud_counts = df["is_fraud"].value_counts().reset_index()
	fraud_counts.columns = ["is_fraud", "count"]
	fraud_counts["label"] = fraud_counts["is_fraud"].map({0: "Non frauduleuse", 1: "Frauduleuse"})

	fig_pie = px.pie(
	fraud_counts,
	values="count",
	names="label",
	title="Répartition des transactions : frauduleuses vs non frauduleuses",
	color_discrete_sequence=["#636EFA", "#EF553B"],
	hole=0.4
	)
	fig_pie.update_traces(textinfo="percent+label")
	st.plotly_chart(fig_pie, use_container_width=True)

	# Statistiques
	col1, col2, col3 = st.columns(3)
	with col1:
	st.metric("Total transactions", f"{len(df):,}")
	with col2:
	st.metric("Fraudes", f"{fraud_counts[fraud_counts['is_fraud']==1]['count'].values[0]:,}")
	with col3:
	fraud_rate = (fraud_counts[fraud_counts['is_fraud']==1]['count'].values[0] / len(df)) * 100
	st.metric("Taux de fraude", f"{fraud_rate:.2f}%")

	st.markdown("---")

	# ========================== 3. CARTE GÉOGRAPHIQUE ==========================
	st.markdown("## 🗺️ Localisation géographique des transactions")

	# Vérifier si les colonnes existent
	if 'merch_lat' in df.columns and 'merch_long' in df.columns:
	df_geo = df.dropna(subset=["merch_lat", "merch_long"])

	# Option d'échantillonnage pour performance
	sample_size = st.slider("Nombre de points à afficher", 1000, min(50000, len(df_geo)), 10000, step=1000)
	df_sample = df_geo.sample(n=min(sample_size, len(df_geo)), random_state=42)

	# Ajouter le label
	df_sample["fraud_label"] = df_sample["is_fraud"].map({0: "Non frauduleuse", 1: "Frauduleuse"})

	fig_map = px.scatter_mapbox(
	df_sample,
	lat="merch_lat",
	lon="merch_long",
	color="fraud_label",
	color_discrete_map={"Non frauduleuse": "#636EFA", "Frauduleuse": "#EF553B"},
	title=f"Localisation des transactions ({sample_size} points échantillonnés)",
	mapbox_style="open-street-map",
	zoom=3,
	height=700,
	hover_data=["amt", "category", "merchant"]
	)

	fig_map.update_layout(
	legend_title_text="Type de transaction",
	margin={"r":0,"t":50,"l":0,"b":0}
	)

	st.plotly_chart(fig_map, use_container_width=True)
	else:
	st.warning("⚠️ Les colonnes de géolocalisation (merch_lat, merch_long) ne sont pas disponibles dans le dataset")

	st.markdown("---")

	# ========================== 4. FRAUDES PAR GENRE ==========================
	st.markdown("## 👥 Analyse par genre")

	if 'gender' in df.columns:
	# Nombre de fraudes par genre
	fraud_by_gender = df[df["is_fraud"] == 1]["gender"].value_counts().reset_index()
	fraud_by_gender.columns = ["gender", "count"]
	fraud_by_gender["gender_label"] = fraud_by_gender["gender"].map({"M": "Homme", "F": "Femme"})

	col1, col2 = st.columns(2)

	with col1:
	fig_gender = px.bar(
	fraud_by_gender,
	x="gender_label",
	y="count",
	color="gender_label",
	color_discrete_map={"Homme": "#1f77b4", "Femme": "#ff7f0e"},
	title="Nombre de fraudes par genre",
	labels={"count": "Nombre de fraudes", "gender_label": "Genre"},
	text="count"
	)
	fig_gender.update_layout(showlegend=False)
	st.plotly_chart(fig_gender, use_container_width=True)

	with col2:
	# Taux de fraude par genre
	gender_stats = df.groupby('gender')['is_fraud'].agg(['sum', 'count']).reset_index()
	gender_stats['fraud_rate'] = (gender_stats['sum'] / gender_stats['count']) * 100
	gender_stats['gender_label'] = gender_stats['gender'].map({"M": "Homme", "F": "Femme"})

	fig_rate = px.bar(
	gender_stats,
	x="gender_label",
	y="fraud_rate",
	color="gender_label",
	color_discrete_map={"Homme": "#1f77b4", "Femme": "#ff7f0e"},
	title="Taux de fraude par genre (%)",
	labels={"fraud_rate": "Taux de fraude (%)", "gender_label": "Genre"},
	text=gender_stats['fraud_rate'].apply(lambda x: f"{x:.2f}%")
	)
	fig_rate.update_layout(showlegend=False)
	st.plotly_chart(fig_rate, use_container_width=True)
	else:
	st.warning("⚠️ La colonne 'gender' n'est pas disponible dans le dataset")

	st.markdown("---")

	# ========================== 5. CORRÉLATIONS ET PAIRPLOT ==========================
	st.markdown("## 🔍 Analyse de corrélations")

	# Sélectionner les colonnes numériques
	numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns.tolist()

	# Retirer is_fraud de la liste
	if 'is_fraud' in numeric_cols:
	numeric_cols.remove('is_fraud')

	# Limiter aux colonnes les plus pertinentes par défaut
	default_cols = ['amt', 'city_pop', 'lat', 'long'][:4]
	default_cols = [col for col in default_cols if col in numeric_cols]

	selected_cols = st.multiselect(
	"Choisissez les variables à analyser (3-5 variables recommandées)",
	numeric_cols,
	default=default_cols[:4],
	max_selections=6
	)

	if selected_cols and len(selected_cols) >= 2:
	# ========================== MATRICE DE CORRÉLATION ==========================
	st.markdown("### 📊 Matrice de corrélation")

	corr_matrix = df[selected_cols].corr()

	# Créer une heatmap annotée plus lisible
	fig_corr = go.Figure(data=go.Heatmap(
	z=corr_matrix.values,
	x=corr_matrix.columns,
	y=corr_matrix.columns,
	colorscale='RdBu_r',
	zmid=0,
	text=corr_matrix.values,
	texttemplate='%{text:.2f}',
	textfont={"size": 12},
	colorbar=dict(title="Corrélation")
	))

	fig_corr.update_layout(
	title="Matrice de corrélation de Pearson",
	xaxis_title="",
	yaxis_title="",
	height=500,
	width=500
	)

	st.plotly_chart(fig_corr, use_container_width=True)

	# Afficher les corrélations les plus fortes
	corr_pairs = []
	for i in range(len(corr_matrix.columns)):
	for j in range(i+1, len(corr_matrix.columns)):
	corr_pairs.append({
	'Variable 1': corr_matrix.columns[i],
	'Variable 2': corr_matrix.columns[j],
	'Corrélation': corr_matrix.iloc[i, j]
	})

	corr_df = pd.DataFrame(corr_pairs).sort_values('Corrélation', key=abs, ascending=False)

	st.markdown("#### 🔝 Top 5 des corrélations")
	st.dataframe(
	corr_df.head(5).style.background_gradient(
	subset=['Corrélation'],
	cmap='RdBu_r',
	vmin=-1,
	vmax=1
	).format({'Corrélation': '{:.3f}'}),
	use_container_width=True
	)

	# ========================== PAIRPLOT STYLE SEABORN ==========================
	st.markdown("### 🎨 Pairplot (style Seaborn)")

	# Échantillonner pour la performance
	sample_size_pair = min(1000, len(df))
	df_pair = df[selected_cols + ['is_fraud']].sample(n=sample_size_pair, random_state=42)
	df_pair['fraud_label'] = df_pair['is_fraud'].map({0: "Non frauduleuse", 1: "Frauduleuse"})

	# Créer une grille de subplots
	n_vars = len(selected_cols)

	from plotly.subplots import make_subplots

	fig_pair = make_subplots(
	rows=n_vars,
	cols=n_vars,
	shared_xaxes=True,
	shared_yaxes=True,
	vertical_spacing=0.02,
	horizontal_spacing=0.02
	)

	colors = {'Non frauduleuse': '#636EFA', 'Frauduleuse': '#EF553B'}

	for i, var_y in enumerate(selected_cols):
	for j, var_x in enumerate(selected_cols):
	row = i + 1
	col = j + 1

	if i == j:
	# Diagonale : histogrammes
	for fraud_label in ['Non frauduleuse', 'Frauduleuse']:
	data = df_pair[df_pair['fraud_label'] == fraud_label][var_x]
	fig_pair.add_trace(
	go.Histogram(
	x=data,
	name=fraud_label,
	marker_color=colors[fraud_label],
	opacity=0.7,
	showlegend=(i == 0 and j == 0)
	),
	row=row, col=col
	)
	elif i > j:
	# Triangle inférieur : scatter plots
	for fraud_label in ['Non frauduleuse', 'Frauduleuse']:
	data = df_pair[df_pair['fraud_label'] == fraud_label]
	fig_pair.add_trace(
	go.Scatter(
	x=data[var_x],
	y=data[var_y],
	mode='markers',
	name=fraud_label,
	marker=dict(
	color=colors[fraud_label],
	size=4,
	opacity=0.6
	),
	showlegend=False
	),
	row=row, col=col
	)

	# Labels uniquement sur les bords
	if i == n_vars - 1:
	fig_pair.update_xaxes(title_text=var_x, row=row, col=col)
	if j == 0:
	fig_pair.update_yaxes(title_text=var_y, row=row, col=col)

	fig_pair.update_layout(
	height=200 * n_vars,
	title_text=f"Pairplot - {sample_size_pair} échantillons",
	showlegend=True
	)

	st.plotly_chart(fig_pair, use_container_width=True)

	st.info("💡 Lecture du pairplot : La diagonale montre la distribution de chaque variable. Le triangle inférieur montre les relations entre paires de variables.")

	elif selected_cols and len(selected_cols) < 2:
	st.warning("⚠️ Veuillez sélectionner au moins 2 variables pour voir les corrélations")
	else:
	st.warning("⚠️ Veuillez sélectionner des variables à analyser")


	# ========================== PAGE: FRAUDES (24h) ==========================
	def page_frauds():
	st.title("🚨 Fraudes Détectées (Dernières 24h)")

	st.info("👇 Cliquez sur Refresh Data pour charger les fraudes")

	if st.button("🔄 Refresh Data", type="primary", key="frauds_refresh"):
	st.session_state.frauds_loaded = True

	if not st.session_state.get('frauds_loaded', False):
	st.warning("⚠️ Cliquez sur 'Refresh Data' pour afficher les fraudes")
	return

	with st.spinner("Chargement des fraudes..."):
	df = load_last_24h_data()

	# 🔧 Convertir created_at en heure de Paris
	if 'created_at' in df.columns and not df.empty:
	df['created_at'] = pd.to_datetime(df['created_at'], utc=True)
	df['created_at'] = df['created_at'].dt.tz_convert('Europe/Paris')
	# Formater pour affichage plus lisible
	df['created_at_display'] = df['created_at'].dt.strftime('%Y-%m-%d %H:%M:%S')

	df_frauds = df[df['pred_is_fraud'] == 1]

	st.markdown(f"""
	<div style="background-color: {COLOR_FRAUD}; padding: 15px; border-radius: 10px; text-align: center; margin-bottom: 20px;">
	<h2 style="color: white; margin: 0;">🚨 {len(df_frauds)} Fraudes détectées</h2>
	</div>
	""", unsafe_allow_html=True)

	if not df_frauds.empty:
	# Utiliser created_at_display au lieu de created_at pour l'affichage
	display_cols = ['trans_num','merchant','category','amt','city','gender','created_at_display']
	df_display = df_frauds[display_cols].copy()
	df_display = df_display.rename(columns={'created_at_display': 'Date/Heure (Paris)'})

	st.dataframe(
	df_display.sort_values('Date/Heure (Paris)', ascending=False),
	use_container_width=True,
	height=600
	)
	col1, col2, col3 = st.columns(3)
	with col1:
	st.metric("Montant total", f"${df_frauds['amt'].sum():,.2f}")
	with col2:
	st.metric("Montant moyen", f"${df_frauds['amt'].mean():,.2f}")
	with col3:
	st.metric("Montant max", f"${df_frauds['amt'].max():,.2f}")
	else:
	st.success("✅ Aucune fraude détectée dans les dernières 24h !")

	# ========================== PAGE: NON FRAUDES (24h) ==========================
	def page_no_frauds():
	st.title("✅ Transactions Légitimes (Dernières 24h)")

	st.info("👇 Cliquez sur Refresh Data pour charger les transactions")

	if st.button("🔄 Refresh Data", type="primary", key="no_frauds_refresh"):
	st.session_state.no_frauds_loaded = True

	if not st.session_state.get('no_frauds_loaded', False):
	st.warning("⚠️ Cliquez sur 'Refresh Data' pour afficher les transactions")
	return

	with st.spinner("Chargement des transactions légitimes..."):
	df = load_last_24h_data()

	# 🔧 Convertir created_at en heure de Paris
	if 'created_at' in df.columns and not df.empty:
	df['created_at'] = pd.to_datetime(df['created_at'], utc=True)
	df['created_at'] = df['created_at'].dt.tz_convert('Europe/Paris')
	# Formater pour affichage plus lisible
	df['created_at_display'] = df['created_at'].dt.strftime('%Y-%m-%d %H:%M:%S')

	df_no_frauds = df[df['pred_is_fraud'] == 0]

	st.markdown(f"""
	<div style="background-color: {COLOR_NO_FRAUD}; padding: 15px; border-radius: 10px; text-align: center; margin-bottom: 20px;">
	<h2 style="color: white; margin: 0;">✅ {len(df_no_frauds)} Transactions légitimes</h2>
	</div>
	""", unsafe_allow_html=True)

	if not df_no_frauds.empty:
	# Utiliser created_at_display au lieu de created_at pour l'affichage
	display_cols = ['trans_num','merchant','category','amt','city','gender','created_at_display']
	df_display = df_no_frauds[display_cols].copy()
	df_display = df_display.rename(columns={'created_at_display': 'Date/Heure (Paris)'})

	st.dataframe(
	df_display.sort_values('Date/Heure (Paris)', ascending=False),
	use_container_width=True,
	height=600
	)
	col1, col2, col3 = st.columns(3)
	with col1:
	st.metric("Montant total", f"${df_no_frauds['amt'].sum():,.2f}")
	with col2:
	st.metric("Montant moyen", f"${df_no_frauds['amt'].mean():,.2f}")
	with col3:
	st.metric("Montant max", f"${df_no_frauds['amt'].max():,.2f}")
	else:
	st.warning("⚠️ Aucune transaction légitime dans les dernières 24h")

	# ========================== NAVIGATION ==========================
	def main():
	st.sidebar.title("Navigation")
	page = st.sidebar.radio(
	"Go to",
	["🏠 Dashboard", "📊 EDA", "🚨 Fraudes (24h)", "✅ Non Fraudes (24h)"]
	)

	st.sidebar.markdown("---")
	st.sidebar.markdown("""
	### ℹ️ À propos
	Dashboard de détection de fraude en temps réel.

	🔄 Refresh : Cliquez sur le bouton pour charger/actualiser les données.

	⚡ Optimisé : Les données ne se chargent que sur demande pour économiser les ressources.

	📊 Données :
	- Dashboard: Stats temps réel
	- EDA: Analyse du dataset complet
	- Détail: Dernières 24h
	""")

	if page == "🏠 Dashboard":
	page_dashboard()
	elif page == "📊 EDA":
	page_eda()
	elif page == "🚨 Fraudes (24h)":
	page_frauds()
	elif page == "✅ Non Fraudes (24h)":
	page_no_frauds()

	if __name__ == "__main__":
	main()