# test9998 / src/streamlit_app.py
# Author: gaetanbrison — "Update src/streamlit_app.py" (commit e768e32, verified)
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import warnings
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVC, SVR
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (
mean_squared_error, mean_absolute_error, r2_score,
accuracy_score, precision_score, recall_score, f1_score,
confusion_matrix, classification_report, roc_auc_score
)
warnings.filterwarnings('ignore')
# MLflow and experiment tracking (optional dependency).
try:
    import mlflow
    import mlflow.sklearn
    MLFLOW_AVAILABLE = True
except ImportError:
    MLFLOW_AVAILABLE = False
    # NOTE: do not call st.warning() here — st.set_page_config() below must
    # be the first Streamlit command in the script, and any earlier st.*
    # call raises StreamlitAPIException when mlflow is missing.
    # Pages that need MLflow check MLFLOW_AVAILABLE instead.
# PyCaret imports (optional dependency for the AutoML page).
try:
    from pycaret.classification import setup as cls_setup, compare_models as cls_compare, create_model as cls_create
    from pycaret.classification import tune_model as cls_tune, finalize_model as cls_finalize, predict_model as cls_predict
    from pycaret.classification import pull as cls_pull, plot_model as cls_plot, evaluate_model as cls_evaluate
    from pycaret.regression import setup as reg_setup, compare_models as reg_compare, create_model as reg_create
    from pycaret.regression import tune_model as reg_tune, finalize_model as reg_finalize, predict_model as reg_predict
    from pycaret.regression import pull as reg_pull, plot_model as reg_plot, evaluate_model as reg_evaluate
    PYCARET_AVAILABLE = True
except ImportError:
    PYCARET_AVAILABLE = False
    # NOTE: no st.warning() here — st.set_page_config() below must be the
    # first Streamlit command; an early st.* call would raise.
    # The AutoML page reports the missing dependency itself.
# Data profiling
#try:
# from ydata_profiling import ProfileReport
# from streamlit_pandas_profiling import st_profile_report
# PROFILING_AVAILABLE = True
#except ImportError:
# PROFILING_AVAILABLE = False
# PyTorch for deep learning (optional dependency).
try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import TensorDataset, DataLoader
    TORCH_AVAILABLE = True
except ImportError:
    # Deep-learning features are disabled when torch is not installed.
    TORCH_AVAILABLE = False
# SHAP for explainability (optional dependency).
try:
    import shap
    SHAP_AVAILABLE = True
except ImportError:
    # The Explainability page degrades gracefully without shap.
    SHAP_AVAILABLE = False
# NOTE: a duplicate scikit-learn import block was removed here — every name
# it brought in (LabelEncoder, StandardScaler, the linear/tree/ensemble
# models and the regression/classification metrics) is already imported
# once at the top of this file.
# ================== CUSTOM CSS & STYLING ==================
# Must be the first Streamlit command executed in the script.
st.set_page_config(
page_title="🚀 Super Data Science App",
layout="wide",
initial_sidebar_state="expanded",
page_icon="🚀"
)
# Inject global CSS: gradient background, styled buttons/metrics/headers.
st.markdown("""
<style>
/* Main styling */
.main {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
font-family: 'Arial', sans-serif;
}
/* Sidebar styling */
.sidebar .sidebar-content {
background: linear-gradient(180deg, #2C3E50, #3498DB);
color: white;
}
/* Button styling */
.stButton > button {
background: linear-gradient(45deg, #FF6B6B, #4ECDC4);
color: white;
border: none;
border-radius: 25px;
padding: 0.6rem 1.5rem;
font-weight: bold;
transition: all 0.3s ease;
box-shadow: 0 4px 15px 0 rgba(31, 38, 135, 0.37);
}
.stButton > button:hover {
transform: translateY(-2px);
box-shadow: 0 8px 25px 0 rgba(31, 38, 135, 0.37);
}
/* Metric styling */
.metric-container {
background: rgba(255, 255, 255, 0.1);
backdrop-filter: blur(10px);
border-radius: 15px;
padding: 1rem;
margin: 0.5rem 0;
border: 1px solid rgba(255, 255, 255, 0.2);
}
/* Header styling */
.main-header {
text-align: center;
padding: 2rem 0;
background: rgba(255, 255, 255, 0.1);
backdrop-filter: blur(10px);
border-radius: 20px;
margin-bottom: 2rem;
border: 1px solid rgba(255, 255, 255, 0.2);
}
/* Success/Error messages */
.stSuccess, .stError, .stWarning {
border-radius: 10px;
border: none;
}
</style>
""", unsafe_allow_html=True)
# ================== HEADER ==================
# Hero banner rendered as raw HTML inside the styled .main-header container.
st.markdown("""
<div class="main-header">
<h1 style="color: white; font-size: 3rem; margin-bottom: 0;">🚀 Super Data Science App</h1>
<p style="color: rgba(255,255,255,0.8); font-size: 1.2rem;">
Complete ML Pipeline: EDA → Modeling → AutoML → Explainability → Deployment
</p>
</div>
""", unsafe_allow_html=True)
# ================== AUTHENTICATION ==================
def check_authentication():
    """Gate the whole app behind a password or a one-click demo mode.

    Persists the flag in ``st.session_state.authenticated`` and halts the
    script with ``st.stop()`` until the user authenticates; ``st.rerun()``
    re-executes the script after a successful login.
    """
    if 'authenticated' not in st.session_state:
        st.session_state.authenticated = False
    if not st.session_state.authenticated:
        with st.sidebar:
            st.header("🔒 Authentication")
            password = st.text_input("Enter Password", type="password", key="auth_password")
            col1, col2 = st.columns(2)
            with col1:
                if st.button("🔑 Login", key="login_btn"):
                    # NOTE(review): hard-coded password — acceptable for a
                    # demo, but should come from st.secrets/env in production.
                    if password == "ds4everyone":
                        st.session_state.authenticated = True
                        st.success("✅ Access Granted!")
                        st.rerun()
                    else:
                        st.error("❌ Incorrect Password")
            with col2:
                if st.button("👤 Demo Mode", key="demo_btn"):
                    # Demo mode grants access without a password.
                    st.session_state.authenticated = True
                    st.session_state.demo_mode = True
                    st.info("📊 Demo Mode Activated")
                    st.rerun()
        st.info("🔐 Please authenticate to access the application")
        st.stop()
check_authentication()
# ================== SESSION STATE INITIALIZATION ==================
# Table-driven defaults for every session_state key the pages read later.
# 'model_comparison' is initialized here as well: it was previously only
# set deep inside the Data Loading page, so reading it on the PyCaret page
# could raise AttributeError if that page was never visited.
_SESSION_DEFAULTS = {
    'df': None,
    'trained_models': {},
    'pycaret_setup_done': False,
    'best_model': None,
    'dl_models': {},
    'training_history': {},
    'model_comparison': None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# ================== SIDEBAR NAVIGATION ==================
# One entry per page; the big if/elif chain below dispatches on the label.
st.sidebar.title("🧭 Navigation")
pages = [
"🏠 Home",
"📊 Data Loading",
"🔍 EDA & Profiling",
"📈 Visualization",
"🤖 Classical ML",
"⚡ PyCaret AutoML",
"🧠 Deep Learning",
"🎯 Model Evaluation",
"🔬 Explainability",
"📋 MLflow Tracking",
"🚀 Model Deployment"
]
selected_page = st.sidebar.selectbox("Select Page", pages, key="page_selector")
# ================== UTILITY FUNCTIONS ==================
def load_sample_data(dataset_name):
    """Load a built-in sample dataset by name.

    Parameters
    ----------
    dataset_name : str
        One of "California Housing", "Iris", "Wine Quality", "Titanic".

    Returns
    -------
    pandas.DataFrame or None
        The dataset (subsampled for performance where large), or ``None``
        when a remote dataset cannot be fetched or the name is unknown.
    """
    if dataset_name == "California Housing":
        from sklearn.datasets import fetch_california_housing
        data = fetch_california_housing(as_frame=True)
        df = pd.concat([data.data, data.target.rename('MedHouseVal')], axis=1)
        return df.sample(n=min(2000, len(df)))  # Limit for performance
    elif dataset_name == "Iris":
        from sklearn.datasets import load_iris
        data = load_iris(as_frame=True)
        df = pd.concat([data.data, data.target.rename('species')], axis=1)
        return df
    elif dataset_name == "Wine Quality":
        url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
        try:
            df = pd.read_csv(url, sep=';')
            return df.sample(n=min(1000, len(df)))
        # Was a bare `except:` — that also swallows SystemExit/KeyboardInterrupt.
        except Exception:
            st.error("Could not load Wine Quality dataset")
            return None
    elif dataset_name == "Titanic":
        url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
        try:
            df = pd.read_csv(url)
            return df
        except Exception:
            st.error("Could not load Titanic dataset")
            return None
    # Unknown dataset name: make the implicit fall-through explicit.
    return None
def get_dataset_info(df):
    """Summarize a DataFrame for display.

    Returns a dict with shape, column list, dtypes, per-column missing
    counts, approximate deep memory footprint, and the numeric vs.
    non-numeric column split.
    """
    numeric = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical = df.select_dtypes(exclude=[np.number]).columns.tolist()
    mem_mb = df.memory_usage(deep=True).sum() / 1024 ** 2
    return {
        'shape': df.shape,
        'columns': df.columns.tolist(),
        'dtypes': df.dtypes.to_dict(),
        'missing_values': df.isnull().sum().to_dict(),
        'memory_usage': f"{mem_mb:.2f} MB",
        'numeric_columns': numeric,
        'categorical_columns': categorical,
    }
# ================== PAGE CONTENT ==================
# Home page: feature overview plus quick stats when data is loaded.
if selected_page == "🏠 Home":
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        st.markdown("""
## Welcome to the Super Data Science App! 🎉
This comprehensive application provides a complete machine learning pipeline:
""")
        features = [
            "📊 **Data Loading**: Upload CSV or use sample datasets",
            "🔍 **EDA & Profiling**: Automated data profiling and exploration",
            "📈 **Visualization**: Interactive charts with Plotly and Seaborn",
            "🤖 **Classical ML**: Scikit-learn models with hyperparameter tuning",
            "⚡ **PyCaret AutoML**: Automated machine learning with model comparison",
            "🎯 **Model Evaluation**: Comprehensive model performance analysis",
            "🔬 **Explainability**: SHAP values and feature importance",
            "📋 **MLflow Tracking**: Experiment tracking and model versioning",
            "🚀 **Model Deployment**: Model export and deployment preparation"
        ]
        for feature in features:
            st.markdown(feature)
        st.markdown("---")
        # Quick stats — only shown once a dataset has been loaded.
        if st.session_state.df is not None:
            col_a, col_b, col_c, col_d = st.columns(4)
            with col_a:
                st.metric("📊 Rows", f"{st.session_state.df.shape[0]:,}")
            with col_b:
                st.metric("📋 Columns", f"{st.session_state.df.shape[1]:,}")
            with col_c:
                st.metric("🤖 Models Trained", len(st.session_state.trained_models))
            with col_d:
                st.metric("✅ Setup Complete", "Ready" if st.session_state.pycaret_setup_done else "Pending")
# Data Loading page: CSV upload, sample datasets, and a data preview.
elif selected_page == "📊 Data Loading":
    st.header("📊 Data Loading & Management")
    col1, col2 = st.columns([1, 2])
    with col1:
        st.subheader("Data Source")
        data_source = st.radio(
            "Choose data source:",
            ["📁 Upload CSV", "🎲 Sample Datasets", "📋 Current Data Info"]
        )
    with col2:
        if data_source == "📁 Upload CSV":
            uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
            if uploaded_file is not None:
                try:
                    df = pd.read_csv(uploaded_file)
                    st.session_state.df = df
                    st.success(f"✅ Successfully loaded {df.shape[0]} rows and {df.shape[1]} columns")
                except Exception as e:
                    st.error(f"❌ Error loading file: {str(e)}")
            # NOTE(review): this session-state default probably belongs with
            # the other initializations near the top of the file — as written
            # it only runs when this branch of this page is rendered.
            if 'model_comparison' not in st.session_state:
                st.session_state.model_comparison = None
        elif data_source == "🎲 Sample Datasets":
            sample_options = ["California Housing", "Iris", "Wine Quality", "Titanic"]
            selected_sample = st.selectbox("Choose sample dataset:", sample_options)
            if st.button(f"🔄 Load {selected_sample} Dataset"):
                with st.spinner(f"Loading {selected_sample}..."):
                    df = load_sample_data(selected_sample)
                    if df is not None:
                        st.session_state.df = df
                        st.success(f"✅ Loaded {selected_sample} dataset!")
        elif data_source == "📋 Current Data Info":
            if st.session_state.df is not None:
                info = get_dataset_info(st.session_state.df)
                col_a, col_b = st.columns(2)
                with col_a:
                    st.metric("📊 Rows", f"{info['shape'][0]:,}")
                    st.metric("📋 Columns", f"{info['shape'][1]:,}")
                    st.metric("💾 Memory Usage", info['memory_usage'])
                with col_b:
                    st.metric("🔢 Numeric Columns", len(info['numeric_columns']))
                    st.metric("📝 Categorical Columns", len(info['categorical_columns']))
                    st.metric("❌ Missing Values", sum(info['missing_values'].values()))
            else:
                st.info("🔍 No data loaded yet")
    # Data Preview
    if st.session_state.df is not None:
        st.subheader("📋 Data Preview")
        col1, col2, col3 = st.columns(3)
        with col1:
            show_rows = st.slider("Rows to display", 5, 50, 10)
        with col2:
            show_info = st.checkbox("Show column info", value=True)
        with col3:
            if st.button("💾 Download Current Data"):
                csv = st.session_state.df.to_csv(index=False)
                # NOTE(review): a download_button nested inside a regular
                # button only survives a single rerun — confirm intended.
                st.download_button(
                    label="📥 Download CSV",
                    data=csv,
                    file_name=f"processed_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                    mime='text/csv'
                )
        # Display data
        st.dataframe(st.session_state.df.head(show_rows), use_container_width=True)
        if show_info:
            st.subheader("📊 Column Information")
            # Per-column dtype / null-count / missing-percentage table.
            info_df = pd.DataFrame({
                'Column': st.session_state.df.columns,
                'Data Type': st.session_state.df.dtypes,
                'Non-Null Count': st.session_state.df.count(),
                'Missing Values': st.session_state.df.isnull().sum(),
                'Missing %': (st.session_state.df.isnull().sum() / len(st.session_state.df) * 100).round(2)
            })
            st.dataframe(info_df, use_container_width=True)
# EDA page: quick stats, missing-value analysis, distributions, correlations.
elif selected_page == "🔍 EDA & Profiling":
    st.header("🔍 Exploratory Data Analysis & Profiling")
    if st.session_state.df is None:
        st.warning("⚠️ Please load data first from the Data Loading page")
        st.stop()
    df = st.session_state.df
    # Quick EDA
    st.subheader("📊 Quick Statistics")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("📏 Dataset Shape", f"{df.shape[0]} × {df.shape[1]}")
    with col2:
        st.metric("🔢 Numeric Columns", len(df.select_dtypes(include=[np.number]).columns))
    with col3:
        st.metric("📝 Text Columns", len(df.select_dtypes(exclude=[np.number]).columns))
    with col4:
        st.metric("❌ Missing Values", df.isnull().sum().sum())
    # Missing Values Analysis
    st.subheader("❌ Missing Values Analysis")
    missing_df = pd.DataFrame({
        'Column': df.columns,
        'Missing Count': df.isnull().sum(),
        'Missing Percentage': (df.isnull().sum() / len(df) * 100).round(2)
    }).sort_values('Missing Count', ascending=False)
    # Only show columns that actually have missing values.
    missing_df = missing_df[missing_df['Missing Count'] > 0]
    if len(missing_df) > 0:
        st.dataframe(missing_df, use_container_width=True)
        # Missing values heatmap
        fig, ax = plt.subplots(figsize=(12, 8))
        sns.heatmap(df.isnull(), yticklabels=False, cbar=True, cmap='viridis')
        plt.title('Missing Values Heatmap')
        st.pyplot(fig)
    else:
        st.success("✅ No missing values found in the dataset!")
    # Statistical Summary
    st.subheader("📈 Statistical Summary")
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) > 0:
        st.dataframe(df[numeric_cols].describe(), use_container_width=True)
        # Distribution plots
        st.subheader("📊 Distribution Analysis")
        selected_cols = st.multiselect("Select columns for distribution analysis:", numeric_cols, default=numeric_cols[:3])
        if selected_cols:
            # Lay histograms out in a 2-column grid.
            cols_per_row = 2
            n_rows = (len(selected_cols) + cols_per_row - 1) // cols_per_row
            fig, axes = plt.subplots(n_rows, cols_per_row, figsize=(15, 5*n_rows))
            # Normalize `axes` to a flat indexable sequence for any grid shape.
            if n_rows == 1:
                axes = [axes] if cols_per_row == 1 else axes
            else:
                axes = axes.flatten()
            for i, col in enumerate(selected_cols):
                sns.histplot(data=df, x=col, kde=True, ax=axes[i])
                axes[i].set_title(f'Distribution of {col}')
            # Hide empty subplots
            for i in range(len(selected_cols), len(axes)):
                axes[i].set_visible(False)
            plt.tight_layout()
            st.pyplot(fig)
    # Correlation Analysis
    if len(numeric_cols) > 1:
        st.subheader("🔗 Correlation Analysis")
        corr_matrix = df[numeric_cols].corr()
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0,
                    square=True, fmt='.2f', ax=ax)
        plt.title('Correlation Matrix')
        st.pyplot(fig)
    # Automated Profiling Report (disabled: ydata_profiling not installed)
    # if PROFILING_AVAILABLE:
    #     st.subheader("📋 Automated Profiling Report")
    #     if st.button("🔄 Generate Comprehensive Profile Report"):
    #         with st.spinner("Generating detailed profiling report..."):
    #             profile = ProfileReport(df, title="Dataset Profiling Report", explorative=True)
    #             st_profile_report(profile)
# Visualization page: user-selected interactive Plotly charts.
elif selected_page == "📈 Visualization":
    st.header("📈 Interactive Data Visualization")
    if st.session_state.df is None:
        st.warning("⚠️ Please load data first from the Data Loading page")
        st.stop()
    df = st.session_state.df
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(exclude=[np.number]).columns.tolist()
    # Visualization controls
    st.subheader("🎛️ Visualization Controls")
    col1, col2, col3 = st.columns(3)
    with col1:
        viz_type = st.selectbox("Select visualization type:", [
            "📊 Histogram", "📈 Scatter Plot", "📦 Box Plot",
            "🔥 Heatmap", "📉 Line Plot", "🎯 Pair Plot"
        ])
    with col2:
        # Two-axis chart types need both x and y; others a single column.
        if viz_type in ["📈 Scatter Plot", "📉 Line Plot"]:
            x_col = st.selectbox("X-axis:", numeric_cols + categorical_cols)
            y_col = st.selectbox("Y-axis:", numeric_cols)
        else:
            selected_col = st.selectbox("Select column:", numeric_cols if viz_type != "📦 Box Plot" else df.columns)
    with col3:
        # Optional categorical hue for the chart types that support it.
        if categorical_cols and viz_type in ["📊 Histogram", "📈 Scatter Plot", "📦 Box Plot"]:
            color_col = st.selectbox("Color by (optional):", ["None"] + categorical_cols)
            color_col = None if color_col == "None" else color_col
        else:
            color_col = None
    # Generate visualizations
    st.subheader("📊 Visualization Output")
    try:
        if viz_type == "📊 Histogram":
            fig = px.histogram(df, x=selected_col, color=color_col,
                               title=f'Distribution of {selected_col}',
                               marginal="box")
            st.plotly_chart(fig, use_container_width=True)
        elif viz_type == "📈 Scatter Plot":
            # OLS trendline only when no hue is chosen (one line, not per group).
            fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                             title=f'{y_col} vs {x_col}',
                             trendline="ols" if color_col is None else None)
            st.plotly_chart(fig, use_container_width=True)
        elif viz_type == "📦 Box Plot":
            if color_col:
                fig = px.box(df, y=selected_col, x=color_col,
                             title=f'Box Plot of {selected_col} by {color_col}')
            else:
                fig = px.box(df, y=selected_col,
                             title=f'Box Plot of {selected_col}')
            st.plotly_chart(fig, use_container_width=True)
        elif viz_type == "🔥 Heatmap":
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(corr_matrix, text_auto=True, aspect="auto",
                                title="Correlation Heatmap")
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.warning("Need at least 2 numeric columns for correlation heatmap")
        elif viz_type == "📉 Line Plot":
            # Sort by x so the line is monotone along the x-axis.
            fig = px.line(df.sort_values(x_col), x=x_col, y=y_col,
                          title=f'{y_col} vs {x_col} (Line Plot)')
            st.plotly_chart(fig, use_container_width=True)
        elif viz_type == "🎯 Pair Plot":
            if len(numeric_cols) >= 2:
                selected_numeric = st.multiselect("Select numeric columns for pair plot:",
                                                  numeric_cols, default=numeric_cols[:4])
                if len(selected_numeric) >= 2:
                    fig = px.scatter_matrix(df, dimensions=selected_numeric, color=color_col,
                                            title="Pair Plot Matrix")
                    st.plotly_chart(fig, use_container_width=True)
                else:
                    st.warning("Please select at least 2 numeric columns")
            else:
                st.warning("Need at least 2 numeric columns for pair plot")
    except Exception as e:
        st.error(f"Error generating visualization: {str(e)}")
    # Additional visualizations
    st.subheader("📊 Additional Insights")
    # Value counts for categorical columns
    if categorical_cols:
        st.write("**Categorical Column Distributions:**")
        for col in categorical_cols[:3]:  # Limit to first 3
            if df[col].nunique() <= 20:  # Only show if not too many categories
                fig = px.bar(df[col].value_counts().head(10),
                             title=f'Top 10 values in {col}')
                st.plotly_chart(fig, use_container_width=True)
# Classical ML page: configure, train and report a single sklearn model.
elif selected_page == "🤖 Classical ML":
    st.header("🤖 Classical Machine Learning")
    if st.session_state.df is None:
        st.warning("⚠️ Please load data first from the Data Loading page")
        st.stop()
    df = st.session_state.df
    # Model configuration
    st.subheader("⚙️ Model Configuration")
    col1, col2 = st.columns(2)
    with col1:
        # Target selection
        target_col = st.selectbox("🎯 Select target variable:", df.columns)
        # Feature selection
        available_features = [col for col in df.columns if col != target_col]
        selected_features = st.multiselect("📊 Select features:", available_features,
                                           default=available_features[:5])
    with col2:
        # Problem type detection: non-numeric target or <10 distinct values
        # is treated as classification.
        if df[target_col].dtype in ['object', 'bool'] or df[target_col].nunique() < 10:
            problem_type = "Classification"
            st.info("🎯 Detected: Classification Problem")
            model_options = ["Logistic Regression", "Decision Tree", "Random Forest"]
        else:
            problem_type = "Regression"
            st.info("📈 Detected: Regression Problem")
            model_options = ["Linear Regression", "Decision Tree", "Random Forest"]
        selected_model = st.selectbox("🤖 Select model:", model_options)
        test_size = st.slider("🔄 Test set size:", 0.1, 0.5, 0.2, 0.05)
    if not selected_features:
        st.warning("⚠️ Please select at least one feature")
        st.stop()
    # Data preprocessing
    if st.button("🚀 Train Model"):
        with st.spinner("Training model..."):
            try:
                # Prepare data
                X = df[selected_features].copy()
                y = df[target_col].copy()
                # Handle missing values.
                # NOTE(review): frame-wide fillna(X.mean()) only fills numeric
                # columns; object columns keep their NaNs until label-encoded
                # as the string "nan" below — confirm this is intended.
                X = X.fillna(X.mean() if X.select_dtypes(include=[np.number]).shape[1] > 0 else X.mode().iloc[0])
                # Encode categorical variables (one LabelEncoder per column,
                # kept in le_dict for potential inverse transforms).
                le_dict = {}
                for col in X.select_dtypes(include=['object']).columns:
                    le = LabelEncoder()
                    X[col] = le.fit_transform(X[col].astype(str))
                    le_dict[col] = le
                # Encode target if classification
                if problem_type == "Classification" and y.dtype == 'object':
                    target_le = LabelEncoder()
                    y = target_le.fit_transform(y)
                # Split data
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=test_size, random_state=42
                )
                # Instantiate the selected estimator.
                if selected_model == "Linear Regression":
                    model = LinearRegression()
                elif selected_model == "Logistic Regression":
                    model = LogisticRegression(random_state=42, max_iter=1000)
                elif selected_model == "Decision Tree":
                    if problem_type == "Classification":
                        model = DecisionTreeClassifier(random_state=42)
                    else:
                        model = DecisionTreeRegressor(random_state=42)
                elif selected_model == "Random Forest":
                    if problem_type == "Classification":
                        model = RandomForestClassifier(random_state=42, n_estimators=100)
                    else:
                        model = RandomForestRegressor(random_state=42, n_estimators=100)
                model.fit(X_train, y_train)
                predictions = model.predict(X_test)
                # Store model and test artifacts for the evaluation page.
                st.session_state.trained_models[selected_model] = {
                    'model': model,
                    'X_test': X_test,
                    'y_test': y_test,
                    'predictions': predictions,
                    'features': selected_features,
                    'target': target_col,
                    'problem_type': problem_type
                }
                st.success("✅ Model trained successfully!")
                # Display results
                st.subheader("📊 Model Performance")
                if problem_type == "Regression":
                    mse = mean_squared_error(y_test, predictions)
                    mae = mean_absolute_error(y_test, predictions)
                    r2 = r2_score(y_test, predictions)
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.metric("MSE", f"{mse:.4f}")
                    with col2:
                        st.metric("MAE", f"{mae:.4f}")
                    with col3:
                        st.metric("R² Score", f"{r2:.4f}")
                    # Actual vs Predicted plot with a y=x reference line.
                    fig = px.scatter(x=y_test, y=predictions,
                                     labels={'x': 'Actual', 'y': 'Predicted'},
                                     title='Actual vs Predicted Values')
                    fig.add_shape(type="line", x0=y_test.min(), y0=y_test.min(),
                                  x1=y_test.max(), y1=y_test.max(),
                                  line=dict(color="red", dash="dash"))
                    st.plotly_chart(fig, use_container_width=True)
                else:  # Classification
                    accuracy = accuracy_score(y_test, predictions)
                    # Weighted averaging handles multiclass targets.
                    precision = precision_score(y_test, predictions, average='weighted')
                    recall = recall_score(y_test, predictions, average='weighted')
                    f1 = f1_score(y_test, predictions, average='weighted')
                    col1, col2, col3, col4 = st.columns(4)
                    with col1:
                        st.metric("Accuracy", f"{accuracy:.4f}")
                    with col2:
                        st.metric("Precision", f"{precision:.4f}")
                    with col3:
                        st.metric("Recall", f"{recall:.4f}")
                    with col4:
                        st.metric("F1-Score", f"{f1:.4f}")
                    # Confusion Matrix
                    cm = confusion_matrix(y_test, predictions)
                    fig, ax = plt.subplots(figsize=(8, 6))
                    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
                    ax.set_title('Confusion Matrix')
                    ax.set_xlabel('Predicted')
                    ax.set_ylabel('Actual')
                    st.pyplot(fig)
                # Feature importance (for tree-based models)
                if hasattr(model, 'feature_importances_'):
                    st.subheader("📊 Feature Importance")
                    importance_df = pd.DataFrame({
                        'Feature': selected_features,
                        'Importance': model.feature_importances_
                    }).sort_values('Importance', ascending=False)
                    fig = px.bar(importance_df, x='Importance', y='Feature',
                                 orientation='h', title='Feature Importance')
                    st.plotly_chart(fig, use_container_width=True)
            except Exception as e:
                st.error(f"❌ Error training model: {str(e)}")
# PyCaret AutoML page: setup → compare → create → tune → finalize pipeline.
elif selected_page == "⚡ PyCaret AutoML":
    st.header("⚡ PyCaret AutoML")
    if not PYCARET_AVAILABLE:
        st.error("❌ PyCaret is not installed. Please install it to use AutoML features.")
        st.stop()
    if st.session_state.df is None:
        st.warning("⚠️ Please load data first from the Data Loading page")
        st.stop()
    df = st.session_state.df
    # AutoML Configuration
    st.subheader("⚙️ AutoML Configuration")
    col1, col2 = st.columns(2)
    with col1:
        target_col = st.selectbox("🎯 Select target variable:", df.columns, key="pycaret_target")
        # Auto-detect problem type (same heuristic as the Classical ML page).
        if df[target_col].dtype in ['object', 'bool'] or df[target_col].nunique() < 10:
            problem_type = "classification"
            st.info("🎯 Detected: Classification Problem")
        else:
            problem_type = "regression"
            st.info("📈 Detected: Regression Problem")
    with col2:
        train_size = st.slider("🔄 Training set size:", 0.5, 0.9, 0.8, 0.05)
        sample_size = st.slider("📊 Sample size (for performance):", 500, min(5000, len(df)), min(2000, len(df)))
    # Subsample large datasets so setup/compare stay responsive.
    if len(df) > sample_size:
        df_sample = df.sample(n=sample_size, random_state=42)
        st.info(f"📊 Using {sample_size} samples for faster processing")
    else:
        df_sample = df.copy()
    # Advanced settings
    with st.expander("🔧 Advanced Settings"):
        col1, col2 = st.columns(2)
        with col1:
            cross_validation = st.checkbox("🔄 Cross Validation", value=True)
            normalize = st.checkbox("📏 Normalize Features", value=True)
        with col2:
            remove_outliers = st.checkbox("🚫 Remove Outliers", value=False)
            feature_selection = st.checkbox("🎯 Feature Selection", value=False)
    # Setup PyCaret Environment
    if st.button("🚀 Setup PyCaret Environment"):
        with st.spinner("Setting up PyCaret environment..."):
            try:
                # NOTE(review): `silent=True` was removed in PyCaret 3.x —
                # confirm the installed PyCaret version accepts it.
                if problem_type == "classification":
                    st.session_state.pycaret_exp = cls_setup(
                        data=df_sample,
                        target=target_col,
                        train_size=train_size,
                        session_id=42,
                        normalize=normalize,
                        remove_outliers=remove_outliers,
                        feature_selection=feature_selection,
                        silent=True
                    )
                else:
                    st.session_state.pycaret_exp = reg_setup(
                        data=df_sample,
                        target=target_col,
                        train_size=train_size,
                        session_id=42,
                        normalize=normalize,
                        remove_outliers=remove_outliers,
                        feature_selection=feature_selection,
                        silent=True
                    )
                st.session_state.pycaret_setup_done = True
                st.session_state.pycaret_problem_type = problem_type
                st.success("✅ PyCaret environment setup complete!")
            except Exception as e:
                st.error(f"❌ Error setting up PyCaret: {str(e)}")
    # Model Comparison — only meaningful after setup succeeded.
    if st.session_state.pycaret_setup_done:
        st.subheader("📊 Model Comparison")
        if st.button("🔄 Compare Models"):
            with st.spinner("Comparing multiple models..."):
                try:
                    if st.session_state.pycaret_problem_type == "classification":
                        comparison_df = cls_compare(
                            include=['lr', 'rf', 'et', 'nb', 'dt', 'svm'],
                            sort='Accuracy',
                            n_select=5
                        )
                        # pull() retrieves the last scoring grid as a DataFrame.
                        st.session_state.model_comparison = cls_pull()
                    else:
                        comparison_df = reg_compare(
                            include=['lr', 'rf', 'et', 'dt', 'huber'],
                            sort='R2',
                            n_select=5
                        )
                        st.session_state.model_comparison = reg_pull()
                    st.success("✅ Model comparison complete!")
                except Exception as e:
                    st.error(f"❌ Error comparing models: {str(e)}")
        # Display comparison results
        # NOTE(review): assumes st.session_state.model_comparison exists;
        # raises AttributeError if it was never initialized — verify.
        if st.session_state.model_comparison is not None:
            st.subheader("📈 Model Comparison Results")
            st.dataframe(st.session_state.model_comparison, use_container_width=True)
            # Select best model
            best_model_name = st.selectbox(
                "🏆 Select model for tuning:",
                ['lr', 'rf', 'et', 'dt', 'nb', 'svm'] if st.session_state.pycaret_problem_type == "classification"
                else ['lr', 'rf', 'et', 'dt', 'huber']
            )
            # Create and tune model
            col1, col2 = st.columns(2)
            with col1:
                if st.button("🎯 Create Model"):
                    with st.spinner("Creating model..."):
                        try:
                            if st.session_state.pycaret_problem_type == "classification":
                                model = cls_create(best_model_name)
                            else:
                                model = reg_create(best_model_name)
                            st.session_state.pycaret_model = model
                            st.success("✅ Model created successfully!")
                        except Exception as e:
                            st.error(f"❌ Error creating model: {str(e)}")
            with col2:
                if st.button("⚡ Tune Hyperparameters"):
                    if 'pycaret_model' in st.session_state:
                        with st.spinner("Tuning hyperparameters..."):
                            try:
                                if st.session_state.pycaret_problem_type == "classification":
                                    tuned_model = cls_tune(st.session_state.pycaret_model,
                                                           optimize='Accuracy', n_iter=10)
                                else:
                                    tuned_model = reg_tune(st.session_state.pycaret_model,
                                                           optimize='R2', n_iter=10)
                                st.session_state.tuned_model = tuned_model
                                st.success("✅ Hyperparameter tuning complete!")
                            except Exception as e:
                                st.error(f"❌ Error tuning model: {str(e)}")
                    else:
                        st.warning("⚠️ Please create a model first")
            # Finalize model: prefer the tuned model, fall back to the
            # un-tuned one, otherwise prompt the user.
            if st.button("🏁 Finalize Best Model"):
                if 'tuned_model' in st.session_state:
                    model_to_finalize = st.session_state.tuned_model
                elif 'pycaret_model' in st.session_state:
                    model_to_finalize = st.session_state.pycaret_model
                else:
                    st.warning("⚠️ Please create a model first")
                    model_to_finalize = None
                if model_to_finalize is not None:
                    with st.spinner("Finalizing model..."):
                        try:
                            if st.session_state.pycaret_problem_type == "classification":
                                final_model = cls_finalize(model_to_finalize)
                            else:
                                final_model = reg_finalize(model_to_finalize)
                            st.session_state.best_model = final_model
                            st.success("✅ Model finalized successfully!")
                        except Exception as e:
                            st.error(f"❌ Error finalizing model: {str(e)}")
# Model Evaluation page: metrics/plots for classical or PyCaret models.
elif selected_page == "🎯 Model Evaluation":
    st.header("🎯 Advanced Model Evaluation")
    if st.session_state.df is None:
        st.warning("⚠️ Please load data first")
        st.stop()
    # Check for available models
    available_models = []
    if st.session_state.trained_models:
        available_models.extend(list(st.session_state.trained_models.keys()))
    if 'best_model' in st.session_state and st.session_state.best_model is not None:
        available_models.append("PyCaret Best Model")
    if not available_models:
        st.warning("⚠️ No trained models available. Please train a model first.")
        st.stop()
    selected_model_name = st.selectbox("📊 Select model to evaluate:", available_models)
    if selected_model_name == "PyCaret Best Model":
        if 'best_model' not in st.session_state:
            st.error("❌ PyCaret model not available")
            st.stop()
        model_info = st.session_state.best_model
        problem_type = st.session_state.get('pycaret_problem_type', 'regression')
        st.subheader("📈 PyCaret Model Evaluation")
        # PyCaret built-in plots
        if PYCARET_AVAILABLE:
            col1, col2 = st.columns(2)
            with col1:
                # Plot menus differ per task type.
                plot_types_cls = ['auc', 'confusion_matrix', 'class_report', 'pr', 'feature']
                plot_types_reg = ['residuals', 'feature', 'rfe', 'learning', 'vc']
                plot_types = plot_types_cls if problem_type == "classification" else plot_types_reg
                selected_plot = st.selectbox("📊 Select evaluation plot:", plot_types)
            with col2:
                if st.button("📊 Generate Plot"):
                    try:
                        with st.spinner("Generating plot..."):
                            if problem_type == "classification":
                                cls_plot(model_info, plot=selected_plot, display_format='streamlit')
                            else:
                                reg_plot(model_info, plot=selected_plot, display_format='streamlit')
                    except Exception as e:
                        st.error(f"❌ Error generating plot: {str(e)}")
        # Model predictions (on PyCaret's held-out set) with CSV download.
        if st.button("🔮 Generate Predictions"):
            try:
                with st.spinner("Generating predictions..."):
                    if problem_type == "classification":
                        predictions_df = cls_predict(model_info)
                    else:
                        predictions_df = reg_predict(model_info)
                    st.subheader("🔮 Model Predictions")
                    st.dataframe(predictions_df.head(20), use_container_width=True)
                    # Download predictions
                    csv = predictions_df.to_csv(index=False)
                    st.download_button(
                        label="📥 Download Predictions",
                        data=csv,
                        file_name=f"predictions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                        mime='text/csv'
                    )
            except Exception as e:
                st.error(f"❌ Error generating predictions: {str(e)}")
    else:
        # Classical ML model evaluation — artifacts were stored on the
        # Classical ML page under st.session_state.trained_models.
        model_data = st.session_state.trained_models[selected_model_name]
        model = model_data['model']
        X_test = model_data['X_test']
        y_test = model_data['y_test']
        predictions = model_data['predictions']
        problem_type = model_data['problem_type']
        st.subheader(f"📊 {selected_model_name} Evaluation")
        if problem_type == "Regression":
            # Regression metrics
            mse = mean_squared_error(y_test, predictions)
            mae = mean_absolute_error(y_test, predictions)
            r2 = r2_score(y_test, predictions)
            rmse = np.sqrt(mse)
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("RMSE", f"{rmse:.4f}")
            with col2:
                st.metric("MAE", f"{mae:.4f}")
            with col3:
                st.metric("R² Score", f"{r2:.4f}")
            with col4:
                st.metric("MSE", f"{mse:.4f}")
            # Residual analysis
            residuals = y_test - predictions
            col1, col2 = st.columns(2)
            with col1:
                # Residual plot
                fig = px.scatter(x=predictions, y=residuals,
                                 labels={'x': 'Predicted', 'y': 'Residuals'},
                                 title='Residual Plot')
                fig.add_hline(y=0, line_dash="dash", line_color="red")
                st.plotly_chart(fig, use_container_width=True)
            with col2:
                # Residual distribution
                fig = px.histogram(residuals, title='Residual Distribution',
                                   labels={'value': 'Residuals', 'count': 'Frequency'})
                st.plotly_chart(fig, use_container_width=True)
        else:
            # Classification metrics (weighted averaging for multiclass).
            accuracy = accuracy_score(y_test, predictions)
            precision = precision_score(y_test, predictions, average='weighted')
            recall = recall_score(y_test, predictions, average='weighted')
            f1 = f1_score(y_test, predictions, average='weighted')
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Accuracy", f"{accuracy:.4f}")
            with col2:
                st.metric("Precision", f"{precision:.4f}")
            with col3:
                st.metric("Recall", f"{recall:.4f}")
            with col4:
                st.metric("F1-Score", f"{f1:.4f}")
# ---- Page: SHAP-based model explainability for classical (non-PyCaret) models.
elif selected_page == "🔬 Explainability":
st.header("🔬 Model Explainability with SHAP")
if not SHAP_AVAILABLE:
st.warning("⚠️ SHAP is not installed. Explainability features are limited.")
st.stop()
if st.session_state.df is None:
st.warning("⚠️ Please load data first")
st.stop()
# Check for available models
if not st.session_state.trained_models and 'best_model' not in st.session_state:
st.warning("⚠️ No trained models available. Please train a model first.")
st.stop()
# Select model for explanation
available_models = list(st.session_state.trained_models.keys())
if 'best_model' in st.session_state:
available_models.append("PyCaret Best Model")
selected_model = st.selectbox("🤖 Select model to explain:", available_models)
# SHAP analysis is only wired up for classical models; the PyCaret option falls through.
if selected_model != "PyCaret Best Model":
model_data = st.session_state.trained_models[selected_model]
model = model_data['model']
features = model_data['features']
X_test = model_data['X_test']
# SHAP Explanation
st.subheader("🔬 SHAP Analysis")
try:
# Create SHAP explainer
with st.spinner("Creating SHAP explainer..."):
explainer = shap.Explainer(model, X_test.iloc[:100])  # Use subset for performance
shap_values = explainer(X_test.iloc[:100])
# Global feature importance
st.subheader("🌍 Global Feature Importance")
# NOTE(review): not every shap.plots.* function accepts an `ax` kwarg in all
# shap releases — verify against the installed version; failures land in the
# except handler below.
fig, ax = plt.subplots()
shap.plots.bar(shap_values, ax=ax, show=False)
st.pyplot(fig)
# Summary plot
st.subheader("📊 Feature Impact Summary")
fig, ax = plt.subplots()
shap.plots.beeswarm(shap_values, ax=ax, show=False)
st.pyplot(fig)
# Individual prediction explanation
st.subheader("🔍 Individual Prediction Explanation")
# NOTE(review): shap_values only covers the first 100 rows (computed above),
# but this slider allows indices up to len(X_test)-1 — selecting an index
# >= 100 raises IndexError (swallowed by the except below). Cap the slider
# at min(len(X_test), 100) - 1 to fix.
instance_idx = st.slider("Select instance:", 0, len(X_test)-1, 0)
fig, ax = plt.subplots()
shap.plots.waterfall(shap_values[instance_idx], ax=ax, show=False)
st.pyplot(fig)
# Feature dependence
if len(features) > 1:
st.subheader("📈 Feature Dependence")
feature_for_dependence = st.selectbox("Select feature:", features)
if feature_for_dependence in X_test.columns:
fig, ax = plt.subplots()
shap.plots.scatter(shap_values[:, feature_for_dependence], ax=ax, show=False)
st.pyplot(fig)
except Exception as e:
st.error(f"❌ Error generating SHAP explanations: {str(e)}")
st.info("💡 SHAP works best with tree-based models (Random Forest, XGBoost, etc.)")
# ---- Page: MLflow experiment tracking — configure a tracking server, log
# trained models with their params/metrics, and list recent runs.
elif selected_page == "📋 MLflow Tracking":
st.header("📋 MLflow Experiment Tracking")
if not MLFLOW_AVAILABLE:
st.warning("⚠️ MLflow is not installed. Install it to use experiment tracking.")
st.stop()
# MLflow Configuration
st.subheader("⚙️ MLflow Configuration")
col1, col2 = st.columns(2)
with col1:
tracking_uri = st.text_input("🔗 Tracking URI:", "http://localhost:5000")
experiment_name = st.text_input("🧪 Experiment Name:", "super_app_experiments")
with col2:
if st.button("🔧 Set MLflow Configuration"):
try:
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)
st.success("✅ MLflow configuration set!")
except Exception as e:
st.error(f"❌ Error setting MLflow: {str(e)}")
# Log current models
st.subheader("📊 Log Models to MLflow")
if st.session_state.trained_models:
model_to_log = st.selectbox("Select model to log:", list(st.session_state.trained_models.keys()))
if st.button("📤 Log Model"):
try:
# One run per click; run name embeds a timestamp for uniqueness.
with mlflow.start_run(run_name=f"{model_to_log}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"):
model_data = st.session_state.trained_models[model_to_log]
model = model_data['model']
# Log model
mlflow.sklearn.log_model(model, "model")
# Log parameters
mlflow.log_param("model_type", model_to_log)
mlflow.log_param("features", model_data['features'])
mlflow.log_param("target", model_data['target'])
# Log metrics (if available)
if 'predictions' in model_data:
y_test = model_data['y_test']
predictions = model_data['predictions']
if model_data['problem_type'] == "Regression":
mlflow.log_metric("mse", mean_squared_error(y_test, predictions))
mlflow.log_metric("mae", mean_absolute_error(y_test, predictions))
mlflow.log_metric("r2", r2_score(y_test, predictions))
else:
mlflow.log_metric("accuracy", accuracy_score(y_test, predictions))
st.success("✅ Model logged to MLflow!")
except Exception as e:
st.error(f"❌ Error logging model: {str(e)}")
# Display recent runs
st.subheader("📈 Recent Experiment Runs")
if st.button("🔄 Refresh Runs"):
try:
runs = mlflow.search_runs(order_by=["start_time desc"])
if not runs.empty:
# NOTE(review): these param/metric columns only exist once some run has
# logged them; selecting a missing column raises KeyError (caught below).
# Intersect with runs.columns to make this robust.
st.dataframe(runs[['run_id', 'status', 'start_time', 'params.model_type',
'metrics.mse', 'metrics.r2', 'metrics.accuracy']],
use_container_width=True)
else:
st.info("📊 No runs found. Start logging some models!")
except Exception as e:
st.error(f"❌ Error fetching runs: {str(e)}")
# ---- Page: model deployment — export a pickled model plus generated
# prediction-script / Dockerfile / Flask-API scaffolding for serving it.
elif selected_page == "🚀 Model Deployment":
st.header("🚀 Model Deployment & Export")
if not st.session_state.trained_models and 'best_model' not in st.session_state:
st.warning("⚠️ No trained models available for deployment.")
st.stop()
# Model selection for deployment
available_models = list(st.session_state.trained_models.keys())
if 'best_model' in st.session_state:
available_models.append("PyCaret Best Model")
selected_model = st.selectbox("🤖 Select model for deployment:", available_models)
# Model export options
st.subheader("💾 Export Options")
col1, col2, col3 = st.columns(3)
with col1:
if st.button("📦 Export Model (Pickle)"):
try:
import pickle
if selected_model == "PyCaret Best Model":
model_to_export = st.session_state.best_model
else:
model_to_export = st.session_state.trained_models[selected_model]['model']
# Serialize model
# Security note: pickle files must only ever be loaded from trusted
# sources — unpickling executes arbitrary code.
model_bytes = pickle.dumps(model_to_export)
st.download_button(
label="📥 Download Model",
data=model_bytes,
file_name=f"{selected_model.replace(' ', '_')}_model.pkl",
mime="application/octet-stream"
)
st.success("✅ Model ready for download!")
except Exception as e:
st.error(f"❌ Error exporting model: {str(e)}")
with col2:
if st.button("📄 Generate Prediction Script"):
# Generate Python script for predictions
# NOTE(review): this f-string contains no interpolation fields; the doubled
# braces are only needed because of the f prefix — a plain string would do.
script_content = f'''
import pandas as pd
import pickle
import numpy as np
# Load the trained model
def load_model(model_path):
with open(model_path, 'rb') as f:
model = pickle.load(f)
return model
# Make predictions
def predict(model, input_data):
"""
Make predictions using the trained model
Parameters:
model: Trained model object
input_data: pandas DataFrame with features
Returns:
predictions: numpy array of predictions
"""
predictions = model.predict(input_data)
return predictions
# Example usage
if __name__ == "__main__":
# Load your model
model = load_model("path_to_your_model.pkl")
# Create sample input data (replace with your actual data)
sample_data = pd.DataFrame({{
# Add your feature columns here
# 'feature1': [value1],
# 'feature2': [value2],
}})
# Make predictions
predictions = predict(model, sample_data)
print("Predictions:", predictions)
'''
st.download_button(
label="📥 Download Script",
data=script_content,
file_name=f"{selected_model.replace(' ', '_')}_prediction_script.py",
mime="text/plain"
)
st.success("✅ Prediction script ready!")
with col3:
if st.button("🐳 Generate Dockerfile"):
# NOTE(review): the Dockerfile copies `model.pkl` / `app.py`, but the pickle
# download above is named `<model>_model.pkl` — the user must rename files
# before `docker build`; consider aligning the names.
dockerfile_content = '''
FROM python:3.9-slim
WORKDIR /app
# Copy requirements
COPY requirements.txt .
RUN pip install -r requirements.txt
# Copy model and script
COPY model.pkl .
COPY app.py .
# Expose port
EXPOSE 8000
# Run the application
CMD ["python", "app.py"]
'''
requirements_content = '''
pandas==1.5.3
scikit-learn==1.3.0
numpy==1.24.3
flask==2.3.2
'''
col_a, col_b = st.columns(2)
with col_a:
st.download_button(
label="📥 Download Dockerfile",
data=dockerfile_content,
file_name="Dockerfile",
mime="text/plain"
)
with col_b:
st.download_button(
label="📥 Download Requirements",
data=requirements_content,
file_name="requirements.txt",
mime="text/plain"
)
st.success("✅ Docker files ready!")
# Model API endpoint generator
st.subheader("🌐 API Endpoint Generator")
if st.button("🔧 Generate Flask API"):
# Minimal Flask service: loads model.pkl once at startup, exposes /predict
# (POST, JSON body → prediction list) and /health. The f prefix forces the
# {{ }} escapes; no values are actually interpolated.
api_code = f'''
from flask import Flask, request, jsonify
import pandas as pd
import pickle
import numpy as np
app = Flask(__name__)
# Load model at startup
model = None
def load_model():
global model
with open('model.pkl', 'rb') as f:
model = pickle.load(f)
@app.route('/predict', methods=['POST'])
def predict():
try:
# Get data from request
data = request.get_json()
# Convert to DataFrame
df = pd.DataFrame([data])
# Make prediction
prediction = model.predict(df)
# Return result
return jsonify({{
'prediction': prediction.tolist(),
'status': 'success'
}})
except Exception as e:
return jsonify({{
'error': str(e),
'status': 'error'
}}), 400
@app.route('/health', methods=['GET'])
def health():
return jsonify({{'status': 'healthy'}})
if __name__ == '__main__':
load_model()
app.run(host='0.0.0.0', port=8000, debug=False)
'''
st.download_button(
label="📥 Download Flask API",
data=api_code,
file_name="app.py",
mime="text/plain"
)
st.success("✅ Flask API code ready!")
# Deployment instructions
st.subheader("📋 Deployment Instructions")
st.markdown("""
### 🚀 Deployment Steps:
1. **Local Deployment:**
- Download the model pickle file
- Download the prediction script or Flask API
- Install required dependencies: `pip install -r requirements.txt`
- Run the application: `python app.py`
2. **Docker Deployment:**
- Download all generated files (Dockerfile, requirements.txt, app.py, model.pkl)
- Build image: `docker build -t my-ml-app .`
- Run container: `docker run -p 8000:8000 my-ml-app`
3. **Cloud Deployment:**
- **AWS**: Upload to EC2 or use ECS with the Docker image
- **GCP**: Deploy to Google Cloud Run or App Engine
- **Azure**: Use Azure Container Instances or App Service
- **Heroku**: Push Docker image to Heroku Container Registry
4. **API Usage Example:**
```bash
curl -X POST http://localhost:8000/predict \
-H "Content-Type: application/json" \
-d '{"feature1": 1.0, "feature2": 2.0}'
```
""")
# Model performance summary
# Quick recap of the chosen model's metadata and hold-out metrics.
if selected_model != "PyCaret Best Model" and selected_model in st.session_state.trained_models:
st.subheader("📊 Model Summary for Deployment")
model_data = st.session_state.trained_models[selected_model]
col1, col2 = st.columns(2)
with col1:
st.write("**Model Details:**")
st.write(f"- **Type:** {selected_model}")
st.write(f"- **Problem Type:** {model_data['problem_type']}")
st.write(f"- **Features:** {len(model_data['features'])}")
st.write(f"- **Target:** {model_data['target']}")
with col2:
if 'predictions' in model_data:
y_test = model_data['y_test']
predictions = model_data['predictions']
st.write("**Performance Metrics:**")
if model_data['problem_type'] == "Regression":
r2 = r2_score(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
st.write(f"- **R² Score:** {r2:.4f}")
st.write(f"- **MAE:** {mae:.4f}")
else:
accuracy = accuracy_score(y_test, predictions)
st.write(f"- **Accuracy:** {accuracy:.4f}")
# ================== FOOTER ==================
# Three-column footer: dataset stats, destructive reset action, app info.
st.markdown("---")
col1, col2, col3 = st.columns(3)
with col1:
st.markdown("### 📊 Quick Stats")
if st.session_state.df is not None:
st.write(f"Dataset: {st.session_state.df.shape[0]} rows × {st.session_state.df.shape[1]} cols")
st.write(f"Models Trained: {len(st.session_state.trained_models)}")
with col2:
st.markdown("### 🔗 Quick Actions")
# Clears everything except the auth/demo flags, then reruns the script.
if st.button("🔄 Reset All Data", key="footer_reset"):
for key in list(st.session_state.keys()):
if key not in ['authenticated', 'demo_mode']:
del st.session_state[key]
st.success("✅ All data reset!")
st.rerun()
with col3:
st.markdown("### ℹ️ App Info")
st.write("Super Data Science App v2.0")
st.write(f"Session: {datetime.now().strftime('%Y-%m-%d %H:%M')}")
# ================== SIDEBAR STATUS ==================
st.sidebar.markdown("---")
st.sidebar.subheader("📊 Current Status")
# Data status
if st.session_state.df is not None:
st.sidebar.success(f"✅ Data Loaded ({st.session_state.df.shape[0]} rows)")
else:
st.sidebar.warning("⚠️ No Data Loaded")
# Models status
if st.session_state.trained_models:
st.sidebar.success(f"✅ {len(st.session_state.trained_models)} Classical Models")
else:
st.sidebar.info("ℹ️ No Classical Models")
if st.session_state.pycaret_setup_done:
st.sidebar.success("✅ PyCaret Setup Complete")
else:
st.sidebar.info("ℹ️ PyCaret Not Setup")
if st.session_state.dl_models:
st.sidebar.success(f"✅ {len(st.session_state.dl_models)} Deep Learning Models")
else:
st.sidebar.info("ℹ️ No Deep Learning Models")
# Available libraries status
st.sidebar.markdown("---")
st.sidebar.subheader("📚 Libraries Status")
st.sidebar.write(f"PyCaret: {'✅' if PYCARET_AVAILABLE else '❌'}")
st.sidebar.write(f"PyTorch: {'✅' if TORCH_AVAILABLE else '❌'}")
st.sidebar.write(f"MLflow: {'✅' if MLFLOW_AVAILABLE else '❌'}")
st.sidebar.write(f"SHAP: {'✅' if SHAP_AVAILABLE else '❌'}")
#st.sidebar.write(f"Profiling: {'✅' if PROFILING_AVAILABLE else '❌'}")
# Help section
# Static quick-start guide and usage tips rendered as sidebar markdown.
st.sidebar.markdown("---")
st.sidebar.subheader("❓ Need Help?")
st.sidebar.markdown("""
**Quick Start:**
1. 📊 Load data (sample or upload)
2. 🔍 Explore with EDA
3. 🤖 Train models (Classical or AutoML)
4. 🎯 Evaluate performance
5. 🚀 Deploy your model
**Tips:**
- Use sample data for quick testing
- PyCaret AutoML for best results
- Export models for production use
""")
# Advanced features hint
# On-demand info panel listing the app's power-user features.
if st.sidebar.button("🎯 Show Advanced Tips"):
st.sidebar.info("""
**Advanced Features:**
- Feature engineering in EDA
- Hyperparameter tuning in Classical ML
- Cross-validation in PyCaret
- SHAP explanations for interpretability
- MLflow for experiment tracking
- Docker deployment ready
""")
# Debug mode for development: list the non-private session-state keys so a
# developer can see what the app has accumulated this session.
if st.sidebar.checkbox("🐛 Debug Mode", key="debug_mode"):
    st.sidebar.subheader("🔧 Debug Info")
    st.sidebar.write("Session State Keys:")
    visible_keys = [k for k in st.session_state.keys() if not k.startswith('_')]
    for state_key in visible_keys:
        st.sidebar.write(f"- {state_key}")
# Performance optimization note
st.sidebar.markdown("---")
st.sidebar.caption("💡 For large datasets, consider using data sampling for faster processing")
# Timestamp refreshes on every Streamlit rerun, not on a wall-clock timer.
st.sidebar.caption(f"⏰ Last updated: {datetime.now().strftime('%H:%M:%S')}")
# Auto-refresh data (for development)
# Manual rerun trigger: Streamlit re-executes the entire script from the top.
if st.sidebar.button("🔄 Auto Refresh", key="auto_refresh"):
st.rerun()
# Export session state
# Offer a small downloadable summary of the current session.
if st.sidebar.button("💾 Export Session", key="export_session"):
    import json  # stdlib; imported locally to keep this fix self-contained

    session_data = {
        'trained_models_count': len(st.session_state.trained_models),
        'data_loaded': st.session_state.df is not None,
        'pycaret_setup': st.session_state.pycaret_setup_done,
        'timestamp': datetime.now().isoformat()
    }
    # Bug fix: str(dict) emitted Python-repr text (single quotes, True/False)
    # that no downstream tool can parse reliably; emit proper JSON instead,
    # with matching file extension and MIME type.
    st.sidebar.download_button(
        label="📥 Download Session Info",
        data=json.dumps(session_data, ensure_ascii=False, indent=2),
        file_name=f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
        mime="application/json"
    )
# Success message for completion
# Celebrate once data is loaded and both the classical-ML and PyCaret
# pipelines have been run in this session.
pipeline_complete = (
    st.session_state.df is not None
    and bool(st.session_state.trained_models)
    and bool(st.session_state.pycaret_setup_done)
)
if pipeline_complete:
    st.sidebar.success("🎉 Full Pipeline Complete!")
    st.sidebar.balloons()
# Warning for missing dependencies: collect every optional package whose
# availability flag is False and surface a ready-to-copy pip command.
missing_deps = [
    package
    for available, package in (
        (PYCARET_AVAILABLE, "pycaret"),
        (MLFLOW_AVAILABLE, "mlflow"),
        (SHAP_AVAILABLE, "shap"),
        # (PROFILING_AVAILABLE, "ydata-profiling"),  # intentionally disabled
    )
    if not available
]
if missing_deps:
    st.sidebar.warning(f"⚠️ Missing: {', '.join(missing_deps)}")
    st.sidebar.code(f"pip install {' '.join(missing_deps)}")
import random

# Fun facts: a pool of one-line ML tips surfaced on demand from the sidebar.
fun_facts = [
    "🧠 Machine Learning can predict with 95%+ accuracy in many domains",
    "🚀 AutoML can save 80% of model development time",
    "📊 Feature engineering often provides the biggest performance boost",
    "🔬 Model explainability is crucial for production deployment",
    "⚡ Ensemble methods usually outperform single models",
    "📈 Cross-validation prevents overfitting better than simple train/test split",
]

if st.sidebar.button("💡 Random ML Tip", key="random_tip"):
    tip = random.choice(fun_facts)
    st.sidebar.info(tip)
# Resource links
# External documentation for the main libraries this app builds on.
st.sidebar.markdown("---")
st.sidebar.subheader("📚 Resources")
st.sidebar.markdown("""
- [PyCaret Documentation](https://pycaret.org/)
- [MLflow Documentation](https://mlflow.org/)
- [SHAP Tutorials](https://shap.readthedocs.io/)
- [Scikit-learn Guide](https://scikit-learn.org/)
""")
# Version info and credits
# Static footer captions at the bottom of the sidebar.
st.sidebar.markdown("---")
st.sidebar.caption("🚀 Super Data Science App")
st.sidebar.caption("Version 2.0 - Full Pipeline")
st.sidebar.caption("Built with Streamlit ❤️")