import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import warnings
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVC, SVR
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (
mean_squared_error, mean_absolute_error, r2_score,
accuracy_score, precision_score, recall_score, f1_score,
confusion_matrix, classification_report, roc_auc_score
)
warnings.filterwarnings('ignore')
# MLflow and experiment tracking
try:
import mlflow
import mlflow.sklearn
MLFLOW_AVAILABLE = True
except ImportError:
MLFLOW_AVAILABLE = False
st.warning("MLflow not installed. Some features may be limited.")
# PyCaret imports
try:
from pycaret.classification import setup as cls_setup, compare_models as cls_compare, create_model as cls_create
from pycaret.classification import tune_model as cls_tune, finalize_model as cls_finalize, predict_model as cls_predict
from pycaret.classification import pull as cls_pull, plot_model as cls_plot, evaluate_model as cls_evaluate
from pycaret.regression import setup as reg_setup, compare_models as reg_compare, create_model as reg_create
from pycaret.regression import tune_model as reg_tune, finalize_model as reg_finalize, predict_model as reg_predict
from pycaret.regression import pull as reg_pull, plot_model as reg_plot, evaluate_model as reg_evaluate
PYCARET_AVAILABLE = True
except ImportError:
PYCARET_AVAILABLE = False
st.warning("PyCaret not installed. AutoML features will be limited.")
# Data profiling
#try:
# from ydata_profiling import ProfileReport
# from streamlit_pandas_profiling import st_profile_report
# PROFILING_AVAILABLE = True
#except ImportError:
# PROFILING_AVAILABLE = False
# PyTorch for deep learning
try:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
TORCH_AVAILABLE = True
except ImportError:
TORCH_AVAILABLE = False
# SHAP for explainability
try:
import shap
SHAP_AVAILABLE = True
except ImportError:
SHAP_AVAILABLE = False
# Scikit-learn imports
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
# ================== CUSTOM CSS & STYLING ==================
# Streamlit page-level configuration; must run before any other st.* output call.
st.set_page_config(
    page_title="๐ Super Data Science App",  # NOTE(review): emoji text appears mojibake-encoded — verify source file encoding
    layout="wide",
    initial_sidebar_state="expanded",
    page_icon="๐"
)
# Placeholder for injected custom CSS (currently an empty markdown block).
st.markdown("""
""", unsafe_allow_html=True)
# ================== HEADER ==================
# App title / tagline banner rendered as raw markdown/HTML.
st.markdown("""
๐ Super Data Science App
Complete ML Pipeline: EDA โ Modeling โ AutoML โ Explainability โ Deployment
""", unsafe_allow_html=True)
# ================== AUTHENTICATION ==================
def check_authentication():
    """Gate the app behind a simple password prompt or a demo-mode bypass.

    Uses ``st.session_state.authenticated`` as the login flag.  Until it is
    set, the sidebar shows a password field plus a "Demo Mode" button, and
    ``st.stop()`` halts the rest of the script on every rerun.
    """
    import hmac  # local import: used only for the constant-time comparison below

    if 'authenticated' not in st.session_state:
        st.session_state.authenticated = False
    if not st.session_state.authenticated:
        with st.sidebar:
            st.header("๐ Authentication")
            password = st.text_input("Enter Password", type="password", key="auth_password")
            col1, col2 = st.columns(2)
            with col1:
                if st.button("๐ Login", key="login_btn"):
                    # Constant-time comparison avoids a timing side-channel.
                    # NOTE(review): the password is still hard-coded in plain
                    # text — move it to an env var / st.secrets before shipping.
                    if hmac.compare_digest(password, "ds4everyone"):
                        st.session_state.authenticated = True
                        st.success("โ Access Granted!")
                        st.rerun()
                    else:
                        st.error("โ Incorrect Password")
            with col2:
                if st.button("๐ค Demo Mode", key="demo_btn"):
                    # Demo mode skips the password entirely.
                    st.session_state.authenticated = True
                    st.session_state.demo_mode = True
                    st.info("๐ Demo Mode Activated")
                    st.rerun()
        st.info("๐ Please authenticate to access the application")
        st.stop()  # prevent the rest of the page from rendering while locked
check_authentication()
# ================== SESSION STATE INITIALIZATION ==================
# Seed every cross-page session key exactly once so later pages can read them
# without guarding.  'model_comparison' is included here because the PyCaret
# page reads st.session_state.model_comparison unconditionally, but the
# original code only initialized it inside a file-upload *error* handler —
# a fresh session that never hit that path would crash with AttributeError.
_SESSION_DEFAULTS = {
    'df': None,                  # the active DataFrame shared by all pages
    'trained_models': {},        # name -> dict(model, splits, predictions, ...)
    'pycaret_setup_done': False, # True once cls_setup/reg_setup has run
    'best_model': None,          # finalized PyCaret model
    'dl_models': {},             # reserved for the Deep Learning page
    'training_history': {},      # reserved for the Deep Learning page
    'model_comparison': None,    # PyCaret compare_models leaderboard (pull())
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# ================== SIDEBAR NAVIGATION ==================
# Single-select page router; the chosen label drives the big if/elif
# dispatch that makes up the rest of the script.
st.sidebar.title("๐งญ Navigation")
pages = [
    "๐ Home",
    "๐ Data Loading",
    "๐ EDA & Profiling",
    "๐ Visualization",
    "๐ค Classical ML",
    "โก PyCaret AutoML",
    "๐ง Deep Learning",
    "๐ฏ Model Evaluation",
    "๐ฌ Explainability",
    "๐ MLflow Tracking",
    "๐ Model Deployment"
]
selected_page = st.sidebar.selectbox("Select Page", pages, key="page_selector")
# ================== UTILITY FUNCTIONS ==================
def load_sample_data(dataset_name):
    """Return a small demo DataFrame for the given dataset name.

    Known names: "California Housing", "Iris", "Wine Quality", "Titanic".
    The sklearn datasets load locally; the others are fetched over HTTP.
    Large datasets are down-sampled for UI responsiveness.

    Returns None (after surfacing a Streamlit error where applicable) when
    the name is unknown or a remote download fails.
    """
    if dataset_name == "California Housing":
        from sklearn.datasets import fetch_california_housing
        data = fetch_california_housing(as_frame=True)
        df = pd.concat([data.data, data.target.rename('MedHouseVal')], axis=1)
        return df.sample(n=min(2000, len(df)))  # limit for performance
    elif dataset_name == "Iris":
        from sklearn.datasets import load_iris
        data = load_iris(as_frame=True)
        df = pd.concat([data.data, data.target.rename('species')], axis=1)
        return df
    elif dataset_name == "Wine Quality":
        url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
        try:
            df = pd.read_csv(url, sep=';')
            return df.sample(n=min(1000, len(df)))
        # was a bare `except:` — catch Exception only and report the cause
        except Exception as e:
            st.error(f"Could not load Wine Quality dataset: {e}")
            return None
    elif dataset_name == "Titanic":
        url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
        try:
            df = pd.read_csv(url)
            return df
        except Exception as e:
            st.error(f"Could not load Titanic dataset: {e}")
            return None
    # Unknown dataset name — make the previously-implicit fallthrough explicit.
    return None
def get_dataset_info(df):
    """Summarize a DataFrame for display.

    Returns a dict with the shape, column names, dtypes, per-column missing
    counts, a human-readable deep memory-usage string, and the split of
    columns into numeric vs. non-numeric.
    """
    numeric = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical = df.select_dtypes(exclude=[np.number]).columns.tolist()
    mem_mb = df.memory_usage(deep=True).sum() / 1024**2
    return {
        'shape': df.shape,
        'columns': df.columns.tolist(),
        'dtypes': df.dtypes.to_dict(),
        'missing_values': df.isnull().sum().to_dict(),
        'memory_usage': f"{mem_mb:.2f} MB",
        'numeric_columns': numeric,
        'categorical_columns': categorical,
    }
# ================== PAGE CONTENT ==================
# Page router: exactly one branch below renders, selected by the sidebar.
if selected_page == "๐ Home":
    # Landing page: feature list plus quick dataset / model-count metrics.
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        st.markdown("""
## Welcome to the Super Data Science App! ๐
This comprehensive application provides a complete machine learning pipeline:
""")
        features = [
            "๐ **Data Loading**: Upload CSV or use sample datasets",
            "๐ **EDA & Profiling**: Automated data profiling and exploration",
            "๐ **Visualization**: Interactive charts with Plotly and Seaborn",
            "๐ค **Classical ML**: Scikit-learn models with hyperparameter tuning",
            "โก **PyCaret AutoML**: Automated machine learning with model comparison",
            "๐ฏ **Model Evaluation**: Comprehensive model performance analysis",
            "๐ฌ **Explainability**: SHAP values and feature importance",
            "๐ **MLflow Tracking**: Experiment tracking and model versioning",
            "๐ **Model Deployment**: Model export and deployment preparation"
        ]
        for feature in features:
            st.markdown(feature)
        st.markdown("---")
        # Quick stats — only shown once a dataset has been loaded.
        if st.session_state.df is not None:
            col_a, col_b, col_c, col_d = st.columns(4)
            with col_a:
                st.metric("๐ Rows", f"{st.session_state.df.shape[0]:,}")
            with col_b:
                st.metric("๐ Columns", f"{st.session_state.df.shape[1]:,}")
            with col_c:
                st.metric("๐ค Models Trained", len(st.session_state.trained_models))
            with col_d:
                st.metric("โ Setup Complete", "Ready" if st.session_state.pycaret_setup_done else "Pending")
elif selected_page == "๐ Data Loading":
    # Data Loading page: CSV upload, bundled sample datasets, and a summary
    # of whatever is currently held in st.session_state.df.
    st.header("๐ Data Loading & Management")
    col1, col2 = st.columns([1, 2])
    with col1:
        st.subheader("Data Source")
        data_source = st.radio(
            "Choose data source:",
            ["๐ Upload CSV", "๐ฒ Sample Datasets", "๐ Current Data Info"]
        )
    with col2:
        if data_source == "๐ Upload CSV":
            uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
            if uploaded_file is not None:
                try:
                    df = pd.read_csv(uploaded_file)
                    st.session_state.df = df
                    st.success(f"โ Successfully loaded {df.shape[0]} rows and {df.shape[1]} columns")
                except Exception as e:
                    st.error(f"โ Error loading file: {str(e)}")
                    # NOTE(review): this seed only runs on a *failed* upload, yet
                    # the PyCaret page reads model_comparison unconditionally —
                    # it belongs with the startup session-state initializers.
                    if 'model_comparison' not in st.session_state:
                        st.session_state.model_comparison = None
                    #st.error(f"โ Error loading file: {str(e)}")
        elif data_source == "๐ฒ Sample Datasets":
            sample_options = ["California Housing", "Iris", "Wine Quality", "Titanic"]
            selected_sample = st.selectbox("Choose sample dataset:", sample_options)
            if st.button(f"๐ Load {selected_sample} Dataset"):
                with st.spinner(f"Loading {selected_sample}..."):
                    # load_sample_data returns None on download failure.
                    df = load_sample_data(selected_sample)
                    if df is not None:
                        st.session_state.df = df
                        st.success(f"โ Loaded {selected_sample} dataset!")
        elif data_source == "๐ Current Data Info":
            if st.session_state.df is not None:
                info = get_dataset_info(st.session_state.df)
                col_a, col_b = st.columns(2)
                with col_a:
                    st.metric("๐ Rows", f"{info['shape'][0]:,}")
                    st.metric("๐ Columns", f"{info['shape'][1]:,}")
                    st.metric("๐พ Memory Usage", info['memory_usage'])
                with col_b:
                    st.metric("๐ข Numeric Columns", len(info['numeric_columns']))
                    st.metric("๐ Categorical Columns", len(info['categorical_columns']))
                    st.metric("โ Missing Values", sum(info['missing_values'].values()))
            else:
                st.info("๐ No data loaded yet")
    # Data Preview — shown for every data source once a DataFrame exists.
    if st.session_state.df is not None:
        st.subheader("๐ Data Preview")
        col1, col2, col3 = st.columns(3)
        with col1:
            show_rows = st.slider("Rows to display", 5, 50, 10)
        with col2:
            show_info = st.checkbox("Show column info", value=True)
        with col3:
            if st.button("๐พ Download Current Data"):
                csv = st.session_state.df.to_csv(index=False)
                st.download_button(
                    label="๐ฅ Download CSV",
                    data=csv,
                    file_name=f"processed_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                    mime='text/csv'
                )
        # Display data
        st.dataframe(st.session_state.df.head(show_rows), use_container_width=True)
        if show_info:
            st.subheader("๐ Column Information")
            info_df = pd.DataFrame({
                'Column': st.session_state.df.columns,
                'Data Type': st.session_state.df.dtypes,
                'Non-Null Count': st.session_state.df.count(),
                'Missing Values': st.session_state.df.isnull().sum(),
                'Missing %': (st.session_state.df.isnull().sum() / len(st.session_state.df) * 100).round(2)
            })
            st.dataframe(info_df, use_container_width=True)
elif selected_page == "๐ EDA & Profiling":
    # EDA page: headline stats, missing-value report, describe() summary,
    # per-column distributions, and a correlation heatmap.
    st.header("๐ Exploratory Data Analysis & Profiling")
    if st.session_state.df is None:
        st.warning("โ ๏ธ Please load data first from the Data Loading page")
        st.stop()
    df = st.session_state.df
    # Quick EDA
    st.subheader("๐ Quick Statistics")
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("๐ Dataset Shape", f"{df.shape[0]} ร {df.shape[1]}")
    with col2:
        st.metric("๐ข Numeric Columns", len(df.select_dtypes(include=[np.number]).columns))
    with col3:
        st.metric("๐ Text Columns", len(df.select_dtypes(exclude=[np.number]).columns))
    with col4:
        st.metric("โ Missing Values", df.isnull().sum().sum())
    # Missing Values Analysis — table + heatmap only for columns with gaps.
    st.subheader("โ Missing Values Analysis")
    missing_df = pd.DataFrame({
        'Column': df.columns,
        'Missing Count': df.isnull().sum(),
        'Missing Percentage': (df.isnull().sum() / len(df) * 100).round(2)
    }).sort_values('Missing Count', ascending=False)
    missing_df = missing_df[missing_df['Missing Count'] > 0]
    if len(missing_df) > 0:
        st.dataframe(missing_df, use_container_width=True)
        # Missing values heatmap
        fig, ax = plt.subplots(figsize=(12, 8))
        sns.heatmap(df.isnull(), yticklabels=False, cbar=True, cmap='viridis')
        plt.title('Missing Values Heatmap')
        st.pyplot(fig)
    else:
        st.success("โ No missing values found in the dataset!")
    # Statistical Summary
    st.subheader("๐ Statistical Summary")
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) > 0:
        st.dataframe(df[numeric_cols].describe(), use_container_width=True)
        # Distribution plots — histogram + KDE in a 2-column subplot grid.
        st.subheader("๐ Distribution Analysis")
        selected_cols = st.multiselect("Select columns for distribution analysis:", numeric_cols, default=numeric_cols[:3])
        if selected_cols:
            cols_per_row = 2
            # Ceiling division to size the subplot grid.
            n_rows = (len(selected_cols) + cols_per_row - 1) // cols_per_row
            fig, axes = plt.subplots(n_rows, cols_per_row, figsize=(15, 5*n_rows))
            # Normalize `axes` to a flat indexable sequence regardless of grid shape.
            if n_rows == 1:
                axes = [axes] if cols_per_row == 1 else axes
            else:
                axes = axes.flatten()
            for i, col in enumerate(selected_cols):
                sns.histplot(data=df, x=col, kde=True, ax=axes[i])
                axes[i].set_title(f'Distribution of {col}')
            # Hide empty subplots
            for i in range(len(selected_cols), len(axes)):
                axes[i].set_visible(False)
            plt.tight_layout()
            st.pyplot(fig)
    # Correlation Analysis — needs at least two numeric columns.
    if len(numeric_cols) > 1:
        st.subheader("๐ Correlation Analysis")
        corr_matrix = df[numeric_cols].corr()
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0,
                    square=True, fmt='.2f', ax=ax)
        plt.title('Correlation Matrix')
        st.pyplot(fig)
    # Automated Profiling Report — disabled; requires ydata_profiling (see
    # the commented-out import block near the top of the file).
    # if PROFILING_AVAILABLE:
    #     st.subheader("๐ Automated Profiling Report")
    #     if st.button("๐ Generate Comprehensive Profile Report"):
    #         with st.spinner("Generating detailed profiling report..."):
    #             profile = ProfileReport(df, title="Dataset Profiling Report", explorative=True)
    #             st_profile_report(profile)
elif selected_page == "๐ Visualization":
    # Visualization page: one Plotly chart chosen by `viz_type`, plus
    # value-count bars for low-cardinality categorical columns.
    st.header("๐ Interactive Data Visualization")
    if st.session_state.df is None:
        st.warning("โ ๏ธ Please load data first from the Data Loading page")
        st.stop()
    df = st.session_state.df
    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
    categorical_cols = df.select_dtypes(exclude=[np.number]).columns.tolist()
    # Visualization controls
    st.subheader("๐๏ธ Visualization Controls")
    col1, col2, col3 = st.columns(3)
    with col1:
        viz_type = st.selectbox("Select visualization type:", [
            "๐ Histogram", "๐ Scatter Plot", "๐ฆ Box Plot",
            "๐ฅ Heatmap", "๐ Line Plot", "๐ฏ Pair Plot"
        ])
    with col2:
        # Two-axis charts need x and y; the rest need a single column.
        if viz_type in ["๐ Scatter Plot", "๐ Line Plot"]:
            x_col = st.selectbox("X-axis:", numeric_cols + categorical_cols)
            y_col = st.selectbox("Y-axis:", numeric_cols)
        else:
            selected_col = st.selectbox("Select column:", numeric_cols if viz_type != "๐ฆ Box Plot" else df.columns)
    with col3:
        # Optional hue column, only for chart types that support it.
        if categorical_cols and viz_type in ["๐ Histogram", "๐ Scatter Plot", "๐ฆ Box Plot"]:
            color_col = st.selectbox("Color by (optional):", ["None"] + categorical_cols)
            color_col = None if color_col == "None" else color_col
        else:
            color_col = None
    # Generate visualizations
    st.subheader("๐ Visualization Output")
    try:
        if viz_type == "๐ Histogram":
            fig = px.histogram(df, x=selected_col, color=color_col,
                               title=f'Distribution of {selected_col}',
                               marginal="box")
            st.plotly_chart(fig, use_container_width=True)
        elif viz_type == "๐ Scatter Plot":
            # OLS trendline only when not split by color (one global fit).
            fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                             title=f'{y_col} vs {x_col}',
                             trendline="ols" if color_col is None else None)
            st.plotly_chart(fig, use_container_width=True)
        elif viz_type == "๐ฆ Box Plot":
            if color_col:
                fig = px.box(df, y=selected_col, x=color_col,
                             title=f'Box Plot of {selected_col} by {color_col}')
            else:
                fig = px.box(df, y=selected_col,
                             title=f'Box Plot of {selected_col}')
            st.plotly_chart(fig, use_container_width=True)
        elif viz_type == "๐ฅ Heatmap":
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(corr_matrix, text_auto=True, aspect="auto",
                                title="Correlation Heatmap")
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.warning("Need at least 2 numeric columns for correlation heatmap")
        elif viz_type == "๐ Line Plot":
            # Sort by x so the line is drawn left-to-right.
            fig = px.line(df.sort_values(x_col), x=x_col, y=y_col,
                          title=f'{y_col} vs {x_col} (Line Plot)')
            st.plotly_chart(fig, use_container_width=True)
        elif viz_type == "๐ฏ Pair Plot":
            if len(numeric_cols) >= 2:
                selected_numeric = st.multiselect("Select numeric columns for pair plot:",
                                                  numeric_cols, default=numeric_cols[:4])
                if len(selected_numeric) >= 2:
                    fig = px.scatter_matrix(df, dimensions=selected_numeric, color=color_col,
                                            title="Pair Plot Matrix")
                    st.plotly_chart(fig, use_container_width=True)
                else:
                    st.warning("Please select at least 2 numeric columns")
            else:
                st.warning("Need at least 2 numeric columns for pair plot")
    except Exception as e:
        st.error(f"Error generating visualization: {str(e)}")
    # Additional visualizations
    st.subheader("๐ Additional Insights")
    # Value counts for categorical columns
    if categorical_cols:
        st.write("**Categorical Column Distributions:**")
        for col in categorical_cols[:3]:  # Limit to first 3
            if df[col].nunique() <= 20:  # Only show if not too many categories
                fig = px.bar(df[col].value_counts().head(10),
                             title=f'Top 10 values in {col}')
                st.plotly_chart(fig, use_container_width=True)
elif selected_page == "๐ค Classical ML":
    # Classical ML page: configure target/features, auto-detect the problem
    # type, train one sklearn model, report metrics, and stash everything in
    # st.session_state.trained_models for the Evaluation/Explainability pages.
    st.header("๐ค Classical Machine Learning")
    if st.session_state.df is None:
        st.warning("โ ๏ธ Please load data first from the Data Loading page")
        st.stop()
    df = st.session_state.df
    # Model configuration
    st.subheader("โ๏ธ Model Configuration")
    col1, col2 = st.columns(2)
    with col1:
        # Target selection
        target_col = st.selectbox("๐ฏ Select target variable:", df.columns)
        # Feature selection
        available_features = [col for col in df.columns if col != target_col]
        selected_features = st.multiselect("๐ Select features:", available_features,
                                           default=available_features[:5])
    with col2:
        # Problem type detection
        # Heuristic: object/bool dtype or <10 distinct values => classification.
        if df[target_col].dtype in ['object', 'bool'] or df[target_col].nunique() < 10:
            problem_type = "Classification"
            st.info("๐ฏ Detected: Classification Problem")
            model_options = ["Logistic Regression", "Decision Tree", "Random Forest"]
        else:
            problem_type = "Regression"
            st.info("๐ Detected: Regression Problem")
            model_options = ["Linear Regression", "Decision Tree", "Random Forest"]
        selected_model = st.selectbox("๐ค Select model:", model_options)
        test_size = st.slider("๐ Test set size:", 0.1, 0.5, 0.2, 0.05)
    if not selected_features:
        st.warning("โ ๏ธ Please select at least one feature")
        st.stop()
    # Data preprocessing
    if st.button("๐ Train Model"):
        with st.spinner("Training model..."):
            try:
                # Prepare data
                X = df[selected_features].copy()
                y = df[target_col].copy()
                # Handle missing values
                # NOTE(review): X.mean() only fills numeric columns; NaNs in
                # object columns survive until astype(str) below turns them
                # into the literal string 'nan' — confirm that is intended.
                X = X.fillna(X.mean() if X.select_dtypes(include=[np.number]).shape[1] > 0 else X.mode().iloc[0])
                # Encode categorical variables (encoders kept for potential inverse use)
                le_dict = {}
                for col in X.select_dtypes(include=['object']).columns:
                    le = LabelEncoder()
                    X[col] = le.fit_transform(X[col].astype(str))
                    le_dict[col] = le
                # Encode target if classification
                if problem_type == "Classification" and y.dtype == 'object':
                    target_le = LabelEncoder()
                    y = target_le.fit_transform(y)
                # Split data (fixed seed for reproducibility)
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=test_size, random_state=42
                )
                # Train model
                if selected_model == "Linear Regression":
                    model = LinearRegression()
                elif selected_model == "Logistic Regression":
                    model = LogisticRegression(random_state=42, max_iter=1000)
                elif selected_model == "Decision Tree":
                    if problem_type == "Classification":
                        model = DecisionTreeClassifier(random_state=42)
                    else:
                        model = DecisionTreeRegressor(random_state=42)
                elif selected_model == "Random Forest":
                    if problem_type == "Classification":
                        model = RandomForestClassifier(random_state=42, n_estimators=100)
                    else:
                        model = RandomForestRegressor(random_state=42, n_estimators=100)
                model.fit(X_train, y_train)
                predictions = model.predict(X_test)
                # Store model + artifacts for the Evaluation/Explainability pages.
                st.session_state.trained_models[selected_model] = {
                    'model': model,
                    'X_test': X_test,
                    'y_test': y_test,
                    'predictions': predictions,
                    'features': selected_features,
                    'target': target_col,
                    'problem_type': problem_type
                }
                st.success("โ Model trained successfully!")
                # Display results
                st.subheader("๐ Model Performance")
                if problem_type == "Regression":
                    mse = mean_squared_error(y_test, predictions)
                    mae = mean_absolute_error(y_test, predictions)
                    r2 = r2_score(y_test, predictions)
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.metric("MSE", f"{mse:.4f}")
                    with col2:
                        st.metric("MAE", f"{mae:.4f}")
                    with col3:
                        st.metric("Rยฒ Score", f"{r2:.4f}")
                    # Actual vs Predicted plot with an identity reference line.
                    fig = px.scatter(x=y_test, y=predictions,
                                     labels={'x': 'Actual', 'y': 'Predicted'},
                                     title='Actual vs Predicted Values')
                    fig.add_shape(type="line", x0=y_test.min(), y0=y_test.min(),
                                  x1=y_test.max(), y1=y_test.max(),
                                  line=dict(color="red", dash="dash"))
                    st.plotly_chart(fig, use_container_width=True)
                else:  # Classification
                    accuracy = accuracy_score(y_test, predictions)
                    precision = precision_score(y_test, predictions, average='weighted')
                    recall = recall_score(y_test, predictions, average='weighted')
                    f1 = f1_score(y_test, predictions, average='weighted')
                    col1, col2, col3, col4 = st.columns(4)
                    with col1:
                        st.metric("Accuracy", f"{accuracy:.4f}")
                    with col2:
                        st.metric("Precision", f"{precision:.4f}")
                    with col3:
                        st.metric("Recall", f"{recall:.4f}")
                    with col4:
                        st.metric("F1-Score", f"{f1:.4f}")
                    # Confusion Matrix
                    cm = confusion_matrix(y_test, predictions)
                    fig, ax = plt.subplots(figsize=(8, 6))
                    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
                    ax.set_title('Confusion Matrix')
                    ax.set_xlabel('Predicted')
                    ax.set_ylabel('Actual')
                    st.pyplot(fig)
                # Feature importance (for tree-based models)
                if hasattr(model, 'feature_importances_'):
                    st.subheader("๐ Feature Importance")
                    importance_df = pd.DataFrame({
                        'Feature': selected_features,
                        'Importance': model.feature_importances_
                    }).sort_values('Importance', ascending=False)
                    fig = px.bar(importance_df, x='Importance', y='Feature',
                                 orientation='h', title='Feature Importance')
                    st.plotly_chart(fig, use_container_width=True)
            except Exception as e:
                st.error(f"โ Error training model: {str(e)}")
elif selected_page == "โก PyCaret AutoML":
    # PyCaret AutoML page: setup -> compare -> create -> tune -> finalize.
    # Intermediate artifacts live in st.session_state (pycaret_exp,
    # model_comparison, pycaret_model, tuned_model, best_model).
    st.header("โก PyCaret AutoML")
    if not PYCARET_AVAILABLE:
        st.error("โ PyCaret is not installed. Please install it to use AutoML features.")
        st.stop()
    if st.session_state.df is None:
        st.warning("โ ๏ธ Please load data first from the Data Loading page")
        st.stop()
    df = st.session_state.df
    # AutoML Configuration
    st.subheader("โ๏ธ AutoML Configuration")
    col1, col2 = st.columns(2)
    with col1:
        target_col = st.selectbox("๐ฏ Select target variable:", df.columns, key="pycaret_target")
        # Auto-detect problem type (same heuristic as the Classical ML page).
        if df[target_col].dtype in ['object', 'bool'] or df[target_col].nunique() < 10:
            problem_type = "classification"
            st.info("๐ฏ Detected: Classification Problem")
        else:
            problem_type = "regression"
            st.info("๐ Detected: Regression Problem")
    with col2:
        train_size = st.slider("๐ Training set size:", 0.5, 0.9, 0.8, 0.05)
        sample_size = st.slider("๐ Sample size (for performance):", 500, min(5000, len(df)), min(2000, len(df)))
    # Down-sample large datasets before handing them to PyCaret.
    if len(df) > sample_size:
        df_sample = df.sample(n=sample_size, random_state=42)
        st.info(f"๐ Using {sample_size} samples for faster processing")
    else:
        df_sample = df.copy()
    # Advanced settings
    # NOTE(review): cross_validation is collected but never passed to setup().
    with st.expander("๐ง Advanced Settings"):
        col1, col2 = st.columns(2)
        with col1:
            cross_validation = st.checkbox("๐ Cross Validation", value=True)
            normalize = st.checkbox("๐ Normalize Features", value=True)
        with col2:
            remove_outliers = st.checkbox("๐ซ Remove Outliers", value=False)
            feature_selection = st.checkbox("๐ฏ Feature Selection", value=False)
    # Setup PyCaret Environment
    if st.button("๐ Setup PyCaret Environment"):
        with st.spinner("Setting up PyCaret environment..."):
            try:
                # NOTE(review): the `silent` argument was removed in PyCaret 3.x —
                # confirm which PyCaret version this app targets.
                if problem_type == "classification":
                    st.session_state.pycaret_exp = cls_setup(
                        data=df_sample,
                        target=target_col,
                        train_size=train_size,
                        session_id=42,
                        normalize=normalize,
                        remove_outliers=remove_outliers,
                        feature_selection=feature_selection,
                        silent=True
                    )
                else:
                    st.session_state.pycaret_exp = reg_setup(
                        data=df_sample,
                        target=target_col,
                        train_size=train_size,
                        session_id=42,
                        normalize=normalize,
                        remove_outliers=remove_outliers,
                        feature_selection=feature_selection,
                        silent=True
                    )
                st.session_state.pycaret_setup_done = True
                st.session_state.pycaret_problem_type = problem_type
                st.success("โ PyCaret environment setup complete!")
            except Exception as e:
                st.error(f"โ Error setting up PyCaret: {str(e)}")
    # Model Comparison — only available once setup() has succeeded.
    if st.session_state.pycaret_setup_done:
        st.subheader("๐ Model Comparison")
        if st.button("๐ Compare Models"):
            with st.spinner("Comparing multiple models..."):
                try:
                    if st.session_state.pycaret_problem_type == "classification":
                        comparison_df = cls_compare(
                            include=['lr', 'rf', 'et', 'nb', 'dt', 'svm'],
                            sort='Accuracy',
                            n_select=5
                        )
                        # pull() grabs the leaderboard of the last command.
                        st.session_state.model_comparison = cls_pull()
                    else:
                        comparison_df = reg_compare(
                            include=['lr', 'rf', 'et', 'dt', 'huber'],
                            sort='R2',
                            n_select=5
                        )
                        st.session_state.model_comparison = reg_pull()
                    st.success("โ Model comparison complete!")
                except Exception as e:
                    st.error(f"โ Error comparing models: {str(e)}")
        # Display comparison results
        # NOTE(review): on a fresh session this attribute is only initialized
        # inside a file-upload error handler on the Data Loading page — reading
        # it here can raise AttributeError before a compare run; it should be
        # seeded with the other session-state defaults at startup.
        if st.session_state.model_comparison is not None:
            st.subheader("๐ Model Comparison Results")
            st.dataframe(st.session_state.model_comparison, use_container_width=True)
            # Select best model (PyCaret short model codes)
            best_model_name = st.selectbox(
                "๐ Select model for tuning:",
                ['lr', 'rf', 'et', 'dt', 'nb', 'svm'] if st.session_state.pycaret_problem_type == "classification"
                else ['lr', 'rf', 'et', 'dt', 'huber']
            )
            # Create and tune model
            col1, col2 = st.columns(2)
            with col1:
                if st.button("๐ฏ Create Model"):
                    with st.spinner("Creating model..."):
                        try:
                            if st.session_state.pycaret_problem_type == "classification":
                                model = cls_create(best_model_name)
                            else:
                                model = reg_create(best_model_name)
                            st.session_state.pycaret_model = model
                            st.success("โ Model created successfully!")
                        except Exception as e:
                            st.error(f"โ Error creating model: {str(e)}")
            with col2:
                if st.button("โก Tune Hyperparameters"):
                    if 'pycaret_model' in st.session_state:
                        with st.spinner("Tuning hyperparameters..."):
                            try:
                                if st.session_state.pycaret_problem_type == "classification":
                                    tuned_model = cls_tune(st.session_state.pycaret_model,
                                                           optimize='Accuracy', n_iter=10)
                                else:
                                    tuned_model = reg_tune(st.session_state.pycaret_model,
                                                           optimize='R2', n_iter=10)
                                st.session_state.tuned_model = tuned_model
                                st.success("โ Hyperparameter tuning complete!")
                            except Exception as e:
                                st.error(f"โ Error tuning model: {str(e)}")
                    else:
                        st.warning("โ ๏ธ Please create a model first")
            # Finalize model — prefers the tuned model, falls back to created one.
            if st.button("๐ Finalize Best Model"):
                if 'tuned_model' in st.session_state:
                    model_to_finalize = st.session_state.tuned_model
                elif 'pycaret_model' in st.session_state:
                    model_to_finalize = st.session_state.pycaret_model
                else:
                    st.warning("โ ๏ธ Please create a model first")
                    model_to_finalize = None
                if model_to_finalize is not None:
                    with st.spinner("Finalizing model..."):
                        try:
                            if st.session_state.pycaret_problem_type == "classification":
                                final_model = cls_finalize(model_to_finalize)
                            else:
                                final_model = reg_finalize(model_to_finalize)
                            st.session_state.best_model = final_model
                            st.success("โ Model finalized successfully!")
                        except Exception as e:
                            st.error(f"โ Error finalizing model: {str(e)}")
elif selected_page == "๐ฏ Model Evaluation":
    # Model Evaluation page: either PyCaret's built-in plots/predictions for
    # the finalized AutoML model, or metric dashboards for a stored sklearn
    # model from the Classical ML page.
    st.header("๐ฏ Advanced Model Evaluation")
    if st.session_state.df is None:
        st.warning("โ ๏ธ Please load data first")
        st.stop()
    # Check for available models
    available_models = []
    if st.session_state.trained_models:
        available_models.extend(list(st.session_state.trained_models.keys()))
    if 'best_model' in st.session_state and st.session_state.best_model is not None:
        available_models.append("PyCaret Best Model")
    if not available_models:
        st.warning("โ ๏ธ No trained models available. Please train a model first.")
        st.stop()
    selected_model_name = st.selectbox("๐ Select model to evaluate:", available_models)
    if selected_model_name == "PyCaret Best Model":
        if 'best_model' not in st.session_state:
            st.error("โ PyCaret model not available")
            st.stop()
        model_info = st.session_state.best_model
        # Defaults to 'regression' if setup never recorded the problem type.
        problem_type = st.session_state.get('pycaret_problem_type', 'regression')
        st.subheader("๐ PyCaret Model Evaluation")
        # PyCaret built-in plots
        if PYCARET_AVAILABLE:
            col1, col2 = st.columns(2)
            with col1:
                plot_types_cls = ['auc', 'confusion_matrix', 'class_report', 'pr', 'feature']
                plot_types_reg = ['residuals', 'feature', 'rfe', 'learning', 'vc']
                plot_types = plot_types_cls if problem_type == "classification" else plot_types_reg
                selected_plot = st.selectbox("๐ Select evaluation plot:", plot_types)
            with col2:
                if st.button("๐ Generate Plot"):
                    try:
                        with st.spinner("Generating plot..."):
                            if problem_type == "classification":
                                cls_plot(model_info, plot=selected_plot, display_format='streamlit')
                            else:
                                reg_plot(model_info, plot=selected_plot, display_format='streamlit')
                    except Exception as e:
                        st.error(f"โ Error generating plot: {str(e)}")
            # Model predictions — predict_model on the hold-out set, with CSV export.
            if st.button("๐ฎ Generate Predictions"):
                try:
                    with st.spinner("Generating predictions..."):
                        if problem_type == "classification":
                            predictions_df = cls_predict(model_info)
                        else:
                            predictions_df = reg_predict(model_info)
                        st.subheader("๐ฎ Model Predictions")
                        st.dataframe(predictions_df.head(20), use_container_width=True)
                        # Download predictions
                        csv = predictions_df.to_csv(index=False)
                        st.download_button(
                            label="๐ฅ Download Predictions",
                            data=csv,
                            file_name=f"predictions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                            mime='text/csv'
                        )
                except Exception as e:
                    st.error(f"โ Error generating predictions: {str(e)}")
    else:
        # Classical ML model evaluation — uses the artifacts stored at training time.
        model_data = st.session_state.trained_models[selected_model_name]
        model = model_data['model']
        X_test = model_data['X_test']
        y_test = model_data['y_test']
        predictions = model_data['predictions']
        problem_type = model_data['problem_type']
        st.subheader(f"๐ {selected_model_name} Evaluation")
        if problem_type == "Regression":
            # Regression metrics
            mse = mean_squared_error(y_test, predictions)
            mae = mean_absolute_error(y_test, predictions)
            r2 = r2_score(y_test, predictions)
            rmse = np.sqrt(mse)
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("RMSE", f"{rmse:.4f}")
            with col2:
                st.metric("MAE", f"{mae:.4f}")
            with col3:
                st.metric("Rยฒ Score", f"{r2:.4f}")
            with col4:
                st.metric("MSE", f"{mse:.4f}")
            # Residual analysis
            residuals = y_test - predictions
            col1, col2 = st.columns(2)
            with col1:
                # Residual plot — points should scatter evenly around y=0.
                fig = px.scatter(x=predictions, y=residuals,
                                 labels={'x': 'Predicted', 'y': 'Residuals'},
                                 title='Residual Plot')
                fig.add_hline(y=0, line_dash="dash", line_color="red")
                st.plotly_chart(fig, use_container_width=True)
            with col2:
                # Residual distribution
                fig = px.histogram(residuals, title='Residual Distribution',
                                   labels={'value': 'Residuals', 'count': 'Frequency'})
                st.plotly_chart(fig, use_container_width=True)
        else:
            # Classification metrics (weighted averages across classes)
            accuracy = accuracy_score(y_test, predictions)
            precision = precision_score(y_test, predictions, average='weighted')
            recall = recall_score(y_test, predictions, average='weighted')
            f1 = f1_score(y_test, predictions, average='weighted')
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Accuracy", f"{accuracy:.4f}")
            with col2:
                st.metric("Precision", f"{precision:.4f}")
            with col3:
                st.metric("Recall", f"{recall:.4f}")
            with col4:
                st.metric("F1-Score", f"{f1:.4f}")
elif selected_page == "๐ฌ Explainability":
    # Explainability page: SHAP global importance, beeswarm summary,
    # per-instance waterfall, and a dependence scatter for one feature.
    st.header("๐ฌ Model Explainability with SHAP")
    if not SHAP_AVAILABLE:
        st.warning("โ ๏ธ SHAP is not installed. Explainability features are limited.")
        st.stop()
    if st.session_state.df is None:
        st.warning("โ ๏ธ Please load data first")
        st.stop()
    # Check for available models
    if not st.session_state.trained_models and 'best_model' not in st.session_state:
        st.warning("โ ๏ธ No trained models available. Please train a model first.")
        st.stop()
    # Select model for explanation
    available_models = list(st.session_state.trained_models.keys())
    if 'best_model' in st.session_state:
        available_models.append("PyCaret Best Model")
    selected_model = st.selectbox("๐ค Select model to explain:", available_models)
    # NOTE(review): only sklearn models are handled; choosing
    # "PyCaret Best Model" currently renders nothing on this page.
    if selected_model != "PyCaret Best Model":
        model_data = st.session_state.trained_models[selected_model]
        model = model_data['model']
        features = model_data['features']
        X_test = model_data['X_test']
        # SHAP Explanation
        st.subheader("๐ฌ SHAP Analysis")
        try:
            # Create SHAP explainer on a 100-row subset for performance.
            with st.spinner("Creating SHAP explainer..."):
                explainer = shap.Explainer(model, X_test.iloc[:100])
                shap_values = explainer(X_test.iloc[:100])
            # Global feature importance
            # NOTE(review): current shap.plots.* APIs do not take an ax=
            # keyword — if these raise TypeError it is caught below; verify
            # against the installed SHAP version.
            st.subheader("๐ Global Feature Importance")
            fig, ax = plt.subplots()
            shap.plots.bar(shap_values, ax=ax, show=False)
            st.pyplot(fig)
            # Summary plot
            st.subheader("๐ Feature Impact Summary")
            fig, ax = plt.subplots()
            shap.plots.beeswarm(shap_values, ax=ax, show=False)
            st.pyplot(fig)
            # Individual prediction explanation
            # NOTE(review): slider allows indices up to len(X_test)-1 but
            # shap_values only covers the first 100 rows — confirm bound.
            st.subheader("๐ Individual Prediction Explanation")
            instance_idx = st.slider("Select instance:", 0, len(X_test)-1, 0)
            fig, ax = plt.subplots()
            shap.plots.waterfall(shap_values[instance_idx], ax=ax, show=False)
            st.pyplot(fig)
            # Feature dependence
            if len(features) > 1:
                st.subheader("๐ Feature Dependence")
                feature_for_dependence = st.selectbox("Select feature:", features)
                if feature_for_dependence in X_test.columns:
                    fig, ax = plt.subplots()
                    shap.plots.scatter(shap_values[:, feature_for_dependence], ax=ax, show=False)
                    st.pyplot(fig)
        except Exception as e:
            st.error(f"โ Error generating SHAP explanations: {str(e)}")
            st.info("๐ก SHAP works best with tree-based models (Random Forest, XGBoost, etc.)")
elif selected_page == "๐ MLflow Tracking":
st.header("๐ MLflow Experiment Tracking")
if not MLFLOW_AVAILABLE:
st.warning("โ ๏ธ MLflow is not installed. Install it to use experiment tracking.")
st.stop()
# MLflow Configuration
st.subheader("โ๏ธ MLflow Configuration")
col1, col2 = st.columns(2)
with col1:
tracking_uri = st.text_input("๐ Tracking URI:", "http://localhost:5000")
experiment_name = st.text_input("๐งช Experiment Name:", "super_app_experiments")
with col2:
if st.button("๐ง Set MLflow Configuration"):
try:
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)
st.success("โ
MLflow configuration set!")
except Exception as e:
st.error(f"โ Error setting MLflow: {str(e)}")
# Log current models
st.subheader("๐ Log Models to MLflow")
if st.session_state.trained_models:
model_to_log = st.selectbox("Select model to log:", list(st.session_state.trained_models.keys()))
if st.button("๐ค Log Model"):
try:
with mlflow.start_run(run_name=f"{model_to_log}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"):
model_data = st.session_state.trained_models[model_to_log]
model = model_data['model']
# Log model
mlflow.sklearn.log_model(model, "model")
# Log parameters
mlflow.log_param("model_type", model_to_log)
mlflow.log_param("features", model_data['features'])
mlflow.log_param("target", model_data['target'])
# Log metrics (if available)
if 'predictions' in model_data:
y_test = model_data['y_test']
predictions = model_data['predictions']
if model_data['problem_type'] == "Regression":
mlflow.log_metric("mse", mean_squared_error(y_test, predictions))
mlflow.log_metric("mae", mean_absolute_error(y_test, predictions))
mlflow.log_metric("r2", r2_score(y_test, predictions))
else:
mlflow.log_metric("accuracy", accuracy_score(y_test, predictions))
st.success("โ
Model logged to MLflow!")
except Exception as e:
st.error(f"โ Error logging model: {str(e)}")
# Display recent runs
st.subheader("๐ Recent Experiment Runs")
if st.button("๐ Refresh Runs"):
try:
runs = mlflow.search_runs(order_by=["start_time desc"])
if not runs.empty:
st.dataframe(runs[['run_id', 'status', 'start_time', 'params.model_type',
'metrics.mse', 'metrics.r2', 'metrics.accuracy']],
use_container_width=True)
else:
st.info("๐ No runs found. Start logging some models!")
except Exception as e:
st.error(f"โ Error fetching runs: {str(e)}")
elif selected_page == "๐ Model Deployment":
st.header("๐ Model Deployment & Export")
if not st.session_state.trained_models and 'best_model' not in st.session_state:
st.warning("โ ๏ธ No trained models available for deployment.")
st.stop()
# Model selection for deployment
available_models = list(st.session_state.trained_models.keys())
if 'best_model' in st.session_state:
available_models.append("PyCaret Best Model")
selected_model = st.selectbox("๐ค Select model for deployment:", available_models)
# Model export options
st.subheader("๐พ Export Options")
col1, col2, col3 = st.columns(3)
with col1:
if st.button("๐ฆ Export Model (Pickle)"):
try:
import pickle
if selected_model == "PyCaret Best Model":
model_to_export = st.session_state.best_model
else:
model_to_export = st.session_state.trained_models[selected_model]['model']
# Serialize model
model_bytes = pickle.dumps(model_to_export)
st.download_button(
label="๐ฅ Download Model",
data=model_bytes,
file_name=f"{selected_model.replace(' ', '_')}_model.pkl",
mime="application/octet-stream"
)
st.success("โ
Model ready for download!")
except Exception as e:
st.error(f"โ Error exporting model: {str(e)}")
with col2:
if st.button("๐ Generate Prediction Script"):
# Generate Python script for predictions
script_content = f'''
import pandas as pd
import pickle
import numpy as np
# Load the trained model
def load_model(model_path):
with open(model_path, 'rb') as f:
model = pickle.load(f)
return model
# Make predictions
def predict(model, input_data):
"""
Make predictions using the trained model
Parameters:
model: Trained model object
input_data: pandas DataFrame with features
Returns:
predictions: numpy array of predictions
"""
predictions = model.predict(input_data)
return predictions
# Example usage
if __name__ == "__main__":
# Load your model
model = load_model("path_to_your_model.pkl")
# Create sample input data (replace with your actual data)
sample_data = pd.DataFrame({{
# Add your feature columns here
# 'feature1': [value1],
# 'feature2': [value2],
}})
# Make predictions
predictions = predict(model, sample_data)
print("Predictions:", predictions)
'''
st.download_button(
label="๐ฅ Download Script",
data=script_content,
file_name=f"{selected_model.replace(' ', '_')}_prediction_script.py",
mime="text/plain"
)
st.success("โ
Prediction script ready!")
with col3:
if st.button("๐ณ Generate Dockerfile"):
dockerfile_content = '''
FROM python:3.9-slim
WORKDIR /app
# Copy requirements
COPY requirements.txt .
RUN pip install -r requirements.txt
# Copy model and script
COPY model.pkl .
COPY app.py .
# Expose port
EXPOSE 8000
# Run the application
CMD ["python", "app.py"]
'''
requirements_content = '''
pandas==1.5.3
scikit-learn==1.3.0
numpy==1.24.3
flask==2.3.2
'''
col_a, col_b = st.columns(2)
with col_a:
st.download_button(
label="๐ฅ Download Dockerfile",
data=dockerfile_content,
file_name="Dockerfile",
mime="text/plain"
)
with col_b:
st.download_button(
label="๐ฅ Download Requirements",
data=requirements_content,
file_name="requirements.txt",
mime="text/plain"
)
st.success("โ
Docker files ready!")
# Model API endpoint generator
st.subheader("๐ API Endpoint Generator")
if st.button("๐ง Generate Flask API"):
api_code = f'''
from flask import Flask, request, jsonify
import pandas as pd
import pickle
import numpy as np
app = Flask(__name__)
# Load model at startup
model = None
def load_model():
global model
with open('model.pkl', 'rb') as f:
model = pickle.load(f)
@app.route('/predict', methods=['POST'])
def predict():
try:
# Get data from request
data = request.get_json()
# Convert to DataFrame
df = pd.DataFrame([data])
# Make prediction
prediction = model.predict(df)
# Return result
return jsonify({{
'prediction': prediction.tolist(),
'status': 'success'
}})
except Exception as e:
return jsonify({{
'error': str(e),
'status': 'error'
}}), 400
@app.route('/health', methods=['GET'])
def health():
return jsonify({{'status': 'healthy'}})
if __name__ == '__main__':
load_model()
app.run(host='0.0.0.0', port=8000, debug=False)
'''
st.download_button(
label="๐ฅ Download Flask API",
data=api_code,
file_name="app.py",
mime="text/plain"
)
st.success("โ
Flask API code ready!")
# Deployment instructions
st.subheader("๐ Deployment Instructions")
st.markdown("""
### ๐ Deployment Steps:
1. **Local Deployment:**
- Download the model pickle file
- Download the prediction script or Flask API
- Install required dependencies: `pip install -r requirements.txt`
- Run the application: `python app.py`
2. **Docker Deployment:**
- Download all generated files (Dockerfile, requirements.txt, app.py, model.pkl)
- Build image: `docker build -t my-ml-app .`
- Run container: `docker run -p 8000:8000 my-ml-app`
3. **Cloud Deployment:**
- **AWS**: Upload to EC2 or use ECS with the Docker image
- **GCP**: Deploy to Google Cloud Run or App Engine
- **Azure**: Use Azure Container Instances or App Service
- **Heroku**: Push Docker image to Heroku Container Registry
4. **API Usage Example:**
```bash
curl -X POST http://localhost:8000/predict \
-H "Content-Type: application/json" \
-d '{"feature1": 1.0, "feature2": 2.0}'
```
""")
# Model performance summary
if selected_model != "PyCaret Best Model" and selected_model in st.session_state.trained_models:
st.subheader("๐ Model Summary for Deployment")
model_data = st.session_state.trained_models[selected_model]
col1, col2 = st.columns(2)
with col1:
st.write("**Model Details:**")
st.write(f"- **Type:** {selected_model}")
st.write(f"- **Problem Type:** {model_data['problem_type']}")
st.write(f"- **Features:** {len(model_data['features'])}")
st.write(f"- **Target:** {model_data['target']}")
with col2:
if 'predictions' in model_data:
y_test = model_data['y_test']
predictions = model_data['predictions']
st.write("**Performance Metrics:**")
if model_data['problem_type'] == "Regression":
r2 = r2_score(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)
st.write(f"- **Rยฒ Score:** {r2:.4f}")
st.write(f"- **MAE:** {mae:.4f}")
else:
accuracy = accuracy_score(y_test, predictions)
st.write(f"- **Accuracy:** {accuracy:.4f}")
# ================== FOOTER ==================
# Three-column footer: dataset stats, global reset action, app info.
st.markdown("---")
col1, col2, col3 = st.columns(3)
with col1:
    st.markdown("### ๐ Quick Stats")
    if st.session_state.df is not None:
        st.write(f"Dataset: {st.session_state.df.shape[0]} rows ร {st.session_state.df.shape[1]} cols")
    # Model count is shown regardless of data being loaded — assumed intent;
    # original indentation was stripped so nesting could not be verified.
    st.write(f"Models Trained: {len(st.session_state.trained_models)}")
with col2:
    st.markdown("### ๐ Quick Actions")
    if st.button("๐ Reset All Data", key="footer_reset"):
        # Wipe session state but preserve auth/demo flags so the user stays
        # logged in. Iterate a copied key list: deleting while iterating the
        # live view would raise.
        for key in list(st.session_state.keys()):
            if key not in ['authenticated', 'demo_mode']:
                del st.session_state[key]
        # BUGFIX: this success string was broken across two lines (mojibake).
        st.success("✅ All data reset!")
        st.rerun()
with col3:
    st.markdown("### โน๏ธ App Info")
    st.write("Super Data Science App v2.0")
    st.write(f"Session: {datetime.now().strftime('%Y-%m-%d %H:%M')}")
# ================== SIDEBAR STATUS ==================
# Live status badges: data, trained models, PyCaret setup, DL models,
# and the availability of each optional library.
st.sidebar.markdown("---")
st.sidebar.subheader("๐ Current Status")
# Data status
if st.session_state.df is not None:
    # BUGFIX: the '✅ ...' strings below were each broken across two physical
    # lines by an encoding mangle — rejoined into single valid literals.
    st.sidebar.success(f"✅ Data Loaded ({st.session_state.df.shape[0]} rows)")
else:
    st.sidebar.warning("โ ๏ธ No Data Loaded")
# Models status
if st.session_state.trained_models:
    st.sidebar.success(f"✅ {len(st.session_state.trained_models)} Classical Models")
else:
    st.sidebar.info("โน๏ธ No Classical Models")
if st.session_state.pycaret_setup_done:
    st.sidebar.success("✅ PyCaret Setup Complete")
else:
    st.sidebar.info("โน๏ธ PyCaret Not Setup")
if st.session_state.dl_models:
    st.sidebar.success(f"✅ {len(st.session_state.dl_models)} Deep Learning Models")
else:
    st.sidebar.info("โน๏ธ No Deep Learning Models")
# Available libraries status
st.sidebar.markdown("---")
st.sidebar.subheader("๐ Libraries Status")
st.sidebar.write(f"PyCaret: {'✅' if PYCARET_AVAILABLE else '❌'}")
st.sidebar.write(f"PyTorch: {'✅' if TORCH_AVAILABLE else '❌'}")
st.sidebar.write(f"MLflow: {'✅' if MLFLOW_AVAILABLE else '❌'}")
st.sidebar.write(f"SHAP: {'✅' if SHAP_AVAILABLE else '❌'}")
# BUGFIX: this disabled line was only half-commented (its continuation was live
# code), which was a syntax error — now fully commented out.
# st.sidebar.write(f"Profiling: {'✅' if PROFILING_AVAILABLE else '❌'}")
# Help section
# Static quick-start guide shown at the bottom of the sidebar.
st.sidebar.markdown("---")
st.sidebar.subheader("โ Need Help?")
# Markdown content kept flush-left inside the string so it is not rendered
# as a markdown code block.
st.sidebar.markdown("""
**Quick Start:**
1. ๐ Load data (sample or upload)
2. ๐ Explore with EDA
3. ๐ค Train models (Classical or AutoML)
4. ๐ฏ Evaluate performance
5. ๐ Deploy your model

**Tips:**
- Use sample data for quick testing
- PyCaret AutoML for best results
- Export models for production use
""")
# Advanced features hint — shown on demand to keep the sidebar compact.
if st.sidebar.button("๐ฏ Show Advanced Tips"):
    st.sidebar.info("""
**Advanced Features:**
- Feature engineering in EDA
- Hyperparameter tuning in Classical ML
- Cross-validation in PyCaret
- SHAP explanations for interpretability
- MLflow for experiment tracking
- Docker deployment ready
""")
# Debug mode for development
# Lists non-internal session-state keys so developers can inspect app state.
if st.sidebar.checkbox("๐ Debug Mode", key="debug_mode"):
    st.sidebar.subheader("๐ง Debug Info")
    st.sidebar.write("Session State Keys:")
    for key in st.session_state.keys():
        # Skip Streamlit-internal keys (leading underscore).
        if not key.startswith('_'):
            st.sidebar.write(f"- {key}")
# Performance optimization note
st.sidebar.markdown("---")
st.sidebar.caption("๐ก For large datasets, consider using data sampling for faster processing")
st.sidebar.caption(f"โฐ Last updated: {datetime.now().strftime('%H:%M:%S')}")
# Auto-refresh data (for development)
if st.sidebar.button("๐ Auto Refresh", key="auto_refresh"):
    st.rerun()
# Export session state
# Downloads a small text summary of the current session (counts + flags),
# not the actual models/data.
if st.sidebar.button("๐พ Export Session", key="export_session"):
    session_data = {
        'trained_models_count': len(st.session_state.trained_models),
        'data_loaded': st.session_state.df is not None,
        'pycaret_setup': st.session_state.pycaret_setup_done,
        'timestamp': datetime.now().isoformat()
    }
    # str(dict) is intentional here — the file is informational, not re-parsed.
    st.sidebar.download_button(
        label="๐ฅ Download Session Info",
        data=str(session_data),
        file_name=f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
        mime="text/plain"
    )
# Success message for completion: data loaded + classical models trained +
# PyCaret setup all done.
if (st.session_state.df is not None and
        st.session_state.trained_models and
        st.session_state.pycaret_setup_done):
    st.sidebar.success("๐ Full Pipeline Complete!")
    st.sidebar.balloons()
# Warning for missing dependencies
# Collect optional libraries that failed to import and offer the pip command.
missing_deps = []
if not PYCARET_AVAILABLE:
    missing_deps.append("pycaret")
if not MLFLOW_AVAILABLE:
    missing_deps.append("mlflow")
if not SHAP_AVAILABLE:
    missing_deps.append("shap")
# if not PROFILING_AVAILABLE:
#     missing_deps.append("ydata-profiling")
if missing_deps:
    st.sidebar.warning(f"โ ๏ธ Missing: {', '.join(missing_deps)}")
    st.sidebar.code(f"pip install {' '.join(missing_deps)}")
# Fun facts
fun_facts = [
    "๐ง Machine Learning can predict with 95%+ accuracy in many domains",
    "๐ AutoML can save 80% of model development time",
    "๐ Feature engineering often provides the biggest performance boost",
    "๐ฌ Model explainability is crucial for production deployment",
    "โก Ensemble methods usually outperform single models",
    "๐ Cross-validation prevents overfitting better than simple train/test split"
]
# NOTE(review): mid-file import kept to avoid touching the file header;
# ideally move to the top-of-file import block.
import random
if st.sidebar.button("๐ก Random ML Tip", key="random_tip"):
    st.sidebar.info(random.choice(fun_facts))
# Resource links
# External documentation links shown at the bottom of the sidebar.
st.sidebar.markdown("---")
st.sidebar.subheader("๐ Resources")
st.sidebar.markdown("""
- [PyCaret Documentation](https://pycaret.org/)
- [MLflow Documentation](https://mlflow.org/)
- [SHAP Tutorials](https://shap.readthedocs.io/)
- [Scikit-learn Guide](https://scikit-learn.org/)
""")
# Version info and credits
st.sidebar.markdown("---")
st.sidebar.caption("๐ Super Data Science App")
st.sidebar.caption("Version 2.0 - Full Pipeline")
st.sidebar.caption("Built with Streamlit โค๏ธ")