# Source: Hugging Face Space "VLAI-AIVN / AIO2025M06_DEMO_MLP_REGRESSION" (Space status when captured: Sleeping).
# File size: 34,378 Bytes
import gradio as gr
import pandas as pd
import vlai_template

# Try to import the MLP backend; record the outcome so callbacks can switch
# to the fallback helpers when it is missing.
try:
    from src import mlp_regression
except ImportError as exc:
    print(f"❌ MLP module failed to load: {exc}")
    mlp_regression = None
    MLP_AVAILABLE = False
else:
    MLP_AVAILABLE = True

# Pass project metadata, color palette and font to the vlai_template module
# (presumably read by its header/footer/CSS helpers used further down —
# confirm in vlai_template).
vlai_template.configure(
    project_name="MLP (Multi-Layer Perceptron) Regression Demo",
    year="2025",
    module="06",
    description="Interactive demonstration of Multi-Layer Perceptron (MLP) for regression. Build, train, and visualize neural networks with customizable architectures, activation functions, optimizers, and regularization techniques.",
    colors={
        "primary": "#1976D2",
        "accent": "#7B1FA2", 
        "bg1": "#E3F2FD",
        "bg2": "#BBDEFB",
        "bg3": "#90CAF9",
    },
    font_family="'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif"
)

# Most recently loaded dataset, shared by the callbacks in this file through
# `global`; the loaders set it to the loaded DataFrame and reset it to None
# when loading fails.
current_dataframe = None

def load_sample_data_fallback(dataset_choice="California Housing"):
    """Load a built-in sample dataset without going through the MLP backend.

    Args:
        dataset_choice: Name of the sample dataset, either
            "California Housing" or "Synthetic".

    Returns:
        A DataFrame holding the feature columns plus a "target" column.

    Raises:
        ValueError: If ``dataset_choice`` is not a known dataset name. This is
            checked before scikit-learn is imported, so a bad name fails fast.
    """

    def sklearn_to_df(data):
        # Bunch objects without feature names get generated "feature_i" names
        # instead of a bare integer index.
        names = getattr(data, "feature_names", None)
        if names is None:
            names = [f"feature_{i}" for i in range(data.data.shape[1])]
        df = pd.DataFrame(data.data, columns=list(names))
        df["target"] = data.target
        return df

    def california_housing():
        # Imported lazily: only needed when this dataset is requested.
        from sklearn.datasets import fetch_california_housing

        return sklearn_to_df(fetch_california_housing())

    def synthetic_regression():
        from sklearn.datasets import make_regression

        X, y = make_regression(n_samples=1000, n_features=20, n_informative=15,
                               noise=10.0, random_state=42)
        df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
        df["target"] = y
        return df

    datasets = {
        "California Housing": california_housing,
        "Synthetic": synthetic_regression,
    }

    if dataset_choice not in datasets:
        raise ValueError(f"Unknown dataset: {dataset_choice}")
    return datasets[dataset_choice]()

def create_input_components_fallback(df, target_col):
    """Describe one input widget per feature column when the MLP backend is unavailable.

    Args:
        df: Dataset containing the feature columns (and the target column).
        target_col: Name of the target column, which is excluded from the result.

    Returns:
        A list of dicts, one per feature column in ``df`` order. Numeric columns
        yield ``{"name", "type": "number", "value", "minimum", "maximum"}`` with
        the column mean (rounded to 3 decimals, 0.0 if undefined) as value.
        Every other column yields ``{"name", "type": "dropdown", "choices",
        "value"}`` with the sorted distinct values rendered as strings.
    """
    components = []
    for col in df.columns:
        if col == target_col:
            continue
        data = df[col]
        # Decide by "is numeric" rather than "is object": categorical and
        # string-dtype columns are not object dtype but still need a dropdown.
        if pd.api.types.is_numeric_dtype(data):
            val = pd.to_numeric(data, errors="coerce").dropna().mean()
            val = 0.0 if pd.isna(val) else float(val)
            components.append(
                {
                    "name": col,
                    "type": "number",
                    "value": round(val, 3),
                    "minimum": None,
                    "maximum": None,
                }
            )
        else:
            # An all-missing column still needs one selectable choice.
            uniq = sorted(map(str, data.dropna().unique())) or ["N/A"]
            components.append(
                {"name": col, "type": "dropdown", "choices": uniq, "value": uniq[0]}
            )
    return components

# Per-sample-dataset defaults: both built-in datasets use the "target" column
# and are treated as regression problems.
SAMPLE_DATA_CONFIG = {
    dataset_name: {"target_column": "target", "problem_type": "regression"}
    for dataset_name in ("California Housing", "Synthetic")
}

# JavaScript snippet handed to gr.Blocks(js=...): when the page URL has no
# `__theme` query parameter it sets `__theme=light` and reassigns
# window.location.search with the updated query string.
force_light_theme_js = """
() => {
  const params = new URLSearchParams(window.location.search);
  if (!params.has('__theme')) {
    params.set('__theme', 'light');
    window.location.search = params.toString();
  }
}
"""

def validate_config(df, target_col):
    """Check that ``target_col`` is usable as a regression target in ``df``.

    Returns:
        A ``(is_valid, message, problem_type)`` tuple. ``problem_type`` is
        "regression" on success and None on any failure (unknown column,
        missing values, or non-numeric values).
    """
    if not (target_col and target_col in df.columns):
        return False, "❌ Please select a valid target column from the dropdown.", None

    column = df[target_col]

    if column.isnull().any():
        return False, "⚠️ Target column has missing values. Please clean your data.", None

    # Object columns are rejected outright; anything else must convert cleanly.
    looks_numeric = not (column.dtype == "object")
    if looks_numeric:
        try:
            pd.to_numeric(column, errors="raise")
        except (ValueError, TypeError):
            looks_numeric = False

    if not looks_numeric:
        return False, "⚠️ Target must be numeric for regression. Please select a numeric column.", None

    return True, "\n✅ Configuration is valid! Ready for regression with continuous target values.", "regression"


def get_status_message(is_sample, dataset_choice, target_col, problem_type, is_valid, validation_msg):
    """Build the Markdown status line shown under the data controls.

    Args:
        is_sample: True when a built-in sample dataset is loaded.
        dataset_choice: Name of the sample dataset (used only for samples).
        target_col: Selected target column, or None if not chosen yet.
        problem_type: Problem type label such as "regression", or None.
        is_valid: Whether the configuration passed validation (custom data only).
        validation_msg: Validation message appended for custom data.

    Returns:
        The status string. A sample dataset with no known problem type is
        labelled "Unknown" instead of raising on ``None.title()``.
    """
    if is_sample:
        type_label = problem_type.title() if problem_type else "Unknown"
        return f"✅ **Selected Dataset**: {dataset_choice} | **Target**: {target_col} | **Type**: {type_label}"
    if target_col and problem_type:
        status_icon = "✅" if is_valid else "⚠️"
        return f"{status_icon} **Custom Data** | **Target**: {target_col} | **Type**: {problem_type.title()} | {validation_msg}"
    return "📁 **Custom data uploaded!** 👆 Please select target column above to continue."


def load_and_configure_data_simple(dataset_choice="California Housing"):
    """Load a sample dataset and return only the preview, target dropdown and status.

    Stores the loaded frame in the module-level ``current_dataframe`` (reset to
    None on failure).

    Returns:
        ``[preview_df, target_dropdown, status_markdown]``.
    """
    global current_dataframe
    try:
        # Use the backend loader when present, otherwise the local fallback.
        if MLP_AVAILABLE:
            frame = mlp_regression.load_data(None, dataset_choice)
        else:
            frame = load_sample_data_fallback(dataset_choice)

        current_dataframe = frame

        column_names = frame.columns.tolist()
        sample_cfg = SAMPLE_DATA_CONFIG.get(dataset_choice, {})
        target_col = sample_cfg.get("target_column")
        problem_type = sample_cfg.get("problem_type")

        if not target_col or target_col not in column_names:
            # Configured target is absent: fall back to the first column.
            target_col = column_names[0] if column_names else None
            is_valid, validation_msg = False, ""
        else:
            is_valid, validation_msg, detected = validate_config(frame, target_col)
            problem_type = detected or problem_type

        status_msg = get_status_message(
            True, dataset_choice, target_col, problem_type, is_valid, validation_msg
        )
        preview = frame.head(5).round(2)
        target_dropdown = gr.Dropdown(choices=column_names, value=target_col)
        return [preview, target_dropdown, status_msg]

    except Exception as e:
        current_dataframe = None
        failure = f"❌ **Error loading data**: {e} | Please try a different dataset."
        return [pd.DataFrame(), gr.Dropdown(choices=[], value=None), failure]


def _read_uploaded_table(file_obj):
    """Read an uploaded CSV/Excel file into a DataFrame.

    Accepts either an object exposing ``.name`` or a plain path string, and
    matches the file extension case-insensitively (e.g. ``DATA.CSV``).
    """
    path = getattr(file_obj, "name", file_obj)
    lowered = str(path).lower()
    if lowered.endswith(".csv"):
        return pd.read_csv(path)
    if lowered.endswith((".xlsx", ".xls")):
        return pd.read_excel(path)
    raise ValueError("Unsupported format. Upload CSV or Excel files.")


def _build_input_updates(components_info, max_features=20):
    """Map component descriptors onto the interleaved (Number, Dropdown) widget slots.

    Slot ``2*i`` is the Number widget and slot ``2*i + 1`` the Dropdown widget
    of feature ``i``; features beyond ``max_features`` get no widget.
    """
    updates = [gr.update(visible=False)] * (max_features * 2)
    for i in range(min(max_features, len(components_info))):
        comp = components_info[i]
        number_idx, dropdown_idx = i * 2, i * 2 + 1
        if comp["type"] == "number":
            upd = {"visible": True, "label": comp["name"], "value": comp["value"]}
            if comp["minimum"] is not None:
                upd["minimum"] = comp["minimum"]
            if comp["maximum"] is not None:
                upd["maximum"] = comp["maximum"]
            updates[number_idx] = gr.update(**upd)
            updates[dropdown_idx] = gr.update(visible=False)
        else:
            updates[number_idx] = gr.update(visible=False)
            updates[dropdown_idx] = gr.update(
                visible=True, label=comp["name"], choices=comp["choices"], value=comp["value"]
            )
    return updates


def load_and_configure_data(file_obj=None, dataset_choice="California Housing"):
    """Load an uploaded file or a sample dataset and prepare every dependent UI output.

    Stores the loaded frame in the module-level ``current_dataframe`` (reset to
    None on failure).

    Args:
        file_obj: Uploaded file (object with ``.name`` or a path), or None to
            load the sample dataset named by ``dataset_choice``.
        dataset_choice: Sample dataset name, used only when ``file_obj`` is None.

    Returns:
        A 45-item list: data preview, target dropdown, status Markdown, 40
        feature-widget updates, inputs-group visibility update, input status.
    """
    global current_dataframe
    try:
        if MLP_AVAILABLE:
            df = mlp_regression.load_data(file_obj, dataset_choice)
        elif file_obj is not None:
            # Fallback upload handling without the core module.
            df = _read_uploaded_table(file_obj)
        else:
            df = load_sample_data_fallback(dataset_choice)

        current_dataframe = df

        target_options = df.columns.tolist()
        is_sample = file_obj is None

        if is_sample:
            cfg = SAMPLE_DATA_CONFIG.get(dataset_choice, {})
            target_col = cfg.get("target_column")
            problem_type = cfg.get("problem_type")
        else:
            # Custom uploads start without a target; the user picks one later.
            target_col, problem_type = None, None

        is_valid, validation_msg = False, ""
        if target_col:
            is_valid, validation_msg, detected = validate_config(df, target_col)
            if detected:
                problem_type = detected
        status_msg = get_status_message(is_sample, dataset_choice, target_col, problem_type, is_valid, validation_msg)

        input_updates = [gr.update(visible=False)] * 40
        inputs_visible = gr.update(visible=False)
        input_status = "⚙️ Configure target column above to enable feature inputs."

        if target_col and problem_type and (not is_sample or is_valid):
            try:
                if MLP_AVAILABLE:
                    components_info = mlp_regression.create_input_components(df, target_col)
                else:
                    components_info = create_input_components_fallback(df, target_col)
                input_updates = _build_input_updates(components_info)
                inputs_visible = gr.update(visible=True)
                input_status = f"📝 **Ready!** Enter values for {len(components_info)} features below, then click Run prediction. | {validation_msg}"
            except Exception as e:
                # Keep the loaded data usable even if widget generation fails.
                input_updates = [gr.update(visible=False)] * 40
                input_status = f"❌ Error generating inputs: {str(e)}"

        return [df.head(5).round(2), gr.Dropdown(choices=target_options, value=target_col), status_msg] + input_updates + [inputs_visible, input_status]

    except Exception as e:
        current_dataframe = None
        empty = [pd.DataFrame(), gr.Dropdown(choices=[], value=None), f"❌ **Error loading data**: {str(e)} | Please try a different file or dataset."]
        return empty + [gr.update(visible=False)] * 40 + [gr.update(visible=False), "No data loaded."]


def update_learning_rate_display(lr_power):
    """Render the learning rate that a slider position stands for.

    The slider position (truncated to int) indexes a fixed table of seven
    rates from 1e-6 up to 1; positions outside the table render as "N/A".
    """
    # (actual rate, short label) for each slider position, in order.
    rate_table = (
        (0.000001, "1e-6"),
        (0.00001, "1e-5"),
        (0.0001, "1e-4"),
        (0.001, "1e-3"),
        (0.01, "1e-2"),
        (0.1, "1e-1"),
        (1.0, "1"),
    )

    position = int(lr_power)
    if position < 0 or position >= len(rate_table):
        return "**Current Learning Rate:** N/A"

    rate, label = rate_table[position]
    return f"**Current Learning Rate:** {rate} ({label})"


def update_batch_size_display(batch_size_power, train_split):
    """Render the batch size that a slider position stands for.

    Reads the module-level ``current_dataframe``. With n = int(rows *
    train_split) training rows, positions up to int(log2(n)) mean a batch of
    2**position samples; any position above that means the full batch.
    """
    global current_dataframe
    import math

    data = current_dataframe
    if data is None or data.empty:
        return "**Current Batch Size:** N/A"

    n_train = int(len(data) * train_split)
    # Largest exponent whose power of two still fits in the training set.
    top_exponent = int(math.log2(n_train)) if n_train > 0 else 0

    if batch_size_power >= top_exponent + 1:
        return f"**Current Batch Size:** Full Batch ({n_train} samples)"

    exponent = int(batch_size_power)
    return f"**Current Batch Size:** {2 ** exponent} samples (2^{exponent})"


def update_batch_size_slider(df_preview, target_col, train_split):
    """Resize the batch-size slider to the current training set and select Full Batch.

    Reads the module-level ``current_dataframe``; ``df_preview`` and
    ``target_col`` are accepted for the event signature but not used. Without
    data the slider is reset to maximum 10, value 10.
    """
    global current_dataframe
    import math

    data = current_dataframe
    if data is None or data.empty:
        return gr.update(maximum=10, value=10)

    n_train = int(len(data) * train_split)
    top_exponent = int(math.log2(n_train)) if n_train > 0 else 0

    # One position past the largest usable power of two stands for Full Batch,
    # which is also the default selection.
    full_batch_position = top_exponent + 1
    return gr.update(maximum=full_batch_position, value=full_batch_position)


def _parse_layer_configs(*args):
    hidden_layers_config = []
    layer_configs = list(zip(args[::2], args[1::2]))
    
    for neurons, activation in layer_configs:
        if neurons is not None and neurons > 0:
            try:
                neurons_int = int(neurons)
                if neurons_int > 0:
                    hidden_layers_config.append({
                        'neurons': neurons_int,
                        'activation': activation if activation else 'relu'
                    })
            except (ValueError, TypeError):
                continue
    return hidden_layers_config

def update_configuration(df_preview, target_col):
    """Rebuild the feature-input widgets and status texts for the selected target.

    Reads the module-level ``current_dataframe``; ``df_preview`` is accepted
    for the event signature but not used.

    Returns:
        A 43-item list: 40 feature-widget updates (Number/Dropdown interleaved),
        the inputs-group visibility update, the input status text and the main
        status text.
    """
    global current_dataframe
    df = current_dataframe

    def hidden(message):
        # Hide every feature widget and show the same message in both status areas.
        return [gr.update(visible=False)] * 40 + [gr.update(visible=False), message, message]

    if df is None or df.empty:
        return hidden("No data available.")
    if not target_col:
        return hidden("Select target column.")

    try:
        is_valid, validation_msg, problem_type = validate_config(df, target_col)
        if not is_valid:
            # validation_msg already starts with its own icon; do not add a second one.
            return hidden(validation_msg)

        if MLP_AVAILABLE:
            components_info = mlp_regression.create_input_components(df, target_col)
        else:
            components_info = create_input_components_fallback(df, target_col)

        input_updates = [gr.update(visible=False)] * 40
        # Only the first 20 features have widgets: slot 2*i is the Number,
        # slot 2*i + 1 the Dropdown of feature i.
        for i in range(min(20, len(components_info))):
            comp = components_info[i]
            number_idx, dropdown_idx = i * 2, i * 2 + 1
            if comp["type"] == "number":
                upd = {"visible": True, "label": comp["name"], "value": comp["value"]}
                if comp["minimum"] is not None:
                    upd["minimum"] = comp["minimum"]
                if comp["maximum"] is not None:
                    upd["maximum"] = comp["maximum"]
                input_updates[number_idx] = gr.update(**upd)
                input_updates[dropdown_idx] = gr.update(visible=False)
            else:
                input_updates[number_idx] = gr.update(visible=False)
                input_updates[dropdown_idx] = gr.update(
                    visible=True, label=comp["name"], choices=comp["choices"], value=comp["value"]
                )
        input_status = f"📝 Enter values for {len(components_info)} features | {validation_msg}"
        # NOTE(review): the label says "Custom Data" even when a sample dataset
        # is active, because this callback is not told which dataset is loaded.
        status_msg = f"✅ **Selected Dataset**: Custom Data | **Target**: {target_col} | **Type**: {problem_type.title()}"
        return input_updates + [gr.update(visible=True), input_status, status_msg]

    except Exception as e:
        return hidden(f"❌ Error: {str(e)}")


# MLP prediction function

def execute_prediction(df_preview, target_col, epochs, learning_rate_power, batch_size_power, 
                      train_test_split_ratio, optimizer_name, reg_type, reg_rate,
                      layer1_neurons, layer1_activation, layer2_neurons, layer2_activation,
                      layer3_neurons, layer3_activation, layer4_neurons, layer4_activation,
                      layer5_neurons, layer5_activation, layer6_neurons, layer6_activation,
                      layer7_neurons, layer7_activation, layer8_neurons, layer8_activation,
                      *input_values):
    """Train the MLP on the loaded dataset and predict for the entered data point.

    Reads the module-level ``current_dataframe``; ``df_preview`` is accepted
    for the event signature but not used.

    Args:
        target_col: Name of the target column.
        epochs, optimizer_name, reg_type, reg_rate: Passed through to the backend.
        learning_rate_power: Slider position indexing the 1e-6 .. 1 rate table.
        batch_size_power: Slider position; a power-of-two exponent, or any value
            above int(log2(train size)) for "Full Batch".
        train_test_split_ratio: Fraction of rows used for training.
        layerN_neurons / layerN_activation: Hidden-layer settings; layers with
            a non-positive neuron count are skipped.
        *input_values: Feature widget values, interleaved as
            (Number, Dropdown) per feature.

    Returns:
        ``(train_loss_figure, val_loss_figure, results_html)``. On any error the
        figures are None and the HTML carries the error message.
    """
    global current_dataframe
    df = current_dataframe

    EMPTY_PLOT = None
    error_style = "<div style='background:#FFEBEE;border-left:6px solid #C62828;padding:14px 16px;border-radius:10px;'><strong>🧠 MLP (Multi-Layer Perceptron)</strong><br><br>{}</div>"

    # Training needs the backend; there is no fallback implementation.
    if not MLP_AVAILABLE:
        return (EMPTY_PLOT, EMPTY_PLOT, error_style.format("❌ MLP module is not available!<br><br>Please check the installation."))

    if df is None or df.empty:
        return (EMPTY_PLOT, EMPTY_PLOT, error_style.format("No data available."))
    if not target_col:
        return (EMPTY_PLOT, EMPTY_PLOT, error_style.format("Configuration incomplete."))

    is_valid, _, _ = validate_config(df, target_col)
    if not is_valid:
        return (EMPTY_PLOT, EMPTY_PLOT, error_style.format("Configuration issue."))

    try:
        components_info = mlp_regression.create_input_components(df, target_col)

        new_point_dict = {}
        for i, comp in enumerate(components_info):
            # Number widgets sit at even slots, Dropdown widgets at odd slots;
            # read the slot that matches the component type so categorical
            # selections are honoured instead of a hidden Number's stale value.
            slot = i * 2 if comp["type"] == "number" else i * 2 + 1
            entered = input_values[slot] if slot < len(input_values) else None
            # Features without a widget or a value use the component default.
            new_point_dict[comp["name"]] = comp["value"] if entered is None else entered

        hidden_layers_config = _parse_layer_configs(
            layer1_neurons, layer1_activation,
            layer2_neurons, layer2_activation,
            layer3_neurons, layer3_activation,
            layer4_neurons, layer4_activation,
            layer5_neurons, layer5_activation,
            layer6_neurons, layer6_activation,
            layer7_neurons, layer7_activation,
            layer8_neurons, layer8_activation,
        )

        if not hidden_layers_config:
            return (EMPTY_PLOT, EMPTY_PLOT, error_style.format("⚠️ At least one hidden layer is required. Please configure at least Layer 1."))

        # Convert learning rate slider value to actual learning rate
        lr_values = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0]
        idx = int(learning_rate_power)
        lr_float = lr_values[idx] if 0 <= idx < len(lr_values) else 0.01  # default fallback

        # Convert batch_size_power to the batch size string the backend expects
        import math
        train_size = int(len(df) * train_test_split_ratio)
        max_power = int(math.log2(train_size)) if train_size > 0 else 0

        if batch_size_power >= max_power + 1:
            batch_size_str = "Full Batch"
        else:
            batch_size_str = str(2 ** int(batch_size_power))

        # The backend also returns the prediction value; only the figures and
        # the results HTML are shown in the UI.
        train_loss_fig, val_loss_fig, results_display, _ = mlp_regression.run_mlp_and_visualize(
            df, target_col, new_point_dict, hidden_layers_config,
            epochs, lr_float, batch_size_str, train_test_split_ratio,
            optimizer_name, reg_type, reg_rate
        )

        return (train_loss_fig, val_loss_fig, results_display)

    except Exception as e:
        print(f"Execution error: {str(e)}")  # For debugging
        import traceback
        traceback.print_exc()
        return (EMPTY_PLOT, EMPTY_PLOT, error_style.format(f"Execution error: {str(e)}"))


# No tree visualization needed for MLP


# Build the Gradio UI. Everything down to the event wiring runs inside this
# Blocks context; `demo` is the app object launched at the bottom of the file.
with gr.Blocks(theme="gstaff/sketch", css=vlai_template.custom_css, fill_width=True, js=force_light_theme_js) as demo:
    # Page header from the shared template.
    vlai_template.create_header()
    
    # Introductory info card describing the demo.
    gr.HTML(vlai_template.render_info_card(
        icon="🧠",
        title="About this MLP (Multi-Layer Perceptron) Regression Demo",
        description="Interactive demonstration of Multi-Layer Perceptron (MLP) for regression. Build, train, and visualize neural networks with customizable architectures, activation functions, optimizers, and regularization techniques. Experience real-time training metrics and predictions."
    ))
    
    # One-line usage summary shown above the main layout.
    gr.Markdown("### 🧠 **How to Use**: Select regression data → Configure target (continuous numeric values) → Set training parameters → Enter feature values → Run training!")

    # Main two-column layout: controls on the left (scale 45), results on the right (scale 55).
    with gr.Row(equal_height=False, variant="panel"):
        with gr.Column(scale=45):
            # --- Data source, target column, status line and preview table ---
            with gr.Accordion("📊 Data & Configuration", open=True):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("Start with sample datasets or upload your own CSV/Excel files.")
                        file_upload = gr.File(label="📁 Upload Your Data", file_types=[".csv", ".xlsx", ".xls"])
                    with gr.Column(scale=3):
                        sample_dataset = gr.Dropdown(choices=list(SAMPLE_DATA_CONFIG.keys()), value="California Housing", label="🗂️ Sample Datasets")

                with gr.Row():
                    # Choices are filled in by the data-loading callbacks.
                    target_column = gr.Dropdown(choices=[], label="🎯 Target Column", interactive=True)

                status_message = gr.Markdown("🔄 Loading sample data...")
                data_preview = gr.DataFrame(label="📋 Data Preview (First 5 Rows)", row_count=5, interactive=False, max_height=250)

            # --- Hidden-layer configuration: eight (neurons, activation) pairs ---
            # Layer 1 requires at least 1 neuron; layers 2-8 accept 0, which
            # _parse_layer_configs skips.
            with gr.Accordion("🧠 MLP Architecture", open=True):
                gr.Markdown("**🏗️ Configure Hidden Layers** (Up to 8 layers)")
                with gr.Row():
                    layer1_neurons = gr.Number(label="Layer 1 Neurons", value=8, minimum=1, maximum=64, precision=0, info="Number of neurons")
                    layer1_activation = gr.Dropdown(label="Layer 1 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu", info="Activation function")
                with gr.Row():
                    layer2_neurons = gr.Number(label="Layer 2 Neurons", value=4, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer2_activation = gr.Dropdown(label="Layer 2 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer3_neurons = gr.Number(label="Layer 3 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer3_activation = gr.Dropdown(label="Layer 3 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer4_neurons = gr.Number(label="Layer 4 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer4_activation = gr.Dropdown(label="Layer 4 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer5_neurons = gr.Number(label="Layer 5 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer5_activation = gr.Dropdown(label="Layer 5 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer6_neurons = gr.Number(label="Layer 6 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer6_activation = gr.Dropdown(label="Layer 6 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer7_neurons = gr.Number(label="Layer 7 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer7_activation = gr.Dropdown(label="Layer 7 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer8_neurons = gr.Number(label="Layer 8 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer8_activation = gr.Dropdown(label="Layer 8 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")

            # --- Training hyperparameters, data split and new-data-point inputs ---
            with gr.Accordion("📊 Training Parameters & Input", open=True):
                gr.Markdown("**🧠 MLP (Multi-Layer Perceptron) Parameters**")
                with gr.Row():
                    epochs = gr.Number(
                        label="Number of Epochs",
                        value=100, minimum=1, maximum=1000, precision=0,
                        info="Number of training iterations"
                    )
                    # Slider position is an index into the 1e-6 .. 1 rate table
                    # used by update_learning_rate_display / execute_prediction.
                    learning_rate_slider = gr.Slider(
                        label="Learning Rate (Power of 10)",
                        value=4, minimum=0, maximum=6, step=1,
                        info="0=1e-6, 1=1e-5, 2=1e-4, 3=1e-3, 4=1e-2, 5=1e-1, 6=1"
                    )
                    learning_rate_display = gr.Markdown("**Current Learning Rate:** 0.01")
                    # Slider position is a power-of-two exponent; its maximum is
                    # reset by update_batch_size_slider and stands for Full Batch.
                    batch_size_slider = gr.Slider(
                        label="Batch Size (Power of 2)",
                        value=10, minimum=0, maximum=10, step=1,
                        info="Slide to select: 0=1, 1=2, 2=4, 3=8, ... Max=Full Batch"
                    )
                    batch_size_display = gr.Markdown("**Current Batch Size:** Full Batch")
                
                with gr.Row():
                    optimizer_name = gr.Dropdown(
                        label="Optimizer",
                        choices=["adam", "sgd", "rmsprop"],
                        value="adam",
                        info="Optimization algorithm (Adam recommended)"
                    )
                    reg_type = gr.Dropdown(
                        label="Regularization",
                        choices=["none", "l1", "l2"],
                        value="none",
                        info="Regularization type to prevent overfitting"
                    )
                    reg_rate = gr.Number(
                        label="Reg. Rate (λ)",
                        value=0.001, minimum=0, maximum=0.1, step=0.0001,
                        info="Regularization strength"
                    )

                gr.Markdown("**📊 Data Split Configuration**")
                with gr.Row():
                    train_test_split_ratio = gr.Slider(
                        label="Train/Validation Split Ratio",
                        value=0.8, minimum=0.6, maximum=0.9, step=0.05,
                        info="Proportion of data used for training (e.g., 0.8 = 80% train, 20% validation)"
                    )
                

                # Feature inputs for the new data point. The group starts hidden
                # and is revealed by the configuration callbacks.
                inputs_group = gr.Group(visible=False)
                with inputs_group:
                    input_status = gr.Markdown("Configure inputs above.")
                    gr.Markdown("**📝 New Data Point** - Enter feature values for prediction:")
                    # 20 feature slots laid out 5 rows x 4 columns. Each slot has a
                    # Number and a Dropdown appended as a pair, so feature i maps to
                    # input_components[2*i] and input_components[2*i + 1].
                    input_components = []
                    for row in range(5):
                        with gr.Row():
                            for col in range(4):
                                idx = row * 4 + col
                                if idx < 20:
                                    number_comp = gr.Number(label=f"Feature {idx+1}", visible=False)
                                    dropdown_comp = gr.Dropdown(label=f"Feature {idx+1}", visible=False)
                                    input_components.extend([number_comp, dropdown_comp])

                run_prediction_btn = gr.Button("📊 Run Training & Prediction", variant="primary", size="lg")

        with gr.Column(scale=55):
            # --- Output area: two loss plots and the HTML results panel ---
            gr.Markdown("### 🧠 **MLP (Multi-Layer Perceptron) Results & Visualization**")
            
            train_loss_chart = gr.Plot(label="Training Loss & MAE Over Epochs", visible=True)
            val_loss_chart = gr.Plot(label="Validation Loss & MAE Over Epochs", visible=True)
            results_display = gr.HTML("**🧠 MLP (Multi-Layer Perceptron) Regression Results**<br><br>Training details will appear here showing model performance, learned parameters, and predictions.", label="🧠 Results & Predictions")

    gr.Markdown("""🧠 **MLP (Multi-Layer Perceptron) Regression Guide**:

**📈 Training Metrics**:
- **MSE (Mean Squared Error)**: Average squared difference between predicted and actual values. Lower MSE indicates better fit.
- **MAE (Mean Absolute Error)**: Average absolute difference between predicted and actual values. More interpretable than MSE.
- **R² (R-squared)**: Coefficient of determination. Measures how well the model explains variance. Closer to 1.0 is better.

**🏗️ Architecture Parameters**:
- **Hidden Layers**: Number of layers between input and output. More layers = more complex patterns, but risk of overfitting.
- **Neurons per Layer**: Width of each layer. More neurons = more capacity, but requires more data and computation.
- **Activation Functions**: ReLU (default), Sigmoid, Tanh, LeakyReLU. ReLU is most common for hidden layers.
- **Output Layer**: Linear activation for regression (predicts continuous values).

**🔧 Training Parameters**:
- **Epochs**: Number of complete passes through training data. More epochs = better learning, but watch for overfitting.
- **Learning Rate**: Step size for optimization. Recommended: 0.001 to 0.01. Too high may cause instability.
- **Batch Size**: Samples processed before updating parameters. 0 = Full Batch (most stable). Smaller = faster updates but noisier.
- **Optimizer**: Adam (recommended), SGD, RMSprop. Adam adapts learning rate automatically.
- **Regularization**: L1 or L2 to prevent overfitting. Higher λ = more regularization.
- **Train/Validation Split**: Proportion of data for training vs validation. Default 80/20 split.

**🧮 Algorithm Details**:
- **Multi-Layer Architecture**: Input → Hidden Layers → Output
- **Activation Functions**: ReLU/Tanh/Sigmoid for hidden layers, Linear for output
- **Mean Squared Error Loss**: Optimized for regression tasks
- **Feature Normalization**: Automatic standardization (zero mean, unit variance) for stable training
- **Target Normalization**: Target values are also normalized during training for better convergence
- **Backpropagation**: Gradient-based learning through multiple layers

**💡 Tips**:
- Start with simple architecture (1-2 hidden layers, 8-16 neurons)
- Use Adam optimizer with default learning rate (0.01)
- Monitor validation metrics (MSE, MAE, R²) to detect overfitting
- Add regularization (L2) if overfitting occurs
- Use batch size = Full Batch for most stable training
- Try different activation functions (ReLU is usually best for hidden layers)
- For regression, ensure target values are continuous numeric values
""")

    vlai_template.create_footer()

    # ------------------------------------------------------------------
    # Event wiring
    # ------------------------------------------------------------------

    def _then_refresh_batch_size(event):
        """Chain the batch-size refresh after *event* and return the last step.

        Step 1 resizes the batch-size slider to the current training set size;
        step 2 re-renders the batch-size label from the new slider value.
        """
        return event.then(
            fn=update_batch_size_slider,
            inputs=[data_preview, target_column, train_test_split_ratio],
            outputs=[batch_size_slider],
        ).then(
            fn=update_batch_size_display,
            inputs=[batch_size_slider, train_test_split_ratio],
            outputs=[batch_size_display],
        )

    # Output lists shared by several events.
    _load_outputs = [data_preview, target_column, status_message] + input_components + [inputs_group, input_status]
    _config_outputs = input_components + [inputs_group, input_status, status_message]

    # Initial page load: load the default sample dataset, then sync the
    # batch-size and learning-rate labels.
    load_evt = _then_refresh_batch_size(
        demo.load(
            fn=lambda: load_and_configure_data(None, "California Housing"),
            outputs=_load_outputs,
        )
    ).then(
        fn=update_learning_rate_display,
        inputs=[learning_rate_slider],
        outputs=[learning_rate_display],
    )

    # File upload: load the custom data, then refresh the batch-size controls.
    upload_evt = _then_refresh_batch_size(
        file_upload.upload(
            fn=lambda file: load_and_configure_data(file, "California Housing"),
            inputs=[file_upload],
            outputs=_load_outputs,
        )
    )

    # Sample dataset change: reload the data, rebuild the feature inputs, then
    # refresh the batch-size controls.
    _then_refresh_batch_size(
        sample_dataset.change(
            fn=lambda choice: load_and_configure_data_simple(choice),
            inputs=[sample_dataset],
            outputs=[data_preview, target_column, status_message],
        ).then(
            fn=update_configuration, inputs=[data_preview, target_column],
            outputs=_config_outputs,
        )
    )

    # Target column change: rebuild the feature inputs, then refresh the
    # batch-size controls.
    _then_refresh_batch_size(
        target_column.change(
            fn=update_configuration, inputs=[data_preview, target_column],
            outputs=_config_outputs,
        )
    )

    # Update batch size display when slider or train/test split changes
    batch_size_slider.change(
        fn=update_batch_size_display,
        inputs=[batch_size_slider, train_test_split_ratio],
        outputs=[batch_size_display],
    )

    train_test_split_ratio.change(
        fn=update_batch_size_slider,
        inputs=[data_preview, target_column, train_test_split_ratio],
        outputs=[batch_size_slider],
    ).then(
        fn=update_batch_size_display,
        inputs=[batch_size_slider, train_test_split_ratio],
        outputs=[batch_size_display],
    )

    # Update learning rate display when slider changes
    learning_rate_slider.change(
        fn=update_learning_rate_display,
        inputs=[learning_rate_slider],
        outputs=[learning_rate_display],
    )

    # Train and predict. The input order must match execute_prediction's
    # signature: fixed settings, 8 layer pairs, then the feature widgets.
    run_prediction_btn.click(
        fn=execute_prediction,
        inputs=[data_preview, target_column, epochs, learning_rate_slider, batch_size_slider, 
                train_test_split_ratio, optimizer_name, reg_type, reg_rate,
                layer1_neurons, layer1_activation, layer2_neurons, layer2_activation,
                layer3_neurons, layer3_activation, layer4_neurons, layer4_activation,
                layer5_neurons, layer5_activation, layer6_neurons, layer6_activation,
                layer7_neurons, layer7_activation, layer8_neurons, layer8_activation] + input_components,
        outputs=[train_loss_chart, val_loss_chart, results_display],
    )

# Launch the app only when this file is run as a script; allowed_paths passes
# the static logo files and the static directory to Gradio's launch().
if __name__ == "__main__":
    demo.launch(allowed_paths=["static/aivn_logo.png", "static/vlai_logo.png", "static"])