Implement MLP regression demo with Gradio interface, including data loading, model training, and visualization features. Add README and requirements documentation, along with necessary package files.
4045778
| import gradio as gr | |
| import pandas as pd | |
| import vlai_template | |
# Optional backend: when the MLP core module cannot be imported the app keeps
# running and relies on MLP_AVAILABLE to route around it.
try:
    from src import mlp_regression
except ImportError as import_error:
    print(f"❌ MLP module failed to load: {import_error}")
    MLP_AVAILABLE = False
    mlp_regression = None
else:
    MLP_AVAILABLE = True

# Branding and theme settings handed to the shared page template.
vlai_template.configure(
    project_name="MLP (Multi-Layer Perceptron) Regression Demo",
    year="2025",
    module="06",
    description=(
        "Interactive demonstration of Multi-Layer Perceptron (MLP) for regression. "
        "Build, train, and visualize neural networks with customizable architectures, "
        "activation functions, optimizers, and regularization techniques."
    ),
    colors=dict(
        primary="#1976D2",
        accent="#7B1FA2",
        bg1="#E3F2FD",
        bg2="#BBDEFB",
        bg3="#90CAF9",
    ),
    font_family=(
        "'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, "
        "'Helvetica Neue', Arial, sans-serif"
    ),
)

# Most recently loaded dataset; written by the loaders and read by the callbacks.
current_dataframe = None
def load_sample_data_fallback(dataset_choice="California Housing"):
    """Load a built-in sample dataset without the MLP core module.

    Args:
        dataset_choice: "California Housing" or "Synthetic".

    Returns:
        A DataFrame holding the feature columns plus a "target" column.

    Raises:
        ValueError: If ``dataset_choice`` is not one of the known names.
    """
    known_datasets = ("California Housing", "Synthetic")
    # Reject bad names first so the error does not depend on scikit-learn
    # being importable or on a dataset download.
    if dataset_choice not in known_datasets:
        raise ValueError(f"Unknown dataset: {dataset_choice}")

    import pandas as pd
    from sklearn.datasets import fetch_california_housing, make_regression

    if dataset_choice == "California Housing":
        data = fetch_california_housing()
        names = getattr(data, "feature_names", None)
        if names is None:
            # DataFrame(columns=None) produces integer labels rather than
            # nulls, so generic names must be chosen explicitly here.
            names = [f"feature_{i}" for i in range(data.data.shape[1])]
        df = pd.DataFrame(data.data, columns=list(names))
        df["target"] = data.target
        return df

    # "Synthetic": fixed seed keeps the generated data reproducible.
    X, y = make_regression(
        n_samples=1000,
        n_features=20,
        n_informative=15,
        noise=10.0,
        random_state=42,
    )
    df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
    df["target"] = y
    return df
def create_input_components_fallback(df, target_col):
    """Describe one input widget per feature when the MLP core is unavailable.

    Args:
        df: Loaded dataset.
        target_col: Column to exclude from the feature list.

    Returns:
        A list of dicts, one per feature column in DataFrame order. Numeric
        columns yield ``{"name", "type": "number", "value", "minimum",
        "maximum"}`` with the column mean (rounded to 3 places) as the value;
        every other column yields ``{"name", "type": "dropdown", "choices",
        "value"}`` built from its sorted unique non-null values.
    """
    components = []
    for col in df.columns:
        if col == target_col:
            continue
        data = df[col]
        # Test for "numeric" rather than "object": string and categorical
        # dtypes are not object dtype but still need a dropdown.
        if pd.api.types.is_numeric_dtype(data):
            mean = pd.to_numeric(data, errors="coerce").dropna().mean()
            mean = 0.0 if pd.isna(mean) else float(mean)
            components.append(
                {
                    "name": col,
                    "type": "number",
                    "value": round(mean, 3),
                    "minimum": None,
                    "maximum": None,
                }
            )
        else:
            # An all-null column still needs one selectable placeholder.
            choices = sorted(map(str, data.dropna().unique())) or ["N/A"]
            components.append(
                {"name": col, "type": "dropdown", "choices": choices, "value": choices[0]}
            )
    return components
# Preset target column and problem type for each bundled sample dataset.
SAMPLE_DATA_CONFIG = {
    dataset_name: {"target_column": "target", "problem_type": "regression"}
    for dataset_name in ("California Housing", "Synthetic")
}

# Client-side snippet: reloads the page with ?__theme=light when the query
# string does not already carry a __theme parameter.
force_light_theme_js = """
() => {
const params = new URLSearchParams(window.location.search);
if (!params.has('__theme')) {
params.set('__theme', 'light');
window.location.search = params.toString();
}
}
"""
def validate_config(df, target_col):
    """Check whether ``target_col`` can serve as a regression target.

    Returns:
        ``(is_valid, message, problem_type)``. ``problem_type`` is
        "regression" on success and ``None`` on any failure.
    """
    not_numeric = "⚠️ Target must be numeric for regression. Please select a numeric column."

    if not (target_col and target_col in df.columns):
        return False, "❌ Please select a valid target column from the dropdown.", None

    target = df[target_col]
    if target.isnull().any():
        return False, "⚠️ Target column has missing values. Please clean your data.", None

    # Object columns are rejected outright; anything else must still survive
    # a strict numeric conversion.
    numeric = target.dtype != "object"
    if numeric:
        try:
            pd.to_numeric(target, errors="raise")
        except (ValueError, TypeError):
            numeric = False
    if not numeric:
        return False, not_numeric, None

    return (
        True,
        "\n✅ Configuration is valid! Ready for regression with continuous target values.",
        "regression",
    )
def get_status_message(is_sample, dataset_choice, target_col, problem_type, is_valid, validation_msg):
    """Build the Markdown status line shown under the dataset selectors.

    Args:
        is_sample: True when a bundled sample dataset is loaded.
        dataset_choice: Name of the selected sample dataset.
        target_col: Selected target column, or None when none is chosen yet.
        problem_type: Problem type label (e.g. "regression"), or None.
        is_valid: Result of validating the target column (custom data only).
        validation_msg: Validation message appended for custom data.

    Returns:
        A Markdown string describing the current data selection.
    """
    # A sample dataset may have no configured problem type; show a
    # placeholder instead of failing on None.title().
    type_label = problem_type.title() if problem_type else "Unknown"

    if is_sample:
        return f"✅ **Selected Dataset**: {dataset_choice} | **Target**: {target_col} | **Type**: {type_label}"
    if target_col and problem_type:
        status_icon = "✅" if is_valid else "⚠️"
        return f"{status_icon} **Custom Data** | **Target**: {target_col} | **Type**: {type_label} | {validation_msg}"
    return "📁 **Custom data uploaded!** 👆 Please select target column above to continue."
def load_and_configure_data_simple(dataset_choice="California Housing"):
    """Load a sample dataset and refresh the preview, target dropdown and status.

    Stores the loaded frame in the module-level ``current_dataframe`` (or
    resets it to None on failure).

    Returns:
        ``[preview_frame, target_dropdown, status_markdown]``.
    """
    global current_dataframe
    try:
        # Use the core loader when present, otherwise the local fallback.
        if MLP_AVAILABLE:
            frame = mlp_regression.load_data(None, dataset_choice)
        else:
            frame = load_sample_data_fallback(dataset_choice)
        current_dataframe = frame

        columns = frame.columns.tolist()
        preset = SAMPLE_DATA_CONFIG.get(dataset_choice, {})
        target_col = preset.get("target_column")
        problem_type = preset.get("problem_type")

        if target_col and target_col in columns:
            is_valid, validation_msg, detected = validate_config(frame, target_col)
            problem_type = detected or problem_type
        else:
            # Preset target missing from the data: fall back to the first column.
            target_col = columns[0] if columns else None
            is_valid, validation_msg = False, ""

        status_msg = get_status_message(
            True, dataset_choice, target_col, problem_type, is_valid, validation_msg
        )
        preview = frame.head(5).round(2)
        return [preview, gr.Dropdown(choices=columns, value=target_col), status_msg]
    except Exception as e:
        current_dataframe = None
        failure_msg = f"❌ **Error loading data**: {str(e)} | Please try a different dataset."
        return [pd.DataFrame(), gr.Dropdown(choices=[], value=None), failure_msg]
def load_and_configure_data(file_obj=None, dataset_choice="California Housing"):
    """Load uploaded or sample data and compute every dependent UI update.

    Stores the loaded frame in the module-level ``current_dataframe`` (or
    resets it to None on failure).

    Args:
        file_obj: Uploaded file (an object with a ``.name`` path, or a plain
            path string), or None to load a sample dataset.
        dataset_choice: Sample dataset name used when ``file_obj`` is None.

    Returns:
        A list of 45 values: preview frame, target dropdown, status text,
        40 feature-widget updates (Number/Dropdown pairs for 20 features),
        the inputs-group visibility update and the inputs status text.
    """
    global current_dataframe
    try:
        if MLP_AVAILABLE:
            df = mlp_regression.load_data(file_obj, dataset_choice)
        elif file_obj is None:
            df = load_sample_data_fallback(dataset_choice)
        else:
            # Accept both file-like uploads (.name) and bare path strings, and
            # compare the extension case-insensitively so "DATA.CSV" loads.
            path = getattr(file_obj, "name", file_obj)
            lowered = str(path).lower()
            if lowered.endswith(".csv"):
                df = pd.read_csv(path)
            elif lowered.endswith((".xlsx", ".xls")):
                df = pd.read_excel(path)
            else:
                raise ValueError("Unsupported format. Upload CSV or Excel files.")

        current_dataframe = df
        target_options = df.columns.tolist()
        is_sample = file_obj is None

        if is_sample:
            cfg = SAMPLE_DATA_CONFIG.get(dataset_choice, {})
            target_col = cfg.get("target_column")
            problem_type = cfg.get("problem_type")
        else:
            # Custom data: the user has to pick the target afterwards.
            target_col, problem_type = None, None

        is_valid, validation_msg = False, ""
        if target_col:
            is_valid, validation_msg, detected = validate_config(df, target_col)
            if detected:
                problem_type = detected
        status_msg = get_status_message(
            is_sample, dataset_choice, target_col, problem_type, is_valid, validation_msg
        )

        input_updates = [gr.update(visible=False)] * 40
        inputs_visible = gr.update(visible=False)
        input_status = "⚙️ Configure target column above to enable feature inputs."

        if target_col and problem_type and (not is_sample or is_valid):
            try:
                if MLP_AVAILABLE:
                    components_info = mlp_regression.create_input_components(df, target_col)
                else:
                    components_info = create_input_components_fallback(df, target_col)

                # Each of the first 20 features owns a Number slot (even index)
                # and a Dropdown slot (odd index); only one is shown.
                for i, comp in enumerate(components_info[:20]):
                    number_idx, dropdown_idx = i * 2, i * 2 + 1
                    if comp["type"] == "number":
                        upd = {"visible": True, "label": comp["name"], "value": comp["value"]}
                        if comp["minimum"] is not None:
                            upd["minimum"] = comp["minimum"]
                        if comp["maximum"] is not None:
                            upd["maximum"] = comp["maximum"]
                        input_updates[number_idx] = gr.update(**upd)
                        input_updates[dropdown_idx] = gr.update(visible=False)
                    else:
                        input_updates[number_idx] = gr.update(visible=False)
                        input_updates[dropdown_idx] = gr.update(
                            visible=True,
                            label=comp["name"],
                            choices=comp["choices"],
                            value=comp["value"],
                        )
                inputs_visible = gr.update(visible=True)
                input_status = f"📝 **Ready!** Enter values for {len(components_info)} features below, then click Run prediction. | {validation_msg}"
            except Exception as e:
                input_status = f"❌ Error generating inputs: {str(e)}"

        head = [df.head(5).round(2), gr.Dropdown(choices=target_options, value=target_col), status_msg]
        return head + input_updates + [inputs_visible, input_status]
    except Exception as e:
        current_dataframe = None
        empty = [
            pd.DataFrame(),
            gr.Dropdown(choices=[], value=None),
            f"❌ **Error loading data**: {str(e)} | Please try a different file or dataset.",
        ]
        return empty + [gr.update(visible=False)] * 40 + [gr.update(visible=False), "No data loaded."]
def update_learning_rate_display(lr_power):
    """Render the learning rate selected by the slider position as Markdown.

    The slider position is truncated to an int and used as an index into a
    fixed table of seven rates; positions outside the table yield "N/A".
    """
    rate_table = (
        (0.000001, "1e-6"),
        (0.00001, "1e-5"),
        (0.0001, "1e-4"),
        (0.001, "1e-3"),
        (0.01, "1e-2"),
        (0.1, "1e-1"),
        (1.0, "1"),
    )
    position = int(lr_power)
    if position < 0 or position >= len(rate_table):
        return "**Current Learning Rate:** N/A"
    rate, label = rate_table[position]
    return f"**Current Learning Rate:** {rate} ({label})"
def update_batch_size_display(batch_size_power, train_split):
    """Render the batch size selected by the slider position as Markdown.

    Reads the module-level ``current_dataframe`` to size the training split.
    Positions at or above ``floor(log2(train_size)) + 1`` mean "Full Batch";
    lower positions mean a batch of ``2 ** position`` samples.
    """
    global current_dataframe
    import math

    frame = current_dataframe
    if frame is None or frame.empty:
        return "**Current Batch Size:** N/A"

    train_size = int(len(frame) * train_split)
    largest_exponent = int(math.log2(train_size)) if train_size > 0 else 0
    full_batch_step = largest_exponent + 1

    if batch_size_power >= full_batch_step:
        return f"**Current Batch Size:** Full Batch ({train_size} samples)"

    exponent = int(batch_size_power)
    return f"**Current Batch Size:** {2 ** exponent} samples (2^{exponent})"
def update_batch_size_slider(df_preview, target_col, train_split):
    """Resize the batch-size slider to match the current training split.

    ``df_preview`` and ``target_col`` are accepted but not read; the size
    comes from the module-level ``current_dataframe``. The slider maximum is
    ``floor(log2(train_size)) + 1`` (the "Full Batch" position) and the value
    is reset to that maximum. Without data the slider falls back to 10/10.
    """
    global current_dataframe
    import math

    frame = current_dataframe
    if frame is None or frame.empty:
        return gr.update(maximum=10, value=10)

    train_size = int(len(frame) * train_split)
    largest_exponent = int(math.log2(train_size)) if train_size > 0 else 0
    # One step past the largest power of two stands for "Full Batch".
    full_batch_step = largest_exponent + 1
    return gr.update(maximum=full_batch_step, value=full_batch_step)
def _parse_layer_configs(*args):
    """Turn alternating (neurons, activation) arguments into layer configs.

    Args:
        *args: Flat sequence ``neurons1, activation1, neurons2, ...``. A
            trailing unpaired value is ignored.

    Returns:
        A list of ``{"neurons": int, "activation": str}`` dicts, in order,
        for every pair whose neuron count converts to a positive integer.
        A missing or empty activation defaults to "relu".
    """
    hidden_layers_config = []
    for neurons, activation in zip(args[::2], args[1::2]):
        if neurons is None:
            continue
        # Convert before comparing so non-numeric input (strings, NaN, inf)
        # is skipped here instead of raising from the comparison.
        try:
            neuron_count = int(neurons)
        except (ValueError, TypeError, OverflowError):
            continue
        if neuron_count > 0:
            hidden_layers_config.append(
                {"neurons": neuron_count, "activation": activation or "relu"}
            )
    return hidden_layers_config
def update_configuration(df_preview, target_col):
    """Rebuild the feature-input widgets after the target column changes.

    ``df_preview`` is accepted but not read; the data comes from the
    module-level ``current_dataframe``.

    Returns:
        A list of 43 values: 40 feature-widget updates (Number/Dropdown pairs
        for 20 features), the inputs-group visibility update, the inputs
        status text and the dataset status text.
    """
    global current_dataframe
    df = current_dataframe

    def _hidden_with(message):
        # Hide every feature widget and show the same message in both labels.
        return [gr.update(visible=False)] * 40 + [gr.update(visible=False), message, message]

    if df is None or df.empty:
        return _hidden_with("No data available.")
    if not target_col:
        return _hidden_with("Select target column.")

    try:
        is_valid, validation_msg, problem_type = validate_config(df, target_col)
        if not is_valid:
            # The validation message already starts with its own icon, so it
            # is shown as-is rather than prefixed with a second one.
            return _hidden_with(validation_msg)

        if MLP_AVAILABLE:
            components_info = mlp_regression.create_input_components(df, target_col)
        else:
            components_info = create_input_components_fallback(df, target_col)

        input_updates = [gr.update(visible=False)] * 40
        # Each of the first 20 features owns a Number slot (even index) and a
        # Dropdown slot (odd index); only one of the two is shown.
        for i, comp in enumerate(components_info[:20]):
            number_idx, dropdown_idx = i * 2, i * 2 + 1
            if comp["type"] == "number":
                upd = {"visible": True, "label": comp["name"], "value": comp["value"]}
                if comp["minimum"] is not None:
                    upd["minimum"] = comp["minimum"]
                if comp["maximum"] is not None:
                    upd["maximum"] = comp["maximum"]
                input_updates[number_idx] = gr.update(**upd)
                input_updates[dropdown_idx] = gr.update(visible=False)
            else:
                input_updates[number_idx] = gr.update(visible=False)
                input_updates[dropdown_idx] = gr.update(
                    visible=True,
                    label=comp["name"],
                    choices=comp["choices"],
                    value=comp["value"],
                )

        input_status = f"📝 Enter values for {len(components_info)} features | {validation_msg}"
        status_msg = f"✅ **Selected Dataset**: Custom Data | **Target**: {target_col} | **Type**: {problem_type.title()}"
        return input_updates + [gr.update(visible=True), input_status, status_msg]
    except Exception as e:
        return _hidden_with(f"❌ Error: {str(e)}")
# MLP prediction function
def execute_prediction(df_preview, target_col, epochs, learning_rate_power, batch_size_power,
                       train_test_split_ratio, optimizer_name, reg_type, reg_rate,
                       layer1_neurons, layer1_activation, layer2_neurons, layer2_activation,
                       layer3_neurons, layer3_activation, layer4_neurons, layer4_activation,
                       layer5_neurons, layer5_activation, layer6_neurons, layer6_activation,
                       layer7_neurons, layer7_activation, layer8_neurons, layer8_activation,
                       *input_values):
    """Train the MLP on the loaded data and predict for the entered data point.

    ``df_preview`` is accepted but not read; the data comes from the
    module-level ``current_dataframe``.

    Args:
        target_col: Name of the regression target column.
        epochs, optimizer_name, reg_type, reg_rate: Passed through to the
            backend unchanged.
        learning_rate_power: Slider position indexing the learning-rate table.
        batch_size_power: Slider position; ``2 ** position`` samples, or
            "Full Batch" at or above ``floor(log2(train_size)) + 1``.
        train_test_split_ratio: Fraction of rows used for training.
        layerN_neurons / layerN_activation: Hidden-layer settings; layers
            with a non-positive neuron count are skipped.
        *input_values: Feature widget values as alternating Number/Dropdown
            slots, two per feature.

    Returns:
        ``(train_loss_figure, val_loss_figure, results_html)``. On any
        failure both figures are None and the HTML holds an error box.
    """
    global current_dataframe
    df = current_dataframe
    EMPTY_PLOT = None
    error_style = "<div style='background:#FFEBEE;border-left:6px solid #C62828;padding:14px 16px;border-radius:10px;'><strong>🧠 MLP (Multi-Layer Perceptron)</strong><br><br>{}</div>"

    def _failure(message):
        return (EMPTY_PLOT, EMPTY_PLOT, error_style.format(message))

    # Training needs the backend; there is no fallback implementation.
    if not MLP_AVAILABLE:
        return _failure("❌ MLP module is not available!<br><br>Please check the installation.")
    if df is None or df.empty:
        return _failure("No data available.")
    if not target_col:
        return _failure("Configuration incomplete.")
    is_valid, validation_msg, problem_type = validate_config(df, target_col)
    if not is_valid:
        return _failure("Configuration issue.")

    try:
        components_info = mlp_regression.create_input_components(df, target_col)

        new_point_dict = {}
        for i, comp in enumerate(components_info):
            # Number widgets sit at even slots, Dropdown widgets at odd slots;
            # read the slot that matches the feature's widget type.
            slot = i * 2 if comp["type"] == "number" else i * 2 + 1
            entered = input_values[slot] if slot < len(input_values) else None
            # Features without a widget or without a value use their default.
            new_point_dict[comp["name"]] = comp["value"] if entered is None else entered

        hidden_layers_config = _parse_layer_configs(
            layer1_neurons, layer1_activation,
            layer2_neurons, layer2_activation,
            layer3_neurons, layer3_activation,
            layer4_neurons, layer4_activation,
            layer5_neurons, layer5_activation,
            layer6_neurons, layer6_activation,
            layer7_neurons, layer7_activation,
            layer8_neurons, layer8_activation,
        )
        if not hidden_layers_config:
            return _failure("⚠️ At least one hidden layer is required. Please configure at least Layer 1.")

        # Convert learning rate slider value to actual learning rate
        lr_values = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0]
        idx = int(learning_rate_power)
        lr_float = lr_values[idx] if 0 <= idx < len(lr_values) else 0.01  # Default fallback

        # Convert batch_size_power to actual batch size string
        import math
        train_size = int(len(df) * train_test_split_ratio)
        max_power = int(math.log2(train_size)) if train_size > 0 else 0
        if batch_size_power >= max_power + 1:
            batch_size_str = "Full Batch"
        else:
            batch_size_str = str(2 ** int(batch_size_power))

        # The backend also returns the prediction value; it is not displayed
        # separately here.
        train_loss_fig, val_loss_fig, results_display, _prediction = mlp_regression.run_mlp_and_visualize(
            df, target_col, new_point_dict, hidden_layers_config,
            epochs, lr_float, batch_size_str, train_test_split_ratio,
            optimizer_name, reg_type, reg_rate
        )
        return (train_loss_fig, val_loss_fig, results_display)
    except Exception as e:
        print(f"Execution error: {str(e)}")  # For debugging
        import traceback
        traceback.print_exc()
        return _failure(f"Execution error: {str(e)}")
# Page layout and event wiring for the demo.
# NOTE(review): the widget nesting below was reconstructed from a copy of the
# file whose indentation was lost — confirm the layout against the running app.
with gr.Blocks(theme="gstaff/sketch", css=vlai_template.custom_css, fill_width=True, js=force_light_theme_js) as demo:
    vlai_template.create_header()
    gr.HTML(vlai_template.render_info_card(
        icon="🧠",
        title="About this MLP (Multi-Layer Perceptron) Regression Demo",
        description="Interactive demonstration of Multi-Layer Perceptron (MLP) for regression. Build, train, and visualize neural networks with customizable architectures, activation functions, optimizers, and regularization techniques. Experience real-time training metrics and predictions."
    ))
    gr.Markdown("### 🧠 **How to Use**: Select regression data → Configure target (continuous numeric values) → Set training parameters → Enter feature values → Run training!")

    with gr.Row(equal_height=False, variant="panel"):
        # Left column: data selection, architecture and training controls.
        with gr.Column(scale=45):
            with gr.Accordion("📊 Data & Configuration", open=True):
                with gr.Row():
                    with gr.Column(scale=1):
                        gr.Markdown("Start with sample datasets or upload your own CSV/Excel files.")
                        file_upload = gr.File(label="📁 Upload Your Data", file_types=[".csv", ".xlsx", ".xls"])
                    with gr.Column(scale=3):
                        sample_dataset = gr.Dropdown(choices=list(SAMPLE_DATA_CONFIG.keys()), value="California Housing", label="🗂️ Sample Datasets")
                with gr.Row():
                    target_column = gr.Dropdown(choices=[], label="🎯 Target Column", interactive=True)
                status_message = gr.Markdown("🔄 Loading sample data...")
                data_preview = gr.DataFrame(label="📋 Data Preview (First 5 Rows)", row_count=5, interactive=False, max_height=250)

            with gr.Accordion("🧠 MLP Architecture", open=True):
                gr.Markdown("**🏗️ Configure Hidden Layers** (Up to 8 layers)")
                with gr.Row():
                    layer1_neurons = gr.Number(label="Layer 1 Neurons", value=8, minimum=1, maximum=64, precision=0, info="Number of neurons")
                    layer1_activation = gr.Dropdown(label="Layer 1 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu", info="Activation function")
                with gr.Row():
                    layer2_neurons = gr.Number(label="Layer 2 Neurons", value=4, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer2_activation = gr.Dropdown(label="Layer 2 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer3_neurons = gr.Number(label="Layer 3 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer3_activation = gr.Dropdown(label="Layer 3 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer4_neurons = gr.Number(label="Layer 4 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer4_activation = gr.Dropdown(label="Layer 4 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer5_neurons = gr.Number(label="Layer 5 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer5_activation = gr.Dropdown(label="Layer 5 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer6_neurons = gr.Number(label="Layer 6 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer6_activation = gr.Dropdown(label="Layer 6 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer7_neurons = gr.Number(label="Layer 7 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer7_activation = gr.Dropdown(label="Layer 7 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")
                with gr.Row():
                    layer8_neurons = gr.Number(label="Layer 8 Neurons", value=0, minimum=0, maximum=64, precision=0, info="Set to 0 to disable")
                    layer8_activation = gr.Dropdown(label="Layer 8 Activation", choices=["relu", "sigmoid", "tanh", "leakyRelu"], value="relu")

            with gr.Accordion("📊 Training Parameters & Input", open=True):
                gr.Markdown("**🧠 MLP (Multi-Layer Perceptron) Parameters**")
                with gr.Row():
                    epochs = gr.Number(
                        label="Number of Epochs",
                        value=100, minimum=1, maximum=1000, precision=0,
                        info="Number of training iterations"
                    )
                    learning_rate_slider = gr.Slider(
                        label="Learning Rate (Power of 10)",
                        value=4, minimum=0, maximum=6, step=1,
                        info="0=1e-6, 1=1e-5, 2=1e-4, 3=1e-3, 4=1e-2, 5=1e-1, 6=1"
                    )
                    learning_rate_display = gr.Markdown("**Current Learning Rate:** 0.01")
                    batch_size_slider = gr.Slider(
                        label="Batch Size (Power of 2)",
                        value=10, minimum=0, maximum=10, step=1,
                        info="Slide to select: 0=1, 1=2, 2=4, 3=8, ... Max=Full Batch"
                    )
                    batch_size_display = gr.Markdown("**Current Batch Size:** Full Batch")
                with gr.Row():
                    optimizer_name = gr.Dropdown(
                        label="Optimizer",
                        choices=["adam", "sgd", "rmsprop"],
                        value="adam",
                        info="Optimization algorithm (Adam recommended)"
                    )
                    reg_type = gr.Dropdown(
                        label="Regularization",
                        choices=["none", "l1", "l2"],
                        value="none",
                        info="Regularization type to prevent overfitting"
                    )
                    reg_rate = gr.Number(
                        label="Reg. Rate (λ)",
                        value=0.001, minimum=0, maximum=0.1, step=0.0001,
                        info="Regularization strength"
                    )
                gr.Markdown("**📊 Data Split Configuration**")
                with gr.Row():
                    train_test_split_ratio = gr.Slider(
                        label="Train/Validation Split Ratio",
                        value=0.8, minimum=0.6, maximum=0.9, step=0.05,
                        info="Proportion of data used for training (e.g., 0.8 = 80% train, 20% validation)"
                    )

                inputs_group = gr.Group(visible=False)
                with inputs_group:
                    input_status = gr.Markdown("Configure inputs above.")
                    gr.Markdown("**📝 New Data Point** - Enter feature values for prediction:")
                    # 20 feature slots in a 5x4 grid; each slot holds a Number
                    # and a Dropdown so either kind of feature can be shown.
                    input_components = []
                    for row in range(5):
                        with gr.Row():
                            for col in range(4):
                                idx = row * 4 + col
                                number_comp = gr.Number(label=f"Feature {idx+1}", visible=False)
                                dropdown_comp = gr.Dropdown(label=f"Feature {idx+1}", visible=False)
                                input_components.extend([number_comp, dropdown_comp])

                run_prediction_btn = gr.Button("📊 Run Training & Prediction", variant="primary", size="lg")

        # Right column: training curves and results.
        with gr.Column(scale=55):
            gr.Markdown("### 🧠 **MLP (Multi-Layer Perceptron) Results & Visualization**")
            train_loss_chart = gr.Plot(label="Training Loss & MAE Over Epochs", visible=True)
            val_loss_chart = gr.Plot(label="Validation Loss & MAE Over Epochs", visible=True)
            results_display = gr.HTML("**🧠 MLP (Multi-Layer Perceptron) Regression Results**<br><br>Training details will appear here showing model performance, learned parameters, and predictions.", label="🧠 Results & Predictions")

    # Blank lines separate the sections so each bold heading starts its own
    # Markdown paragraph instead of continuing the preceding list item.
    gr.Markdown("""🧠 **MLP (Multi-Layer Perceptron) Regression Guide**:

**📈 Training Metrics**:
- **MSE (Mean Squared Error)**: Average squared difference between predicted and actual values. Lower MSE indicates better fit.
- **MAE (Mean Absolute Error)**: Average absolute difference between predicted and actual values. More interpretable than MSE.
- **R² (R-squared)**: Coefficient of determination. Measures how well the model explains variance. Closer to 1.0 is better.

**🏗️ Architecture Parameters**:
- **Hidden Layers**: Number of layers between input and output. More layers = more complex patterns, but risk of overfitting.
- **Neurons per Layer**: Width of each layer. More neurons = more capacity, but requires more data and computation.
- **Activation Functions**: ReLU (default), Sigmoid, Tanh, LeakyReLU. ReLU is most common for hidden layers.
- **Output Layer**: Linear activation for regression (predicts continuous values).

**🔧 Training Parameters**:
- **Epochs**: Number of complete passes through training data. More epochs = better learning, but watch for overfitting.
- **Learning Rate**: Step size for optimization. Recommended: 0.001 to 0.01. Too high may cause instability.
- **Batch Size**: Samples processed before updating parameters. Slider maximum = Full Batch (most stable). Smaller = faster updates but noisier.
- **Optimizer**: Adam (recommended), SGD, RMSprop. Adam adapts learning rate automatically.
- **Regularization**: L1 or L2 to prevent overfitting. Higher λ = more regularization.
- **Train/Validation Split**: Proportion of data for training vs validation. Default 80/20 split.

**🧮 Algorithm Details**:
- **Multi-Layer Architecture**: Input → Hidden Layers → Output
- **Activation Functions**: ReLU/Tanh/Sigmoid for hidden layers, Linear for output
- **Mean Squared Error Loss**: Optimized for regression tasks
- **Feature Normalization**: Automatic standardization (zero mean, unit variance) for stable training
- **Target Normalization**: Target values are also normalized during training for better convergence
- **Backpropagation**: Gradient-based learning through multiple layers

**💡 Tips**:
- Start with simple architecture (1-2 hidden layers, 8-16 neurons)
- Use Adam optimizer with default learning rate (0.01)
- Monitor validation metrics (MSE, MAE, R²) to detect overfitting
- Add regularization (L2) if overfitting occurs
- Use batch size = Full Batch for most stable training
- Try different activation functions (ReLU is usually best for hidden layers)
- For regression, ensure target values are continuous numeric values
""")
    vlai_template.create_footer()

    def _then_refresh_batch_size(event):
        """Chain the batch-size slider resize and its label refresh onto ``event``."""
        return event.then(
            fn=update_batch_size_slider,
            inputs=[data_preview, target_column, train_test_split_ratio],
            outputs=[batch_size_slider],
        ).then(
            fn=update_batch_size_display,
            inputs=[batch_size_slider, train_test_split_ratio],
            outputs=[batch_size_display],
        )

    # Outputs shared by the full loaders (page load and file upload).
    full_load_outputs = [data_preview, target_column, status_message] + input_components + [inputs_group, input_status]
    # Outputs of update_configuration.
    configuration_outputs = input_components + [inputs_group, input_status, status_message]

    load_evt = _then_refresh_batch_size(
        demo.load(
            fn=lambda: load_and_configure_data(None, "California Housing"),
            outputs=full_load_outputs,
        )
    ).then(
        fn=update_learning_rate_display,
        inputs=[learning_rate_slider],
        outputs=[learning_rate_display],
    )

    upload_evt = _then_refresh_batch_size(
        file_upload.upload(
            fn=lambda file: load_and_configure_data(file, "California Housing"),
            inputs=[file_upload],
            outputs=full_load_outputs,
        )
    )

    _then_refresh_batch_size(
        sample_dataset.change(
            fn=lambda choice: load_and_configure_data_simple(choice),
            inputs=[sample_dataset],
            outputs=[data_preview, target_column, status_message],
        ).then(
            fn=update_configuration,
            inputs=[data_preview, target_column],
            outputs=configuration_outputs,
        )
    )

    _then_refresh_batch_size(
        target_column.change(
            fn=update_configuration,
            inputs=[data_preview, target_column],
            outputs=configuration_outputs,
        )
    )

    # Update batch size display when slider or train/test split changes
    batch_size_slider.change(
        fn=update_batch_size_display,
        inputs=[batch_size_slider, train_test_split_ratio],
        outputs=[batch_size_display],
    )
    _then_refresh_batch_size(
        train_test_split_ratio.change(
            fn=update_batch_size_slider,
            inputs=[data_preview, target_column, train_test_split_ratio],
            outputs=[batch_size_slider],
        )
    )

    # Update learning rate display when slider changes
    learning_rate_slider.change(
        fn=update_learning_rate_display,
        inputs=[learning_rate_slider],
        outputs=[learning_rate_display],
    )

    # Input order must match the positional signature of execute_prediction.
    run_prediction_btn.click(
        fn=execute_prediction,
        inputs=[data_preview, target_column, epochs, learning_rate_slider, batch_size_slider,
                train_test_split_ratio, optimizer_name, reg_type, reg_rate,
                layer1_neurons, layer1_activation, layer2_neurons, layer2_activation,
                layer3_neurons, layer3_activation, layer4_neurons, layer4_activation,
                layer5_neurons, layer5_activation, layer6_neurons, layer6_activation,
                layer7_neurons, layer7_activation, layer8_neurons, layer8_activation] + input_components,
        outputs=[train_loss_chart, val_loss_chart, results_display],
    )

if __name__ == "__main__":
    demo.launch(allowed_paths=["static/aivn_logo.png", "static/vlai_logo.png", "static"])