Update src/softmax_regression.py
src/softmax_regression.py (CHANGED): +96 -731
@@ -1,508 +1,11 @@
-# import pandas as pd
-# import numpy as np
-# from sklearn.datasets import load_iris, load_wine, make_classification
-# from sklearn.model_selection import train_test_split
-# from plotly.subplots import make_subplots
-# import plotly.graph_objects as go
-# import time
-
-# _current_model_params = None
-
-# def _get_current_model():
-#     return _current_model_params
-
-# def _set_current_model(params):
-#     global _current_model_params
-#     _current_model_params = params
-
-
-# def load_data(file_obj=None, dataset_choice="Breast Cancer"):
-#     """Load binary classification datasets"""
-#     if file_obj is not None:
-#         if file_obj.name.endswith(".csv"):
-#             encodings = ["utf-8", "latin-1", "iso-8859-1", "cp1252"]
-#             for encoding in encodings:
-#                 try:
-#                     return pd.read_csv(file_obj.name, encoding=encoding)
-#                 except UnicodeDecodeError:
-#                     continue
-#             return pd.read_csv(file_obj.name, encoding="utf-8", errors="replace")
-#         elif file_obj.name.endswith((".xlsx", ".xls")):
-#             return pd.read_excel(file_obj.name)
-#         else:
-#             raise ValueError("Unsupported format. Upload CSV or Excel files.")
-
-#     datasets = {
-#         "Iris": lambda: _sklearn_to_df(load_iris()),
-#         "Wine": lambda: _sklearn_to_df(load_wine()),
-#         "Synthetic (3 classes)": lambda: _synthetic_multiclass(n_classes=3),
-#         "Synthetic (5 classes)": lambda: _synthetic_multiclass(n_classes=5),
-#     }
-#     if dataset_choice not in datasets:
-#         raise ValueError(f"Unknown dataset: {dataset_choice}")
-#     return datasets[dataset_choice]()
-
-
-# def _sklearn_to_df(data):
-#     """Convert sklearn dataset to DataFrame"""
-#     df = pd.DataFrame(data.data, columns=getattr(data, "feature_names", None))
-#     if df.columns.isnull().any():
-#         df.columns = [f"feature_{i}" for i in range(df.shape[1])]
-#     df["target"] = data.target
-#     return df
-
-
-# # def _wine_to_binary_df(wine_data):
-# #     """Convert wine dataset to binary classification (class 0 vs others)"""
-# #     df = pd.DataFrame(wine_data.data, columns=wine_data.feature_names)
-# #     df["target"] = (wine_data.target == 0).astype(int)
-# #     return df
-
-
-# def _synthetic_classification():
-#     """Generate synthetic binary classification dataset"""
-#     X, y = make_classification(n_samples=1000, n_features=20, n_informative=15,
-#                                n_redundant=5, n_classes=2, random_state=42)
-#     df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
-#     df["target"] = y
-#     return df
-
-
-# def create_input_components(df, target_col):
-#     """Create input components for feature values"""
-#     feature_cols = [c for c in df.columns if c != target_col]
-#     components = []
-#     for col in feature_cols:
-#         data = df[col]
-#         val = pd.to_numeric(data, errors="coerce").dropna().mean()
-#         val = 0.0 if pd.isna(val) else float(val)
-#         components.append(
-#             {
-#                 "name": col,
-#                 "type": "number",
-#                 "value": round(val, 3),
-#                 "minimum": None,
-#                 "maximum": None,
-#             }
-#         )
-#     return components
-
-
-# def preprocess_data(df, target_col, new_point_dict):
-#     """Preprocess data for logistic regression"""
-#     feature_cols = [c for c in df.columns if c != target_col]
-#     X = df[feature_cols].copy()
-#     y = df[target_col].copy()
-
-#     # Convert to numeric
-#     for col in feature_cols:
-#         X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0.0)
-
-#     # Ensure binary target (0 or 1)
-#     unique_vals = sorted(y.unique())
-#     if len(unique_vals) != 2:
-#         raise ValueError(f"Target must be binary (0/1). Found {len(unique_vals)} unique values: {unique_vals}")
-
-#     # Map to 0/1 if needed
-#     y_mapped = y.copy()
-#     if set(unique_vals) != {0, 1}:
-#         mapping = {unique_vals[0]: 0, unique_vals[1]: 1}
-#         y_mapped = y.map(mapping)
-
-#     # Prepare new point
-#     new_point = []
-#     for col in feature_cols:
-#         if col in new_point_dict:
-#             try:
-#                 new_point.append(float(new_point_dict[col]))
-#             except Exception:
-#                 new_point.append(0.0)
-#         else:
-#             new_point.append(0.0)
-
-#     new_point = np.array(new_point, dtype=float).reshape(1, -1)
-
-#     return X.values, np.array(y_mapped, dtype=int), new_point, feature_cols
-
-
-# def add_bias(X):
-#     """Add bias column to feature matrix"""
-#     return np.c_[np.ones(X.shape[0]), X]
-
-
-# # def sigmoid(z):
-# #     """Sigmoid activation function: σ(z) = 1 / (1 + exp(-z))"""
-# #     z = np.clip(z, -500, 500)
-# #     return 1 / (1 + np.exp(-z))
-
-# def softmax(Z):
-#     Z_shifted = Z - np.max(Z, axis=1, keepdims=True)  # Numerical stability
-#     exp_Z = np.exp(Z_shifted)
-#     return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)
-
-# def predict_proba(X, theta):
-#     """Make probability predictions: y_hat = softmax(X @ theta)"""
-#     z = X.dot(theta)
-#     return softmax(z)
-
-
-# def predict_class(X, theta, threshold=0.5):
-#     """Make binary class predictions using threshold"""
-#     proba = predict_proba(X, theta)
-#     return (proba >= threshold).astype(int)
-
-
-# def compute_loss(y_hat, y):
-#     """Compute Binary Cross-Entropy loss: -[y*log(ŷ) + (1-y)*log(1-ŷ)]"""
-#     eps = 1e-15
-#     y_hat = np.clip(y_hat, eps, 1 - eps)
-#     loss = -(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
-#     return np.mean(loss)
-
-
-# def compute_gradient(y_hat, y, X):
-#     """Compute gradient: X.T @ (y_hat - y) / N"""
-#     N = len(y)
-#     return X.T.dot(y_hat - y) / N
-
-
-# def update_theta(theta, gradient, lr):
-#     """Update parameters using gradient descent"""
-#     return theta - lr * gradient
-
-
-# def compute_accuracy(y_true, y_pred):
-#     """Compute classification accuracy"""
-#     return np.mean(y_true == y_pred)
-
-
-# def normalize_features(X_train, X_val=None, X_test=None):
-#     """Normalize features using standardization (zero mean, unit variance)"""
-#     mean = np.mean(X_train, axis=0)
-#     std = np.std(X_train, axis=0)
-#     std[std == 0] = 1
-
-#     X_train_norm = (X_train - mean) / std
-#     X_val_norm = (X_val - mean) / std if X_val is not None else None
-#     X_test_norm = (X_test - mean) / std if X_test is not None else None
-
-#     return X_train_norm, X_val_norm, X_test_norm, mean, std
-
-
-# def train_logistic_regression_with_validation(X_train, y_train, X_val, y_val, epochs, learning_rate, batch_size=None):
-#     """
-#     Train logistic regression with mini-batch gradient descent
-
-#     Returns:
-#         theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std
-#     """
-#     X_train_norm, X_val_norm, _, X_mean, X_std = normalize_features(X_train, X_val)
-
-#     X_train_bias = add_bias(X_train_norm)
-#     X_val_bias = add_bias(X_val_norm)
-
-#     np.random.seed(42)
-#     theta = np.random.randn(X_train_bias.shape[1]) * 0.01
-
-#     train_losses = []
-#     val_losses = []
-#     train_accuracies = []
-#     val_accuracies = []
-
-#     n_samples = X_train_bias.shape[0]
-
-#     if batch_size is None or batch_size >= n_samples:
-#         actual_batch_size = n_samples
-#     else:
-#         actual_batch_size = batch_size
-
-#     for epoch in range(epochs):
-#         if actual_batch_size < n_samples:
-#             indices = np.random.permutation(n_samples)
-#             X_train_shuffled = X_train_bias[indices]
-#             y_train_shuffled = y_train[indices]
-#         else:
-#             X_train_shuffled = X_train_bias
-#             y_train_shuffled = y_train
-
-#         for i in range(0, n_samples, actual_batch_size):
-#             X_batch = X_train_shuffled[i:i+actual_batch_size]
-#             y_batch = y_train_shuffled[i:i+actual_batch_size]
-
-#             y_batch_hat = predict_proba(X_batch, theta)
-#             gradient = compute_gradient(y_batch_hat, y_batch, X_batch)
-#             theta = update_theta(theta, gradient, learning_rate)
-
-#         y_train_hat = predict_proba(X_train_bias, theta)
-#         train_loss = compute_loss(y_train_hat, y_train)
-#         train_losses.append(train_loss)
-
-#         y_train_pred = predict_class(X_train_bias, theta)
-#         train_acc = compute_accuracy(y_train, y_train_pred)
-#         train_accuracies.append(train_acc)
-
-#         y_val_hat = predict_proba(X_val_bias, theta)
-#         val_loss = compute_loss(y_val_hat, y_val)
-#         val_losses.append(val_loss)
-
-#         y_val_pred = predict_class(X_val_bias, theta)
-#         val_acc = compute_accuracy(y_val, y_val_pred)
-#         val_accuracies.append(val_acc)
-
-#     return theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std
-
-
-# def run_logistic_regression_and_visualize(df, target_col, new_point_dict,
-#                                           epochs, learning_rate, batch_size_str="Full Batch",
-#                                           train_test_split_ratio=0.8, threshold=0.5):
-#     """Run logistic regression training and generate visualizations"""
-#     X, y, new_point, feature_cols = preprocess_data(df, target_col, new_point_dict)
-
-#     if epochs < 1:
-#         return None, None, None, "Number of epochs must be ≥ 1.", None
-#     if learning_rate <= 0:
-#         return None, None, None, "Learning rate must be > 0.", None
-
-#     test_size = 1.0 - train_test_split_ratio
-#     X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=42, stratify=y)
-
-#     if batch_size_str == "Full Batch":
-#         batch_size = None
-#     else:
-#         batch_size = int(batch_size_str)
-
-#     start_time = time.time()
-#     theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std = train_logistic_regression_with_validation(
-#         X_train, y_train, X_val, y_val, epochs, learning_rate, batch_size
-#     )
-#     training_time = time.time() - start_time
-
-#     _set_current_model({
-#         "theta": theta,
-#         "feature_cols": feature_cols,
-#         "X_mean": X_mean,
-#         "X_std": X_std
-#     })
-
-#     # Prepare normalized data for prediction with threshold
-#     X_train_norm, X_val_norm, _, _, _ = normalize_features(X_train, X_val)
-#     X_train_bias = add_bias(X_train_norm)
-#     X_val_bias = add_bias(X_val_norm)
-
-#     # Make prediction with threshold
-#     new_point_norm = (new_point - X_mean) / X_std
-#     new_point_bias = add_bias(new_point_norm)
-#     prediction_proba = predict_proba(new_point_bias, theta)[0]
-#     prediction_class = predict_class(new_point_bias, theta, threshold)[0]
-
-#     # Compute metrics with threshold
-#     y_train_pred_thresh = predict_class(X_train_bias, theta, threshold)
-#     y_val_pred_thresh = predict_class(X_val_bias, theta, threshold)
-#     train_acc_thresh = compute_accuracy(y_train, y_train_pred_thresh)
-#     val_acc_thresh = compute_accuracy(y_val, y_val_pred_thresh)
-
-#     final_train_loss = train_losses[-1]
-#     final_val_loss = val_losses[-1]
-#     final_train_acc = train_accuracies[-1]
-#     final_val_acc = val_accuracies[-1]
-
-#     train_loss_fig = create_training_loss_chart(train_losses, train_accuracies)
-#     val_loss_fig = create_validation_loss_chart(val_losses, val_accuracies)
-
-#     results_display = create_results_display(
-#         theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, threshold,
-#         split_info={
-#             "train_size": len(X_train),
-#             "val_size": len(X_val),
-#             "train_ratio": train_test_split_ratio,
-#             "val_ratio": 1.0 - train_test_split_ratio,
-#             "train_loss": final_train_loss,
-#             "val_loss": final_val_loss,
-#             "train_acc": final_train_acc,
-#             "val_acc": final_val_acc,
-#             "train_acc_thresh": train_acc_thresh,
-#             "val_acc_thresh": val_acc_thresh,
-#             "batch_size": batch_size_str,
-#             "training_time": training_time
-#         }
-#     )
-
-#     return train_loss_fig, val_loss_fig, results_display, prediction_proba
-
-
-# def create_training_loss_chart(train_losses, train_accuracies):
-#     """Create training loss and accuracy visualization"""
-#     if not train_losses or len(train_losses) == 0:
-#         return None
-
-#     epochs = list(range(1, len(train_losses) + 1))
-#     valid_losses = [loss if not (np.isinf(loss) or np.isnan(loss)) else None for loss in train_losses]
-
-#     fig = make_subplots(
-#         rows=2, cols=1,
-#         subplot_titles=("Training Loss (Binary Cross-Entropy)", "Training Accuracy"),
-#         vertical_spacing=0.15,
-#         row_heights=[0.5, 0.5]
-#     )
-
-#     fig.add_trace(
-#         go.Scatter(
-#             x=epochs,
-#             y=valid_losses,
-#             mode='lines+markers',
-#             name='Training Loss',
-#             line=dict(color='#1976D2', width=3),
-#             marker=dict(size=6),
-#             showlegend=True
-#         ),
-#         row=1, col=1
-#     )
-
-#     if train_accuracies and len(train_accuracies) == len(train_losses):
-#         valid_accuracies = [acc * 100 if not (np.isinf(acc) or np.isnan(acc)) else None for acc in train_accuracies]
-#         fig.add_trace(
-#             go.Scatter(
-#                 x=epochs,
-#                 y=valid_accuracies,
-#                 mode='lines+markers',
-#                 name='Training Accuracy',
-#                 line=dict(color='#42A5F5', width=3),
-#                 marker=dict(size=6),
-#                 showlegend=True
-#             ),
-#             row=2, col=1
-#         )
-
-#     fig.update_xaxes(title_text="Epoch", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
-#     fig.update_yaxes(title_text="Loss", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
-#     fig.update_xaxes(title_text="Epoch", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
-#     fig.update_yaxes(title_text="Accuracy (%)", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray', range=[0, 100])
-
-#     fig.update_layout(
-#         title="Training Metrics Over Epochs",
-#         plot_bgcolor="white",
-#         height=600,
-#         margin=dict(l=40, r=40, t=80, b=40)
-#     )
-
-#     return fig
-
-
-# def create_validation_loss_chart(val_losses, val_accuracies):
-#     """Create validation loss and accuracy visualization"""
-#     if not val_losses or len(val_losses) == 0:
-#         return None
-
-#     epochs = list(range(1, len(val_losses) + 1))
-#     valid_losses = [loss if not (np.isinf(loss) or np.isnan(loss)) else None for loss in val_losses]
-
-#     fig = make_subplots(
-#         rows=2, cols=1,
-#         subplot_titles=("Validation Loss (Binary Cross-Entropy)", "Validation Accuracy"),
-#         vertical_spacing=0.15,
-#         row_heights=[0.5, 0.5]
-#     )
-
-#     fig.add_trace(
-#         go.Scatter(
-#             x=epochs,
-#             y=valid_losses,
-#             mode='lines+markers',
-#             name='Validation Loss',
-#             line=dict(color='#7B1FA2', width=3),
-#             marker=dict(size=6),
-#             showlegend=True
-#         ),
-#         row=1, col=1
-#     )
-
-#     if val_accuracies and len(val_accuracies) == len(val_losses):
-#         valid_accuracies = [acc * 100 if not (np.isinf(acc) or np.isnan(acc)) else None for acc in val_accuracies]
-#         fig.add_trace(
-#             go.Scatter(
-#                 x=epochs,
-#                 y=valid_accuracies,
-#                 mode='lines+markers',
-#                 name='Validation Accuracy',
-#                 line=dict(color='#BA68C8', width=3),
-#                 marker=dict(size=6),
-#                 showlegend=True
-#             ),
-#             row=2, col=1
-#         )
-
-#     fig.update_xaxes(title_text="Epoch", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
-#     fig.update_yaxes(title_text="Loss", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
-#     fig.update_xaxes(title_text="Epoch", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
-#     fig.update_yaxes(title_text="Accuracy (%)", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray', range=[0, 100])
-
-#     fig.update_layout(
-#         title="Validation Metrics Over Epochs",
-#         plot_bgcolor="white",
-#         height=600,
-#         margin=dict(l=40, r=40, t=80, b=40)
-#     )
-
-#     return fig
-
-
-# def create_results_display(theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, threshold, split_info):
-#     """Create HTML display showing model results"""
-
-#     theta_str = f"[{theta[0]:.4f}"
-#     for i, w in enumerate(theta[1:]):
-#         theta_str += f", {w:.4f}"
-#     theta_str += "]"
-
-#     html_content = f"""
-#     <div style='background:#E3F2FD;border-left:6px solid #1976D2;padding:14px 16px;border-radius:10px;'>
-#       <strong style='color:#0D47A1;'>📊 Logistic Regression Results</strong><br><br>
-
-#       <div style='margin:8px 0;'>
-#         <strong style='color:#1976D2;'>🔧 Model Configuration:</strong><br>
-#         • Epochs: {epochs} | Learning Rate: {learning_rate}<br>
-#         • Batch Size: {split_info.get('batch_size', 'Full Batch')} | Features: {len(feature_cols)}<br>
-#         • Normalization: Standardized | Activation: Sigmoid | Loss: Binary Cross-Entropy<br>
-#       </div>
-
-#       <div style='margin:8px 0;'>
-#         <strong style='color:#1976D2;'>📊 Data Split:</strong><br>
-#         • Training: {split_info['train_size']} samples ({split_info['train_ratio']:.1%})<br>
-#         • Validation: {split_info['val_size']} samples ({split_info['val_ratio']:.1%})<br>
-#       </div>
-
-#       <div style='margin:8px 0;'>
-#         <strong style='color:#1976D2;'>📈 Performance Metrics:</strong><br>
-#         • Training Loss (BCE): <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_loss']:.4f}</strong></span><br>
-#         • Validation Loss (BCE): <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_loss']:.4f}</strong></span><br>
-#         • Training Accuracy (threshold={threshold:.2f}): <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_acc_thresh']*100:.2f}%</strong></span><br>
-#         • Validation Accuracy (threshold={threshold:.2f}): <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_acc_thresh']*100:.2f}%</strong></span><br>
-#         • Training Time: <span style='background:#E1BEE7;padding:2px 6px;border-radius:4px;'><strong>{split_info['training_time']:.4f}s</strong></span><br>
-#       </div>
-
-#       <div style='margin:8px 0;'>
-#         <strong style='color:#1976D2;'>🎯 Learned Parameters (θ):</strong><br>
-#         • Theta = <code style='background:#F3E5F5;padding:2px 6px;border-radius:4px;'>{theta_str}</code><br>
-#         • Bias (θ₀) = {theta[0]:.4f}<br>
-#       </div>
-
-#       <div style='margin:8px 0;'>
-#         <strong style='color:#1976D2;'>🔮 Prediction (Threshold = {threshold:.2f}):</strong><br>
-#         • Probability: <span style='background:#DCEDC8;padding:2px 6px;border-radius:4px;'><strong>{prediction_proba:.4f}</strong></span> ({(prediction_proba*100):.2f}%)<br>
-#         • Predicted Class: <span style='background:#DCEDC8;padding:2px 6px;border-radius:4px;'><strong>{prediction_class}</strong></span> (0 = Class 0, 1 = Class 1)<br>
-#         <em style='font-size:0.9em;color:#424242;'>* Adjust threshold to see how predictions change. Lower threshold → more predictions of class 1</em><br>
-#       </div>
-#     </div>
-#     """
-
-#     return html_content
-
 import pandas as pd
 import numpy as np
 from sklearn.datasets import load_iris, load_wine, make_classification
 from sklearn.model_selection import train_test_split
+from sklearn.metrics import confusion_matrix
 from plotly.subplots import make_subplots
 import plotly.graph_objects as go
+import plotly.express as px
 import time
 
 _current_model_params = None
@@ -514,9 +17,8 @@ def _set_current_model(params):
     global _current_model_params
     _current_model_params = params
 
-
 def load_data(file_obj=None, dataset_choice="Iris"):
-    """Load
+    """Load multi-class classification datasets"""
     if file_obj is not None:
         if file_obj.name.endswith(".csv"):
             encodings = ["utf-8", "latin-1", "iso-8859-1", "cp1252"]
@@ -537,11 +39,12 @@ def load_data(file_obj=None, dataset_choice="Iris"):
         "Synthetic (3 classes)": lambda: _synthetic_multiclass(n_classes=3),
         "Synthetic (5 classes)": lambda: _synthetic_multiclass(n_classes=5),
     }
+
     if dataset_choice not in datasets:
-        raise ValueError(f"Unknown dataset: {dataset_choice}")
+        # Fallback if choice is invalid
+        return datasets["Iris"]()
     return datasets[dataset_choice]()
 
-
 def _sklearn_to_df(data):
     """Convert sklearn dataset to DataFrame"""
     df = pd.DataFrame(data.data, columns=getattr(data, "feature_names", None))
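Note on the hunk above: an unrecognized dataset_choice now falls back to the Iris loader instead of raising, so callers never see an exception for a bad name. A minimal sketch of the new behavior (the bad name below is made up for illustration):

    df_ok = load_data(dataset_choice="Iris")       # normal path
    df_fb = load_data(dataset_choice="No Such")    # silently falls back to Iris
    assert list(df_ok.columns) == list(df_fb.columns)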
@@ -550,23 +53,14 @@ def _sklearn_to_df(data):
     df["target"] = data.target
     return df
 
-
 def _synthetic_multiclass(n_classes=3):
-    """Generate synthetic
-    X, y = make_classification(
-        n_samples=1000,
-        n_features=10,
-        n_informative=8,
-        n_redundant=2,
-        n_classes=n_classes,
-        n_clusters_per_class=1,
-        random_state=42
-    )
+    """Generate synthetic multi-class classification dataset"""
+    X, y = make_classification(n_samples=1000, n_features=10, n_informative=8,
+                               n_redundant=2, n_classes=n_classes, random_state=42)
     df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
     df["target"] = y
     return df
 
-
 def create_input_components(df, target_col):
     """Create input components for feature values"""
     feature_cols = [c for c in df.columns if c != target_col]
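For quick verification of _synthetic_multiclass as rewritten above, a sanity check under its stated parameters (1000 samples, 10 features; the shapes follow directly from make_classification):

    import numpy as np
    df = _synthetic_multiclass(n_classes=5)
    assert df.shape == (1000, 11)                          # 10 features + "target"
    assert set(np.unique(df["target"])) <= set(range(5))   # labels drawn from 0..4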
@@ -586,6 +80,9 @@ def create_input_components(df, target_col):
     )
     return components
 
+def one_hot_encode(y, num_classes):
+    """Convert integer labels to one-hot encoded vectors"""
+    return np.eye(num_classes)[y]
 
 def preprocess_data(df, target_col, new_point_dict):
     """Preprocess data for softmax regression"""
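The new one_hot_encode relies on NumPy fancy indexing: np.eye(num_classes) builds the K x K identity matrix, and indexing it with an integer label array picks out one identity row per label. A tiny illustration:

    import numpy as np
    y = np.array([0, 2, 1])
    np.eye(3)[y]
    # array([[1., 0., 0.],
    #        [0., 0., 1.],
    #        [0., 1., 0.]])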
@@ -597,19 +94,13 @@ def preprocess_data(df, target_col, new_point_dict):
     for col in feature_cols:
         X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0.0)
 
-    #
-    unique_vals = sorted(y.unique())
-    n_classes = len(unique_vals)
+    # Ensure target is numeric and get number of classes
+    y = pd.to_numeric(y, errors="coerce").fillna(0).astype(int)
+    num_classes = len(np.unique(y))
 
-    if
-        raise ValueError(f"
+    if num_classes < 2:
+        raise ValueError(f"Target must have at least 2 classes. Found {num_classes}.")
 
-    # Map to 0, 1, 2, ... if needed
-    y_mapped = y.copy()
-    if list(unique_vals) != list(range(n_classes)):
-        mapping = {val: i for i, val in enumerate(unique_vals)}
-        y_mapped = y.map(mapping)
-
     # Prepare new point
     new_point = []
     for col in feature_cols:
|
|
| 622 |
new_point.append(0.0)
|
| 623 |
|
| 624 |
new_point = np.array(new_point, dtype=float).reshape(1, -1)
|
| 625 |
-
|
| 626 |
-
return X.values,
|
| 627 |
-
|
| 628 |
|
| 629 |
def add_bias(X):
|
| 630 |
"""Add bias column to feature matrix"""
|
| 631 |
return np.c_[np.ones(X.shape[0]), X]
|
| 632 |
|
| 633 |
-
|
| 634 |
def softmax(Z):
|
| 635 |
-
"""
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
Args:
|
| 639 |
-
Z: (N, K) matrix where N = samples, K = classes
|
| 640 |
-
|
| 641 |
-
Returns:
|
| 642 |
-
Probabilities (N, K) where each row sums to 1
|
| 643 |
-
"""
|
| 644 |
-
# Numerical stability: subtract max
|
| 645 |
Z_shifted = Z - np.max(Z, axis=1, keepdims=True)
|
| 646 |
exp_Z = np.exp(Z_shifted)
|
| 647 |
return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)
|
| 648 |
|
| 649 |
-
|
| 650 |
def predict_proba(X, Theta):
|
| 651 |
-
"""
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
Args:
|
| 655 |
-
X: (N, d+1) feature matrix with bias
|
| 656 |
-
Theta: (d+1, K) parameter matrix
|
| 657 |
-
|
| 658 |
-
Returns:
|
| 659 |
-
Probabilities (N, K)
|
| 660 |
-
"""
|
| 661 |
-
Z = X.dot(Theta) # (N, K)
|
| 662 |
return softmax(Z)
|
| 663 |
|
| 664 |
-
|
| 665 |
def predict_class(X, Theta):
|
| 666 |
-
"""Make class predictions
|
| 667 |
proba = predict_proba(X, Theta)
|
| 668 |
return np.argmax(proba, axis=1)
|
| 669 |
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
"""
|
| 673 |
-
Convert class labels to one-hot encoding
|
| 674 |
-
|
| 675 |
-
Args:
|
| 676 |
-
y: (N,) array of class labels [0, 1, 2, ...]
|
| 677 |
-
n_classes: number of classes K
|
| 678 |
-
|
| 679 |
-
Returns:
|
| 680 |
-
(N, K) one-hot matrix
|
| 681 |
-
"""
|
| 682 |
-
N = len(y)
|
| 683 |
-
Y_onehot = np.zeros((N, n_classes))
|
| 684 |
-
Y_onehot[np.arange(N), y] = 1
|
| 685 |
-
return Y_onehot
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
def compute_loss(Y_hat, Y_onehot):
|
| 689 |
-
"""
|
| 690 |
-
Compute Categorical Cross-Entropy loss: -Σ y_k * log(ŷ_k)
|
| 691 |
-
|
| 692 |
-
Args:
|
| 693 |
-
Y_hat: (N, K) predicted probabilities
|
| 694 |
-
Y_onehot: (N, K) one-hot encoded true labels
|
| 695 |
-
|
| 696 |
-
Returns:
|
| 697 |
-
Scalar loss
|
| 698 |
-
"""
|
| 699 |
eps = 1e-15
|
| 700 |
Y_hat = np.clip(Y_hat, eps, 1 - eps)
|
| 701 |
-
|
| 702 |
-
return loss / len(Y_onehot)
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
def compute_gradient(Y_hat, Y_onehot, X):
|
| 706 |
-
"""
|
| 707 |
-
Compute gradient: X.T @ (Y_hat - Y_onehot) / N
|
| 708 |
-
|
| 709 |
-
Args:
|
| 710 |
-
Y_hat: (N, K) predicted probabilities
|
| 711 |
-
Y_onehot: (N, K) one-hot encoded labels
|
| 712 |
-
X: (N, d+1) feature matrix
|
| 713 |
-
|
| 714 |
-
Returns:
|
| 715 |
-
(d+1, K) gradient matrix
|
| 716 |
-
"""
|
| 717 |
-
N = len(Y_onehot)
|
| 718 |
-
return X.T.dot(Y_hat - Y_onehot) / N
|
| 719 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 720 |
|
| 721 |
def update_theta(Theta, gradient, lr):
|
| 722 |
"""Update parameters using gradient descent"""
|
| 723 |
return Theta - lr * gradient
|
| 724 |
|
| 725 |
-
|
| 726 |
def compute_accuracy(y_true, y_pred):
|
| 727 |
"""Compute classification accuracy"""
|
| 728 |
return np.mean(y_true == y_pred)
|
| 729 |
|
| 730 |
-
|
| 731 |
def normalize_features(X_train, X_val=None, X_test=None):
|
| 732 |
"""Normalize features using standardization (zero mean, unit variance)"""
|
| 733 |
mean = np.mean(X_train, axis=0)
|
|
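A quick numeric check of the softmax and cross-entropy code in the hunk above: subtracting the row max leaves the softmax result unchanged while keeping exp() from overflowing, and the loss is the mean negative log-probability of the true class. A minimal sketch assuming the functions as defined in this file:

    import numpy as np
    P = softmax(np.array([[1000.0, 1001.0, 1002.0]]))  # naive exp() would overflow here
    # P ≈ [[0.0900, 0.2447, 0.6652]]; each row sums to 1
    Y = np.eye(3)[[2]]                                 # true class 2, one-hot
    compute_loss(P, Y)                                 # -log(0.6652) ≈ 0.4076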
@@ -740,11 +168,9 @@ def normalize_features(X_train, X_val=None, X_test=None):
 
     return X_train_norm, X_val_norm, X_test_norm, mean, std
 
-
-def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, n_classes, epochs, learning_rate, batch_size=None):
+def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size=None):
     """
     Train softmax regression with mini-batch gradient descent
-
     Returns:
         Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std
     """
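As context for normalize_features (unchanged above): the mean and std come from the training split only, and the validation/test splits are scaled with those same statistics, which avoids leaking validation information into the scaler. A small worked example:

    import numpy as np
    X_train = np.array([[0.0], [2.0]])                 # mean 1.0, std 1.0
    Xt, Xv, _, mean, std = normalize_features(X_train, np.array([[4.0]]))
    # Xt == [[-1.], [1.]]; Xv == [[3.]] (the val row uses the train statistics)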
@@ -753,13 +179,13 @@ def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, n_classes, epochs, learning_rate, batch_size=None):
     X_train_bias = add_bias(X_train_norm)
     X_val_bias = add_bias(X_val_norm)
 
-    # Initialize Theta: (
+    # Initialize Theta: (n_features + 1) x num_classes
     np.random.seed(42)
-    Theta = np.random.randn(X_train_bias.shape[1],
+    Theta = np.random.randn(X_train_bias.shape[1], num_classes) * 0.01
 
     # One-hot encode targets
-
-
+    Y_train_one_hot = one_hot_encode(y_train, num_classes)
+    Y_val_one_hot = one_hot_encode(y_val, num_classes)
 
     train_losses = []
     val_losses = []
@@ -768,74 +194,83 @@ def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, n_classes, epochs, learning_rate, batch_size=None):
 
     n_samples = X_train_bias.shape[0]
 
-    if batch_size is None or batch_size >= n_samples:
+    if batch_size is None or batch_size == "Full Batch" or int(batch_size) >= n_samples:
         actual_batch_size = n_samples
     else:
-        actual_batch_size = batch_size
+        actual_batch_size = int(batch_size)
 
     for epoch in range(epochs):
-
-
-
-
-            y_train_shuffled = y_train[indices]
-        else:
-            X_train_shuffled = X_train_bias
-            Y_train_shuffled = Y_train_onehot
-            y_train_shuffled = y_train
+        # Shuffle training data
+        indices = np.random.permutation(n_samples)
+        X_train_shuffled = X_train_bias[indices]
+        Y_train_one_hot_shuffled = Y_train_one_hot[indices]
 
-        # Mini-batch gradient descent
         for i in range(0, n_samples, actual_batch_size):
             X_batch = X_train_shuffled[i:i+actual_batch_size]
-            Y_batch =
+            Y_batch = Y_train_one_hot_shuffled[i:i+actual_batch_size]
 
             Y_batch_hat = predict_proba(X_batch, Theta)
             gradient = compute_gradient(Y_batch_hat, Y_batch, X_batch)
             Theta = update_theta(Theta, gradient, learning_rate)
 
-        # Compute
+        # Compute metrics
         Y_train_hat = predict_proba(X_train_bias, Theta)
-        train_loss = compute_loss(Y_train_hat,
+        train_loss = compute_loss(Y_train_hat, Y_train_one_hot)
         train_losses.append(train_loss)
 
         y_train_pred = predict_class(X_train_bias, Theta)
         train_acc = compute_accuracy(y_train, y_train_pred)
         train_accuracies.append(train_acc)
 
-        # Compute validation metrics
         Y_val_hat = predict_proba(X_val_bias, Theta)
-        val_loss = compute_loss(Y_val_hat,
+        val_loss = compute_loss(Y_val_hat, Y_val_one_hot)
         val_losses.append(val_loss)
 
         y_val_pred = predict_class(X_val_bias, Theta)
         val_acc = compute_accuracy(y_val, y_val_pred)
         val_accuracies.append(val_acc)
 
-    return Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std
+    return Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std, y_val, y_val_pred
 
+def create_confusion_matrix_chart(y_true, y_pred, num_classes):
+    """Create confusion matrix visualization using plotly"""
+    cm = confusion_matrix(y_true, y_pred)
+    labels = [f"Class {i}" for i in range(num_classes)]
+
+    fig = px.imshow(cm,
+                    labels=dict(x="Predicted Label", y="True Label", color="Count"),
+                    x=labels,
+                    y=labels,
+                    text_auto=True,
+                    color_continuous_scale='Blues')
+
+    fig.update_layout(
+        title="Confusion Matrix (Validation Set)",
+        plot_bgcolor="white",
+        height=400,
+        margin=dict(l=40, r=40, t=80, b=40)
+    )
+    return fig
 
 def run_softmax_regression_and_visualize(df, target_col, new_point_dict,
                                          epochs, learning_rate, batch_size_str="Full Batch",
                                          train_test_split_ratio=0.8):
     """Run softmax regression training and generate visualizations"""
-    X, y, new_point, feature_cols
+    X, y, num_classes, new_point, feature_cols = preprocess_data(df, target_col, new_point_dict)
 
     if epochs < 1:
         return None, None, None, "Number of epochs must be ≥ 1.", None
     if learning_rate <= 0:
         return None, None, None, "Learning rate must be > 0.", None
 
     test_size = 1.0 - train_test_split_ratio
+    # Ensure stratify works even with small classes by checking counts if needed,
+    # but for simplicity we'll assume data is sufficient for demo.
     X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=42, stratify=y)
 
-    if batch_size_str == "Full Batch":
-        batch_size = None
-    else:
-        batch_size = int(batch_size_str)
-
     start_time = time.time()
-    Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std = train_softmax_regression_with_validation(
-        X_train, y_train, X_val, y_val,
+    Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std, y_val_final, y_val_pred_final = train_softmax_regression_with_validation(
+        X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size_str
     )
     training_time = time.time() - start_time
 
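A hedged usage sketch for the updated trainer and the new confusion-matrix chart (function names are from this file; the wiring below is illustrative, not part of the diff):

    from sklearn.model_selection import train_test_split
    df = _synthetic_multiclass(n_classes=3)
    X, y, num_classes, _, _ = preprocess_data(df, "target", {})
    X_tr, X_va, y_tr, y_va = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    (Theta, tr_losses, va_losses, tr_accs, va_accs,
     X_mean, X_std, y_val, y_val_pred) = train_softmax_regression_with_validation(
        X_tr, y_tr, X_va, y_va, num_classes, epochs=200, learning_rate=0.5, batch_size="Full Batch")
    fig = create_confusion_matrix_chart(y_val, y_val_pred, num_classes)

Note that the trainer now also returns the validation labels and predictions, so the caller can build the confusion matrix without re-predicting.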
@@ -844,26 +279,15 @@ def run_softmax_regression_and_visualize(df, target_col, new_point_dict,
         "feature_cols": feature_cols,
         "X_mean": X_mean,
         "X_std": X_std,
-        "
+        "num_classes": num_classes
     })
 
-    #
-    X_train_norm, X_val_norm, _, _, _ = normalize_features(X_train, X_val)
-    X_train_bias = add_bias(X_train_norm)
-    X_val_bias = add_bias(X_val_norm)
-
-    # Make prediction
+    # Make prediction for new point
     new_point_norm = (new_point - X_mean) / X_std
     new_point_bias = add_bias(new_point_norm)
     prediction_proba = predict_proba(new_point_bias, Theta)[0]
     prediction_class = np.argmax(prediction_proba)
 
-    # Compute final metrics
-    y_train_pred = predict_class(X_train_bias, Theta)
-    y_val_pred = predict_class(X_val_bias, Theta)
-    train_acc_final = compute_accuracy(y_train, y_train_pred)
-    val_acc_final = compute_accuracy(y_val, y_val_pred)
-
     final_train_loss = train_losses[-1]
     final_val_loss = val_losses[-1]
     final_train_acc = train_accuracies[-1]
@@ -871,9 +295,10 @@ def run_softmax_regression_and_visualize(df, target_col, new_point_dict,
 
     train_loss_fig = create_training_loss_chart(train_losses, train_accuracies)
     val_loss_fig = create_validation_loss_chart(val_losses, val_accuracies)
+    # confusion_fig = create_confusion_matrix_chart(y_val_final, y_val_pred_final, num_classes)
 
     results_display = create_results_display(
-        Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate,
+        Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes,
         split_info={
             "train_size": len(X_train),
             "val_size": len(X_val),
@@ -883,18 +308,12 @@ def run_softmax_regression_and_visualize(df, target_col, new_point_dict,
             "val_loss": final_val_loss,
             "train_acc": final_train_acc,
             "val_acc": final_val_acc,
-            "train_acc_final": train_acc_final,
-            "val_acc_final": val_acc_final,
             "batch_size": batch_size_str,
             "training_time": training_time
         }
     )
 
-
-    confusion_fig = create_confusion_matrix(y_val, y_val_pred, n_classes)
-
-    return train_loss_fig, val_loss_fig, results_display, prediction_proba, prediction_class, confusion_fig
-
+    return train_loss_fig, val_loss_fig, results_display
 
 def create_training_loss_chart(train_losses, train_accuracies):
     """Create training loss and accuracy visualization"""
@@ -953,7 +372,6 @@ def create_training_loss_chart(train_losses, train_accuracies):
 
     return fig
 
-
 def create_validation_loss_chart(val_losses, val_accuracies):
     """Create validation loss and accuracy visualization"""
     if not val_losses or len(val_losses) == 0:
@@ -1011,60 +429,15 @@ def create_validation_loss_chart(val_losses, val_accuracies):
 
     return fig
 
-
-def create_confusion_matrix(y_true, y_pred, n_classes):
-    """Create confusion matrix heatmap"""
-    # Compute confusion matrix
-    cm = np.zeros((n_classes, n_classes), dtype=int)
-    for true, pred in zip(y_true, y_pred):
-        cm[true, pred] += 1
-
-    # Create heatmap
-    fig = go.Figure(data=go.Heatmap(
-        z=cm,
-        x=[f"Pred {i}" for i in range(n_classes)],
-        y=[f"True {i}" for i in range(n_classes)],
-        colorscale='Blues',
-        text=cm,
-        texttemplate="%{text}",
-        textfont={"size": 16},
-        showscale=True,
-        hovertemplate='True: %{y}<br>Predicted: %{x}<br>Count: %{z}<extra></extra>'
-    ))
-
-    fig.update_layout(
-        title="Confusion Matrix (Validation Set)",
-        xaxis_title="Predicted Class",
-        yaxis_title="True Class",
-        height=500,
-        width=500,
-        plot_bgcolor="white"
-    )
-
-    return fig
-
-
-def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, n_classes, split_info):
+def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes, split_info):
     """Create HTML display showing model results"""
 
-    # Format Theta
-
-    theta_rows = []
-    for i in range(min(Theta.shape[0], max_display_rows)):
-        row_str = ", ".join([f"{w:.4f}" for w in Theta[i]])
-        theta_rows.append(f"[{row_str}]")
-
-    if Theta.shape[0] > max_display_rows:
-        theta_rows.append("...")
-
-    theta_str = "<br>  ".join(theta_rows)
-
-    # Format prediction probabilities
-    proba_str = "<br>".join([
-        f"  • Class {i}: <span style='background:#E8F5E9;padding:2px 6px;border-radius:4px;'><strong>{prob:.4f}</strong></span> ({prob*100:.2f}%)"
-        for i, prob in enumerate(prediction_proba)
-    ])
+    # Format Theta for display (just showing shape or first few parameters if needed, usually too large for multi-class)
+    theta_shape_str = f"{Theta.shape[0]} x {Theta.shape[1]}"
 
+    # Format predicted probabilities for each class
+    proba_str = "<br>".join([f"• Class {i}: <strong>{p:.4f}</strong> ({p*100:.2f}%)" for i, p in enumerate(prediction_proba)])
+
     html_content = f"""
     <div style='background:#E3F2FD;border-left:6px solid #1976D2;padding:14px 16px;border-radius:10px;'>
       <strong style='color:#0D47A1;'>📊 Softmax Regression Results</strong><br><br>
@@ -1072,9 +445,8 @@ def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, n_classes, split_info):
       <div style='margin:8px 0;'>
        <strong style='color:#1976D2;'>🔧 Model Configuration:</strong><br>
        • Epochs: {epochs} | Learning Rate: {learning_rate}<br>
-        • Batch Size: {split_info.get('batch_size', 'Full Batch')} | Features: {len(feature_cols)}<br>
-        •
-        • Activation: Softmax | Loss: Categorical Cross-Entropy<br>
+        • Batch Size: {split_info.get('batch_size', 'Full Batch')} | Features: {len(feature_cols)} | Classes: {num_classes}<br>
+        • Normalization: Standardized | Activation: Softmax | Loss: Categorical Cross-Entropy<br>
       </div>
 
       <div style='margin:8px 0;'>
@@ -1087,32 +459,25 @@ def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, n_classes, split_info):
        <strong style='color:#1976D2;'>📈 Performance Metrics:</strong><br>
        • Training Loss (CCE): <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_loss']:.4f}</strong></span><br>
        • Validation Loss (CCE): <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_loss']:.4f}</strong></span><br>
-        • Training Accuracy: <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['
-        • Validation Accuracy: <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['
+        • Training Accuracy: <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_acc']*100:.2f}%</strong></span><br>
+        • Validation Accuracy: <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_acc']*100:.2f}%</strong></span><br>
        • Training Time: <span style='background:#E1BEE7;padding:2px 6px;border-radius:4px;'><strong>{split_info['training_time']:.4f}s</strong></span><br>
       </div>
 
       <div style='margin:8px 0;'>
        <strong style='color:#1976D2;'>🎯 Learned Parameters (Θ):</strong><br>
-        • Theta
-        • First {min(Theta.shape[0], max_display_rows)} rows:<br>
-        <code style='background:#F3E5F5;padding:6px;border-radius:4px;display:block;margin-top:4px;font-size:0.85em;'>
-        {theta_str}
-        </code>
+        • Theta Shape = <code style='background:#F3E5F5;padding:2px 6px;border-radius:4px;'>{theta_shape_str}</code> (Features+Bias x Classes)<br>
       </div>
 
       <div style='margin:8px 0;'>
-        <strong style='color:#1976D2;'>🔮 Prediction for New
+        <strong style='color:#1976D2;'>🔮 Prediction for New Point:</strong><br>
+        • Predicted Class: <span style='background:#DCEDC8;padding:2px 6px;border-radius:4px;font-size:1.1em;'><strong>Class {prediction_class}</strong></span><br>
+        <div style='margin-top:8px;font-size:0.95em;'>
        <strong>Class Probabilities:</strong><br>
-        {proba_str}
-
-        <em style='font-size:0.9em;color:#424242;margin-top:4px;display:block;'>
-        * The model outputs probabilities for all {n_classes} classes using softmax activation<br>
-        * Prediction is the class with highest probability (argmax)
-        </em>
+        {proba_str}
+        </div>
       </div>
     </div>
     """
 
-    return html_content
-
+    return html_content
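Finally, an end-to-end sketch of the public entry point as it reads after this change (Iris ships with sklearn; the feature dict below is illustrative):

    df = load_data(dataset_choice="Iris")
    point = {c: float(df[c].mean()) for c in df.columns if c != "target"}
    train_fig, val_fig, results_html = run_softmax_regression_and_visualize(
        df, "target", point, epochs=300, learning_rate=0.5)
    # train_fig / val_fig are plotly figures; results_html is the summary panel markup

One wrinkle to be aware of: the validation branches still return five values (None, None, None, message, None) while the success path now returns three, so callers need to handle both shapes.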