import time

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.datasets import load_iris, load_wine, make_classification
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Parameters of the most recently trained model (None until a run completes).
_current_model_params = None


def _get_current_model():
    """Return the parameter dict stored by the last training run, or None."""
    return _current_model_params


def _set_current_model(params):
    """Store ``params`` as the module-wide current model."""
    global _current_model_params
    _current_model_params = params


def _read_csv_any_encoding(path):
    """Read a CSV file, trying several text encodings in turn."""
    # NOTE: "latin-1" can decode any byte sequence, so the entries after it
    # and the final fallback are only reached if this list is reordered.
    for encoding in ("utf-8", "latin-1", "iso-8859-1", "cp1252"):
        try:
            return pd.read_csv(path, encoding=encoding)
        except UnicodeDecodeError:
            continue
    # ``read_csv`` has no ``errors`` keyword; the decoding policy is
    # controlled by ``encoding_errors``.
    return pd.read_csv(path, encoding="utf-8", encoding_errors="replace")


def load_data(file_obj=None, dataset_choice="Iris"):
    """Load a multi-class classification dataset.

    Args:
        file_obj: Optional uploaded-file object exposing a ``name`` attribute
            holding the file path. CSV and Excel files are supported.
        dataset_choice: Name of a built-in dataset, used when ``file_obj`` is
            None. Unknown names fall back to "Iris".

    Returns:
        A pandas DataFrame.

    Raises:
        ValueError: If the uploaded file is neither CSV nor Excel.
    """
    if file_obj is not None:
        path = file_obj.name
        lowered = path.lower()  # accept ".CSV", ".XLSX", ...
        if lowered.endswith(".csv"):
            return _read_csv_any_encoding(path)
        if lowered.endswith((".xlsx", ".xls")):
            return pd.read_excel(path)
        raise ValueError("Unsupported format. Upload CSV or Excel files.")

    datasets = {
        "Iris": lambda: _sklearn_to_df(load_iris()),
        "Wine": lambda: _sklearn_to_df(load_wine()),
        "Synthetic (3 classes)": lambda: _synthetic_multiclass(n_classes=3),
        "Synthetic (5 classes)": lambda: _synthetic_multiclass(n_classes=5),
    }
    # Fallback to Iris if the choice is invalid.
    loader = datasets.get(dataset_choice, datasets["Iris"])
    return loader()


def _sklearn_to_df(data):
    """Convert an sklearn dataset bunch to a DataFrame with a "target" column.

    Columns are named from ``data.feature_names`` when available, otherwise
    ``feature_0``, ``feature_1``, ...
    """
    names = getattr(data, "feature_names", None)
    if names is None:
        # Without this, pandas would assign integer column labels and the
        # generic names below would never be applied.
        names = [f"feature_{i}" for i in range(np.shape(data.data)[1])]
    df = pd.DataFrame(data.data, columns=list(names))
    if df.columns.isnull().any():
        df.columns = [f"feature_{i}" for i in range(df.shape[1])]
    df["target"] = data.target
    return df


def _synthetic_multiclass(n_classes=3):
    """Generate a synthetic dataset (1000 samples, 10 features)."""
    X, y = make_classification(
        n_samples=1000,
        n_features=10,
        n_informative=8,
        n_redundant=2,
        n_classes=n_classes,
        random_state=42,
    )
    df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
    df["target"] = y
    return df


def create_input_components(df, target_col):
    """Describe one numeric input per feature column.

    Each entry's default ``value`` is the column mean (non-numeric cells are
    ignored), rounded to 3 decimals, or 0.0 when no numeric value exists.
    """
    components = []
    for col in df.columns:
        if col == target_col:
            continue
        val = pd.to_numeric(df[col], errors="coerce").dropna().mean()
        val = 0.0 if pd.isna(val) else float(val)
        components.append(
            {
                "name": col,
                "type": "number",
                "value": round(val, 3),
                "minimum": None,
                "maximum": None,
            }
        )
    return components


def one_hot_encode(y, num_classes):
    """Convert integer labels in ``[0, num_classes)`` to one-hot rows."""
    return np.eye(num_classes)[y]


def preprocess_data(df, target_col, new_point_dict):
    """Prepare features, labels and the query point for softmax regression.

    Non-numeric feature cells become 0.0. Target values are coerced to
    integers (unparseable cells become 0) and then remapped to contiguous
    class indices ``0..K-1`` in sorted order, so that one-hot encoding and
    argmax predictions agree even when the raw labels are e.g. 1, 2, 3.

    Args:
        df: Input DataFrame.
        target_col: Name of the label column.
        new_point_dict: Mapping of feature name to value for the point to
            predict; missing or unparseable entries default to 0.0.

    Returns:
        Tuple ``(X, y, num_classes, new_point, feature_cols)`` where ``X`` is
        a float array, ``y`` holds class indices and ``new_point`` has shape
        ``(1, n_features)``.

    Raises:
        ValueError: If fewer than 2 distinct classes are present.
    """
    feature_cols = [c for c in df.columns if c != target_col]

    X = df[feature_cols].copy()
    for col in feature_cols:
        X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0.0)

    y_raw = pd.to_numeric(df[target_col], errors="coerce").fillna(0).astype(int)
    classes, y = np.unique(y_raw.to_numpy(), return_inverse=True)
    num_classes = len(classes)
    if num_classes < 2:
        raise ValueError(f"Target must have at least 2 classes. Found {num_classes}.")

    point_values = new_point_dict or {}
    new_point = []
    for col in feature_cols:
        try:
            new_point.append(float(point_values.get(col, 0.0)))
        except (TypeError, ValueError):
            new_point.append(0.0)
    new_point = np.array(new_point, dtype=float).reshape(1, -1)

    return X.to_numpy(dtype=float), y, num_classes, new_point, feature_cols


def add_bias(X):
    """Prepend a column of ones (bias term) to the feature matrix."""
    return np.c_[np.ones(X.shape[0]), X]


def softmax(Z):
    """Row-wise softmax: exp(z_k) / sum(exp(z_j))."""
    # Shift by the row maximum for numerical stability (avoids exp overflow).
    Z_shifted = Z - np.max(Z, axis=1, keepdims=True)
    exp_Z = np.exp(Z_shifted)
    return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)


def predict_proba(X, Theta):
    """Return class probabilities: softmax(X @ Theta)."""
    return softmax(X.dot(Theta))


def predict_class(X, Theta):
    """Return the most probable class index for each row of ``X``."""
    return np.argmax(predict_proba(X, Theta), axis=1)


def compute_loss(Y_hat, Y_one_hot):
    """Mean categorical cross-entropy: -mean(sum(y_k * log(y_hat_k)))."""
    eps = 1e-15  # keep log() away from 0
    Y_hat = np.clip(Y_hat, eps, 1 - eps)
    return -np.mean(np.sum(Y_one_hot * np.log(Y_hat), axis=1))


def compute_gradient(Y_hat, Y_one_hot, X):
    """Gradient of the loss w.r.t. Theta: X.T @ (Y_hat - Y_one_hot) / N."""
    return X.T.dot(Y_hat - Y_one_hot) / X.shape[0]


def update_theta(Theta, gradient, lr):
    """Take one gradient-descent step and return the new parameters."""
    return Theta - lr * gradient


def compute_accuracy(y_true, y_pred):
    """Return the fraction of predictions equal to the true labels."""
    return np.mean(np.asarray(y_true) == np.asarray(y_pred))


def normalize_features(X_train, X_val=None, X_test=None):
    """Standardize features using statistics of the training set.

    Returns:
        Tuple ``(X_train_norm, X_val_norm, X_test_norm, mean, std)``; the
        val/test entries are None when the corresponding input is None.
    """
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)
    std[std == 0] = 1  # constant columns: avoid division by zero

    def _scale(X):
        return (X - mean) / std if X is not None else None

    return _scale(X_train), _scale(X_val), _scale(X_test), mean, std


def _resolve_batch_size(batch_size, n_samples):
    """Translate the user-facing batch size into a positive integer."""
    if batch_size is None or batch_size == "Full Batch":
        return n_samples
    size = int(batch_size)
    if size < 1:
        raise ValueError(f"Batch size must be ≥ 1. Got {batch_size!r}.")
    return min(size, n_samples)


def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size=None):
    """Train softmax regression with mini-batch gradient descent.

    Features are standardized with training-set statistics and a bias column
    is added. Loss and accuracy on both splits are recorded after each epoch.

    Args:
        X_train, y_train: Training features and class indices (0..K-1).
        X_val, y_val: Validation features and class indices.
        num_classes: Number of classes K.
        epochs: Number of passes over the training data.
        learning_rate: Gradient-descent step size.
        batch_size: None or "Full Batch" for full-batch updates, otherwise an
            integer (or numeric string) mini-batch size.

    Returns:
        Tuple ``(Theta, train_losses, val_losses, train_accuracies,
        val_accuracies, X_mean, X_std, y_val, y_val_pred)`` where
        ``y_val_pred`` are the final validation predictions.

    Raises:
        ValueError: If ``batch_size`` is an integer smaller than 1.
    """
    X_train_norm, X_val_norm, _, X_mean, X_std = normalize_features(X_train, X_val)
    X_train_bias = add_bias(X_train_norm)
    X_val_bias = add_bias(X_val_norm)

    # A local generator yields the same stream as np.random.seed(42) without
    # reseeding the global NumPy RNG for the rest of the process.
    rng = np.random.RandomState(42)
    # Theta: (n_features + 1) x num_classes
    Theta = rng.randn(X_train_bias.shape[1], num_classes) * 0.01

    Y_train_one_hot = one_hot_encode(y_train, num_classes)
    Y_val_one_hot = one_hot_encode(y_val, num_classes)

    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    n_samples = X_train_bias.shape[0]
    step = _resolve_batch_size(batch_size, n_samples)

    # Defined up front so the return value is valid even when epochs == 0.
    y_val_pred = predict_class(X_val_bias, Theta)

    for _ in range(int(epochs)):
        order = rng.permutation(n_samples)
        X_shuffled = X_train_bias[order]
        Y_shuffled = Y_train_one_hot[order]

        for start in range(0, n_samples, step):
            X_batch = X_shuffled[start:start + step]
            Y_batch = Y_shuffled[start:start + step]
            gradient = compute_gradient(predict_proba(X_batch, Theta), Y_batch, X_batch)
            Theta = update_theta(Theta, gradient, learning_rate)

        # Per-epoch metrics; class predictions reuse the probabilities.
        Y_train_hat = predict_proba(X_train_bias, Theta)
        train_losses.append(compute_loss(Y_train_hat, Y_train_one_hot))
        train_accuracies.append(compute_accuracy(y_train, np.argmax(Y_train_hat, axis=1)))

        Y_val_hat = predict_proba(X_val_bias, Theta)
        y_val_pred = np.argmax(Y_val_hat, axis=1)
        val_losses.append(compute_loss(Y_val_hat, Y_val_one_hot))
        val_accuracies.append(compute_accuracy(y_val, y_val_pred))

    return Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std, y_val, y_val_pred


def create_confusion_matrix_chart(y_true, y_pred, num_classes):
    """Create a confusion-matrix heatmap using plotly."""
    # Pass the full label set so the matrix is always num_classes x
    # num_classes, even if a class is missing from y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=list(range(num_classes)))
    labels = [f"Class {i}" for i in range(num_classes)]

    fig = px.imshow(
        cm,
        labels=dict(x="Predicted Label", y="True Label", color="Count"),
        x=labels,
        y=labels,
        text_auto=True,
        color_continuous_scale='Blues',
    )
    fig.update_layout(
        title="Confusion Matrix (Validation Set)",
        plot_bgcolor="white",
        height=400,
        margin=dict(l=40, r=40, t=80, b=40),
    )
    return fig


def run_softmax_regression_and_visualize(df, target_col, new_point_dict, epochs, learning_rate, batch_size_str="Full Batch", train_test_split_ratio=0.8):
    """Train softmax regression on ``df`` and build the result visualizations.

    Args:
        df: Input DataFrame.
        target_col: Name of the label column.
        new_point_dict: Feature values of the point to classify.
        epochs: Number of training epochs (≥ 1).
        learning_rate: Gradient-descent step size (> 0).
        batch_size_str: "Full Batch" or a mini-batch size.
        train_test_split_ratio: Fraction of rows used for training.

    Returns:
        Tuple ``(train_loss_fig, val_loss_fig, results_display)``. When a
        hyperparameter is invalid both figures are None and the third element
        is the error message, so every path returns the same shape.

    Raises:
        ValueError: Propagated from preprocessing or training (e.g. fewer
            than 2 classes, invalid batch size).
    """
    epochs = int(epochs)  # UI widgets may deliver floats such as 100.0
    if epochs < 1:
        return None, None, "Number of epochs must be ≥ 1."
    if learning_rate <= 0:
        return None, None, "Learning rate must be > 0."
    if not 0 < train_test_split_ratio < 1:
        return None, None, "Train/validation split ratio must be between 0 and 1."

    X, y, num_classes, new_point, feature_cols = preprocess_data(df, target_col, new_point_dict)

    test_size = 1.0 - train_test_split_ratio
    # Stratified splitting needs at least 2 samples per class; otherwise fall
    # back to a plain random split instead of failing.
    _, class_counts = np.unique(y, return_counts=True)
    stratify = y if class_counts.min() >= 2 else None
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=test_size, random_state=42, stratify=stratify
    )

    start_time = time.perf_counter()
    # NOTE: the two trailing values (validation labels and final predictions)
    # are what create_confusion_matrix_chart expects; they are unused here.
    (Theta, train_losses, val_losses, train_accuracies, val_accuracies,
     X_mean, X_std, _y_val, _y_val_pred) = train_softmax_regression_with_validation(
        X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size_str
    )
    training_time = time.perf_counter() - start_time

    _set_current_model({
        "Theta": Theta,
        "feature_cols": feature_cols,
        "X_mean": X_mean,
        "X_std": X_std,
        "num_classes": num_classes,
    })

    # Prediction for the new point, using the training normalization.
    new_point_bias = add_bias((new_point - X_mean) / X_std)
    prediction_proba = predict_proba(new_point_bias, Theta)[0]
    prediction_class = int(np.argmax(prediction_proba))

    train_loss_fig = create_training_loss_chart(train_losses, train_accuracies)
    val_loss_fig = create_validation_loss_chart(val_losses, val_accuracies)

    results_display = create_results_display(
        Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes,
        split_info={
            "train_size": len(X_train),
            "val_size": len(X_val),
            "train_ratio": train_test_split_ratio,
            "val_ratio": test_size,
            "train_loss": train_losses[-1],
            "val_loss": val_losses[-1],
            "train_acc": train_accuracies[-1],
            "val_acc": val_accuracies[-1],
            "batch_size": batch_size_str,
            "training_time": training_time,
        },
    )

    return train_loss_fig, val_loss_fig, results_display


def _create_metrics_chart(losses, accuracies, prefix, loss_color, acc_color):
    """Build a two-row figure (loss on top, accuracy in % below) per epoch."""
    if losses is None or len(losses) == 0:
        return None

    epochs = list(range(1, len(losses) + 1))
    # Non-finite values are plotted as gaps.
    finite_losses = [loss if np.isfinite(loss) else None for loss in losses]

    fig = make_subplots(
        rows=2,
        cols=1,
        subplot_titles=(f"{prefix} Loss (Categorical Cross-Entropy)", f"{prefix} Accuracy"),
        vertical_spacing=0.15,
        row_heights=[0.5, 0.5],
    )

    fig.add_trace(
        go.Scatter(
            x=epochs,
            y=finite_losses,
            mode='lines+markers',
            name=f'{prefix} Loss',
            line=dict(color=loss_color, width=3),
            marker=dict(size=6),
            showlegend=True,
        ),
        row=1, col=1,
    )

    if accuracies is not None and len(accuracies) == len(losses):
        accuracy_pct = [acc * 100 if np.isfinite(acc) else None for acc in accuracies]
        fig.add_trace(
            go.Scatter(
                x=epochs,
                y=accuracy_pct,
                mode='lines+markers',
                name=f'{prefix} Accuracy',
                line=dict(color=acc_color, width=3),
                marker=dict(size=6),
                showlegend=True,
            ),
            row=2, col=1,
        )

    grid = dict(showgrid=True, gridwidth=1, gridcolor='lightgray')
    fig.update_xaxes(title_text="Epoch", row=1, col=1, **grid)
    fig.update_yaxes(title_text="Loss", row=1, col=1, **grid)
    fig.update_xaxes(title_text="Epoch", row=2, col=1, **grid)
    fig.update_yaxes(title_text="Accuracy (%)", row=2, col=1, range=[0, 100], **grid)

    fig.update_layout(
        title=f"{prefix} Metrics Over Epochs",
        plot_bgcolor="white",
        height=600,
        margin=dict(l=40, r=40, t=80, b=40),
    )
    return fig


def create_training_loss_chart(train_losses, train_accuracies):
    """Create the training loss/accuracy figure, or None without losses."""
    return _create_metrics_chart(train_losses, train_accuracies, "Training", '#1976D2', '#42A5F5')


def create_validation_loss_chart(val_losses, val_accuracies):
    """Create the validation loss/accuracy figure, or None without losses."""
    return _create_metrics_chart(val_losses, val_accuracies, "Validation", '#7B1FA2', '#BA68C8')


def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes, split_info):
    """Build the text summary of configuration, metrics and prediction.

    Args:
        Theta: Learned parameter matrix; only its shape is reported because
            the full matrix is usually too large to display.
        prediction_proba: Per-class probabilities for the new point.
        prediction_class: Index of the predicted class.
        feature_cols: Feature names (only their count is shown).
        epochs, learning_rate, num_classes: Training configuration.
        split_info: Dict with keys ``train_size``, ``val_size``,
            ``train_ratio``, ``val_ratio``, ``train_loss``, ``val_loss``,
            ``train_acc``, ``val_acc``, ``training_time`` and optionally
            ``batch_size``.

    Returns:
        The formatted summary string.
    """
    theta_shape_str = f"{Theta.shape[0]} x {Theta.shape[1]}"

    # One line per class with its probability and percentage.
    proba_str = "\n".join([f"• Class {i}: {p:.4f} ({p*100:.2f}%)" for i, p in enumerate(prediction_proba)])

    html_content = f"""
📊 Softmax Regression Results

🔧 Model Configuration:
• Epochs: {epochs} | Learning Rate: {learning_rate}
• Batch Size: {split_info.get('batch_size', 'Full Batch')} | Features: {len(feature_cols)} | Classes: {num_classes}
• Normalization: Standardized | Activation: Softmax | Loss: Categorical Cross-Entropy
📊 Data Split:
• Training: {split_info['train_size']} samples ({split_info['train_ratio']:.1%})
• Validation: {split_info['val_size']} samples ({split_info['val_ratio']:.1%})
📈 Performance Metrics:
• Training Loss (CCE): {split_info['train_loss']:.4f}
• Validation Loss (CCE): {split_info['val_loss']:.4f}
• Training Accuracy: {split_info['train_acc']*100:.2f}%
• Validation Accuracy: {split_info['val_acc']*100:.2f}%
• Training Time: {split_info['training_time']:.4f}s
🎯 Learned Parameters (Θ):
• Theta Shape = {theta_shape_str} (Features+Bias x Classes)
🔮 Prediction for New Point:
• Predicted Class: Class {prediction_class}
Class Probabilities:
{proba_str}
"""
    return html_content