|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from sklearn.datasets import load_iris, load_wine, make_classification |
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.metrics import confusion_matrix |
|
|
from plotly.subplots import make_subplots |
|
|
import plotly.graph_objects as go |
|
|
import plotly.express as px |
|
|
import time |
|
|
|
|
|
# Module-level slot holding the parameters of the most recently trained model.
# Written after every successful training run; read by later prediction calls.
_current_model_params = None


def _get_current_model():
    """Return the most recently stored model-parameter dict, or None."""
    return _current_model_params


def _set_current_model(params):
    """Replace the stored model parameters with *params*."""
    global _current_model_params
    _current_model_params = params
|
|
|
|
|
def load_data(file_obj=None, dataset_choice="Iris"):
    """Load a multi-class classification dataset.

    Args:
        file_obj: optional uploaded file object exposing a ``.name`` path;
            CSV and Excel (.xlsx/.xls) are supported and take precedence
            over ``dataset_choice``.
        dataset_choice: name of a built-in dataset; unknown names fall back
            to "Iris".

    Returns:
        pandas.DataFrame — the file contents, or a built-in dataset with
        feature columns plus a "target" column.

    Raises:
        ValueError: if the uploaded file has an unsupported extension.
    """
    if file_obj is not None:
        if file_obj.name.endswith(".csv"):
            # Try common encodings in order (latin-1 accepts any byte
            # sequence, so this loop almost always succeeds early).
            encodings = ["utf-8", "latin-1", "iso-8859-1", "cp1252"]
            for encoding in encodings:
                try:
                    return pd.read_csv(file_obj.name, encoding=encoding)
                except UnicodeDecodeError:
                    continue
            # BUG FIX: read_csv has no ``errors`` keyword — the
            # byte-replacement fallback must be spelled ``encoding_errors``
            # (pandas >= 1.3); the old call raised TypeError.
            return pd.read_csv(file_obj.name, encoding="utf-8", encoding_errors="replace")
        elif file_obj.name.endswith((".xlsx", ".xls")):
            return pd.read_excel(file_obj.name)
        else:
            raise ValueError("Unsupported format. Upload CSV or Excel files.")

    datasets = {
        "Iris": lambda: _sklearn_to_df(load_iris()),
        "Wine": lambda: _sklearn_to_df(load_wine()),
        "Synthetic (3 classes)": lambda: _synthetic_multiclass(n_classes=3),
        "Synthetic (5 classes)": lambda: _synthetic_multiclass(n_classes=5),
    }

    # Unknown choice: fall back to Iris rather than erroring out.
    if dataset_choice not in datasets:
        return datasets["Iris"]()
    return datasets[dataset_choice]()
|
|
|
|
|
def _sklearn_to_df(data):
    """Convert an sklearn Bunch-like dataset to a DataFrame.

    Uses the dataset's ``feature_names`` when present; otherwise generates
    ``feature_0 .. feature_{n-1}``. Labels are appended as a "target" column.
    """
    feature_names = getattr(data, "feature_names", None)
    df = pd.DataFrame(data.data, columns=feature_names)
    if feature_names is None:
        # BUG FIX: with columns=None pandas builds an integer RangeIndex,
        # never null values, so the old ``df.columns.isnull().any()`` check
        # could not fire and columns stayed as bare integers. Name them
        # explicitly instead.
        df.columns = [f"feature_{i}" for i in range(df.shape[1])]
    df["target"] = data.target
    return df
|
|
|
|
|
def _synthetic_multiclass(n_classes=3):
    """Generate a synthetic 1000-sample, 10-feature multi-class dataset.

    Uses a fixed random_state so repeated calls return identical data.
    """
    features, labels = make_classification(
        n_samples=1000,
        n_features=10,
        n_informative=8,
        n_redundant=2,
        n_classes=n_classes,
        random_state=42,
    )
    frame = pd.DataFrame(
        features, columns=[f"feature_{i}" for i in range(features.shape[1])]
    )
    frame["target"] = labels
    return frame
|
|
|
|
|
def create_input_components(df, target_col):
    """Build a numeric input spec for every non-target column.

    Each spec dict carries the column name and the column mean (rounded to
    3 decimals) as the default value; columns that cannot be coerced to
    numbers default to 0.0.
    """
    specs = []
    for name in df.columns:
        if name == target_col:
            continue
        # Coerce to numeric and average only the parseable entries.
        mean_val = pd.to_numeric(df[name], errors="coerce").dropna().mean()
        default = 0.0 if pd.isna(mean_val) else float(mean_val)
        specs.append({
            "name": name,
            "type": "number",
            "value": round(default, 3),
            "minimum": None,
            "maximum": None,
        })
    return specs
|
|
|
|
|
def one_hot_encode(y, num_classes):
    """Return one-hot rows (len(y) x num_classes) for integer labels in y."""
    labels = np.asarray(y)
    encoded = np.zeros((labels.shape[0], num_classes))
    encoded[np.arange(labels.shape[0]), labels] = 1.0
    return encoded
|
|
|
|
|
def preprocess_data(df, target_col, new_point_dict):
    """Preprocess data for softmax regression.

    Args:
        df: input DataFrame.
        target_col: name of the label column.
        new_point_dict: mapping of feature name -> value for the point to
            predict; missing or unparseable values default to 0.0.

    Returns:
        (X, y, num_classes, new_point, feature_cols) where X is the numeric
        feature matrix, y holds integer labels remapped to 0..K-1, and
        new_point is a (1, n_features) float array.

    Raises:
        ValueError: if the target has fewer than 2 distinct classes.
    """
    feature_cols = [c for c in df.columns if c != target_col]
    X = df[feature_cols].copy()
    y = df[target_col].copy()

    # Coerce every feature to numeric; unparseable entries become 0.0.
    for col in feature_cols:
        X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0.0)

    y = pd.to_numeric(y, errors="coerce").fillna(0).astype(int)
    # BUG FIX: remap labels to contiguous 0..K-1. With non-contiguous labels
    # (e.g. {0, 2, 5}) the old num_classes = len(unique) made the downstream
    # one-hot lookup np.eye(num_classes)[y] index out of bounds. Contiguous
    # labels are returned unchanged (np.unique sorts ascending).
    classes, y_indexed = np.unique(y.values, return_inverse=True)
    num_classes = len(classes)

    if num_classes < 2:
        raise ValueError(f"Target must have at least 2 classes. Found {num_classes}.")

    # Assemble the user-supplied point in feature order, defaulting to 0.0
    # for missing or non-numeric entries.
    new_point = []
    for col in feature_cols:
        try:
            new_point.append(float(new_point_dict[col]))
        except Exception:
            new_point.append(0.0)

    new_point = np.array(new_point, dtype=float).reshape(1, -1)

    return X.values, y_indexed, num_classes, new_point, feature_cols
|
|
|
|
|
def add_bias(X):
    """Prepend a column of ones (the bias/intercept term) to X."""
    ones = np.ones((X.shape[0], 1))
    return np.hstack((ones, X))
|
|
|
|
|
def softmax(Z):
    """Row-wise softmax: exp(z_k) / sum_j exp(z_j).

    The row maximum is subtracted first so exp() never overflows
    (numerical-stability trick; the result is mathematically unchanged).
    """
    exp_shifted = np.exp(Z - Z.max(axis=1, keepdims=True))
    return exp_shifted / exp_shifted.sum(axis=1, keepdims=True)
|
|
|
|
|
def predict_proba(X, Theta):
    """Class-probability predictions: softmax over the linear scores X·Θ."""
    logits = X @ Theta
    return softmax(logits)
|
|
|
|
|
def predict_class(X, Theta):
    """Hard class predictions: the highest-probability class for each row."""
    probabilities = predict_proba(X, Theta)
    return probabilities.argmax(axis=1)
|
|
|
|
|
def compute_loss(Y_hat, Y_one_hot):
    """Mean categorical cross-entropy: mean over rows of -sum(y_k * log(y_hat_k)).

    Predictions are clipped away from 0 and 1 so log() never produces -inf.
    """
    clipped = np.clip(Y_hat, 1e-15, 1.0 - 1e-15)
    per_sample = -(Y_one_hot * np.log(clipped)).sum(axis=1)
    return per_sample.mean()
|
|
|
|
|
def compute_gradient(Y_hat, Y_one_hot, X):
    """Average cross-entropy gradient w.r.t. Theta: Xᵀ(Ŷ − Y) / N."""
    residual = Y_hat - Y_one_hot
    return (X.T @ residual) / X.shape[0]
|
|
|
|
|
def update_theta(Theta, gradient, lr):
    """One gradient-descent step: move Theta against the gradient by lr."""
    step = lr * gradient
    return Theta - step
|
|
|
|
|
def compute_accuracy(y_true, y_pred):
    """Fraction of predictions that match the true labels."""
    correct = y_true == y_pred
    return np.mean(correct)
|
|
|
|
|
def normalize_features(X_train, X_val=None, X_test=None):
    """Standardize features to zero mean / unit variance using TRAIN statistics.

    Constant columns (std == 0) are left unscaled by substituting std = 1.

    Returns:
        (X_train_norm, X_val_norm, X_test_norm, mean, std); the val/test
        entries are None when the corresponding input is None.
    """
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0)
    std[std == 0] = 1  # avoid division by zero on constant features

    def _scale(matrix):
        # Apply the training-set statistics; pass None through untouched.
        return None if matrix is None else (matrix - mean) / std

    return _scale(X_train), _scale(X_val), _scale(X_test), mean, std
|
|
|
|
|
def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size=None):
    """
    Train softmax regression with mini-batch gradient descent.

    Features are standardized with training-set statistics, a bias column is
    added, and Theta is initialized from a fixed seed for reproducibility.

    Args:
        X_train, y_train: training features and integer labels in 0..K-1.
        X_val, y_val: validation features and labels.
        num_classes: number of classes K.
        epochs: number of full passes over the training data (must be >= 1).
        learning_rate: gradient-descent step size.
        batch_size: int, numeric string, "Full Batch", or None. Values that
            are unparseable, < 1, or >= n_samples fall back to full batch.

    Returns:
        Theta, train_losses, val_losses, train_accuracies, val_accuracies,
        X_mean, X_std, y_val, y_val_pred
        (docstring previously omitted the last two items; y_val_pred holds
        the validation predictions from the final epoch).
    """
    X_train_norm, X_val_norm, _, X_mean, X_std = normalize_features(X_train, X_val)

    X_train_bias = add_bias(X_train_norm)
    X_val_bias = add_bias(X_val_norm)

    # Small random init; the fixed seed keeps runs reproducible.
    np.random.seed(42)
    Theta = np.random.randn(X_train_bias.shape[1], num_classes) * 0.01

    Y_train_one_hot = one_hot_encode(y_train, num_classes)
    Y_val_one_hot = one_hot_encode(y_val, num_classes)

    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []

    n_samples = X_train_bias.shape[0]

    # Resolve the effective batch size. BUG FIX: a non-numeric string other
    # than "Full Batch" used to crash int(); values < 1 would make the
    # mini-batch range() step invalid. Both now fall back to full batch.
    try:
        if batch_size is None or batch_size == "Full Batch":
            requested = n_samples
        else:
            requested = int(batch_size)
    except (TypeError, ValueError):
        requested = n_samples
    if requested < 1 or requested >= n_samples:
        actual_batch_size = n_samples
    else:
        actual_batch_size = requested

    for epoch in range(epochs):
        # Reshuffle every epoch so mini-batches differ between passes.
        indices = np.random.permutation(n_samples)
        X_train_shuffled = X_train_bias[indices]
        Y_train_one_hot_shuffled = Y_train_one_hot[indices]

        for i in range(0, n_samples, actual_batch_size):
            X_batch = X_train_shuffled[i:i + actual_batch_size]
            Y_batch = Y_train_one_hot_shuffled[i:i + actual_batch_size]

            Y_batch_hat = predict_proba(X_batch, Theta)
            gradient = compute_gradient(Y_batch_hat, Y_batch, X_batch)
            Theta = update_theta(Theta, gradient, learning_rate)

        # Record full-set metrics once per epoch.
        Y_train_hat = predict_proba(X_train_bias, Theta)
        train_losses.append(compute_loss(Y_train_hat, Y_train_one_hot))

        y_train_pred = predict_class(X_train_bias, Theta)
        train_accuracies.append(compute_accuracy(y_train, y_train_pred))

        Y_val_hat = predict_proba(X_val_bias, Theta)
        val_losses.append(compute_loss(Y_val_hat, Y_val_one_hot))

        y_val_pred = predict_class(X_val_bias, Theta)
        val_accuracies.append(compute_accuracy(y_val, y_val_pred))

    return Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std, y_val, y_val_pred
|
|
|
|
|
def create_confusion_matrix_chart(y_true, y_pred, num_classes):
    """Render the validation confusion matrix as a Plotly heatmap."""
    matrix = confusion_matrix(y_true, y_pred)
    class_names = [f"Class {i}" for i in range(num_classes)]

    fig = px.imshow(
        matrix,
        labels=dict(x="Predicted Label", y="True Label", color="Count"),
        x=class_names,
        y=class_names,
        text_auto=True,
        color_continuous_scale='Blues',
    )
    fig.update_layout(
        title="Confusion Matrix (Validation Set)",
        plot_bgcolor="white",
        height=400,
        margin=dict(l=40, r=40, t=80, b=40),
    )
    return fig
|
|
|
|
|
def run_softmax_regression_and_visualize(df, target_col, new_point_dict,
                                         epochs, learning_rate, batch_size_str="Full Batch",
                                         train_test_split_ratio=0.8):
    """Run softmax regression training and generate visualizations.

    Returns:
        (train_loss_fig, val_loss_fig, results_display). On invalid
        hyperparameters the figures are None and the third element is an
        error-message string.
    """
    # BUG FIX: the error returns used to be 5-tuples while the success path
    # returns a 3-tuple, so callers unpacking three values crashed on bad
    # input. Validate cheaply up front and return a matching 3-tuple.
    if epochs < 1:
        return None, None, "Number of epochs must be ≥ 1."
    if learning_rate <= 0:
        return None, None, "Learning rate must be > 0."

    X, y, num_classes, new_point, feature_cols = preprocess_data(df, target_col, new_point_dict)

    test_size = 1.0 - train_test_split_ratio

    # Stratify so every class appears in both splits.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=42, stratify=y)

    start_time = time.time()
    (Theta, train_losses, val_losses, train_accuracies, val_accuracies,
     X_mean, X_std, y_val_final, y_val_pred_final) = train_softmax_regression_with_validation(
        X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size_str
    )
    training_time = time.time() - start_time

    # Persist the trained parameters for later single-point predictions.
    _set_current_model({
        "Theta": Theta,
        "feature_cols": feature_cols,
        "X_mean": X_mean,
        "X_std": X_std,
        "num_classes": num_classes
    })

    # Score the user-supplied point with the same normalization as training
    # (X_std has zeros replaced with 1, so the division is safe).
    new_point_norm = (new_point - X_mean) / X_std
    new_point_bias = add_bias(new_point_norm)
    prediction_proba = predict_proba(new_point_bias, Theta)[0]
    prediction_class = np.argmax(prediction_proba)

    final_train_loss = train_losses[-1]
    final_val_loss = val_losses[-1]
    final_train_acc = train_accuracies[-1]
    final_val_acc = val_accuracies[-1]

    train_loss_fig = create_training_loss_chart(train_losses, train_accuracies)
    val_loss_fig = create_validation_loss_chart(val_losses, val_accuracies)

    results_display = create_results_display(
        Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes,
        split_info={
            "train_size": len(X_train),
            "val_size": len(X_val),
            "train_ratio": train_test_split_ratio,
            "val_ratio": 1.0 - train_test_split_ratio,
            "train_loss": final_train_loss,
            "val_loss": final_val_loss,
            "train_acc": final_train_acc,
            "val_acc": final_val_acc,
            "batch_size": batch_size_str,
            "training_time": training_time
        }
    )

    return train_loss_fig, val_loss_fig, results_display
|
|
|
|
|
def create_training_loss_chart(train_losses, train_accuracies):
    """Plot training loss (top panel) and training accuracy (bottom panel).

    Returns None when there is nothing to plot. Non-finite values are
    replaced with None so Plotly leaves gaps instead of drawing spikes.
    """
    if not train_losses:
        return None

    epoch_axis = list(range(1, len(train_losses) + 1))
    loss_points = [v if not (np.isinf(v) or np.isnan(v)) else None for v in train_losses]

    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=("Training Loss (Categorical Cross-Entropy)", "Training Accuracy"),
        vertical_spacing=0.15,
        row_heights=[0.5, 0.5],
    )

    fig.add_trace(
        go.Scatter(
            x=epoch_axis,
            y=loss_points,
            mode='lines+markers',
            name='Training Loss',
            line=dict(color='#1976D2', width=3),
            marker=dict(size=6),
            showlegend=True,
        ),
        row=1, col=1,
    )

    # Only draw the accuracy panel when the series aligns with the losses.
    if train_accuracies and len(train_accuracies) == len(train_losses):
        acc_points = [a * 100 if not (np.isinf(a) or np.isnan(a)) else None for a in train_accuracies]
        fig.add_trace(
            go.Scatter(
                x=epoch_axis,
                y=acc_points,
                mode='lines+markers',
                name='Training Accuracy',
                line=dict(color='#42A5F5', width=3),
                marker=dict(size=6),
                showlegend=True,
            ),
            row=2, col=1,
        )

    grid_style = dict(showgrid=True, gridwidth=1, gridcolor='lightgray')
    fig.update_xaxes(title_text="Epoch", row=1, col=1, **grid_style)
    fig.update_yaxes(title_text="Loss", row=1, col=1, **grid_style)
    fig.update_xaxes(title_text="Epoch", row=2, col=1, **grid_style)
    fig.update_yaxes(title_text="Accuracy (%)", row=2, col=1, range=[0, 100], **grid_style)

    fig.update_layout(
        title="Training Metrics Over Epochs",
        plot_bgcolor="white",
        height=600,
        margin=dict(l=40, r=40, t=80, b=40),
    )

    return fig
|
|
|
|
|
def create_validation_loss_chart(val_losses, val_accuracies):
    """Plot validation loss (top panel) and validation accuracy (bottom panel).

    Returns None when there is nothing to plot. Non-finite values are
    replaced with None so Plotly leaves gaps instead of drawing spikes.
    """
    if not val_losses:
        return None

    epoch_axis = list(range(1, len(val_losses) + 1))
    loss_points = [v if not (np.isinf(v) or np.isnan(v)) else None for v in val_losses]

    fig = make_subplots(
        rows=2, cols=1,
        subplot_titles=("Validation Loss (Categorical Cross-Entropy)", "Validation Accuracy"),
        vertical_spacing=0.15,
        row_heights=[0.5, 0.5],
    )

    fig.add_trace(
        go.Scatter(
            x=epoch_axis,
            y=loss_points,
            mode='lines+markers',
            name='Validation Loss',
            line=dict(color='#7B1FA2', width=3),
            marker=dict(size=6),
            showlegend=True,
        ),
        row=1, col=1,
    )

    # Only draw the accuracy panel when the series aligns with the losses.
    if val_accuracies and len(val_accuracies) == len(val_losses):
        acc_points = [a * 100 if not (np.isinf(a) or np.isnan(a)) else None for a in val_accuracies]
        fig.add_trace(
            go.Scatter(
                x=epoch_axis,
                y=acc_points,
                mode='lines+markers',
                name='Validation Accuracy',
                line=dict(color='#BA68C8', width=3),
                marker=dict(size=6),
                showlegend=True,
            ),
            row=2, col=1,
        )

    grid_style = dict(showgrid=True, gridwidth=1, gridcolor='lightgray')
    fig.update_xaxes(title_text="Epoch", row=1, col=1, **grid_style)
    fig.update_yaxes(title_text="Loss", row=1, col=1, **grid_style)
    fig.update_xaxes(title_text="Epoch", row=2, col=1, **grid_style)
    fig.update_yaxes(title_text="Accuracy (%)", row=2, col=1, range=[0, 100], **grid_style)

    fig.update_layout(
        title="Validation Metrics Over Epochs",
        plot_bgcolor="white",
        height=600,
        margin=dict(l=40, r=40, t=80, b=40),
    )

    return fig
|
|
|
|
|
def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes, split_info):
    """Create HTML display showing model results.

    Renders configuration, data-split sizes, final train/validation metrics,
    the learned parameter shape, and the class-probability breakdown for the
    user-supplied point as a single inline-styled HTML fragment (str).

    Args:
        Theta: learned parameter matrix, shape (n_features + 1, num_classes).
        prediction_proba: per-class probabilities for the new point.
        prediction_class: argmax class index for the new point.
        feature_cols: list of feature column names.
        epochs / learning_rate / num_classes: training configuration values.
        split_info: dict carrying train/val sizes and ratios, final losses
            and accuracies, the batch size, and the training time.
    """

    # e.g. "5 x 3" — rows are features + bias, columns are classes.
    theta_shape_str = f"{Theta.shape[0]} x {Theta.shape[1]}"

    # One "• Class i: p (p%)" line per class, joined with <br> tags.
    proba_str = "<br>".join([f"• Class {i}: <strong>{p:.4f}</strong> ({p*100:.2f}%)" for i, p in enumerate(prediction_proba)])

    html_content = f"""
    <div style='background:#5eb4f2;border-left:6px solid #1976D2;padding:14px 16px;border-radius:10px;'>
    <strong style='color:#0D47A1;'>📊 Softmax Regression Results</strong><br><br>

    <div style='margin:8px 0;'>
    <strong style='color:#1976D2;'>🔧 Model Configuration:</strong><br>
    • Epochs: {epochs} | Learning Rate: {learning_rate}<br>
    • Batch Size: {split_info.get('batch_size', 'Full Batch')} | Features: {len(feature_cols)} | Classes: {num_classes}<br>
    • Normalization: Standardized | Activation: Softmax | Loss: Categorical Cross-Entropy<br>
    </div>

    <div style='margin:8px 0;'>
    <strong style='color:#1976D2;'>📊 Data Split:</strong><br>
    • Training: {split_info['train_size']} samples ({split_info['train_ratio']:.1%})<br>
    • Validation: {split_info['val_size']} samples ({split_info['val_ratio']:.1%})<br>
    </div>

    <div style='margin:8px 0;'>
    <strong style='color:#1976D2;'>📈 Performance Metrics:</strong><br>
    • Training Loss (CCE): <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_loss']:.4f}</strong></span><br>
    • Validation Loss (CCE): <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_loss']:.4f}</strong></span><br>
    • Training Accuracy: <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_acc']*100:.2f}%</strong></span><br>
    • Validation Accuracy: <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_acc']*100:.2f}%</strong></span><br>
    • Training Time: <span style='background:#E1BEE7;padding:2px 6px;border-radius:4px;'><strong>{split_info['training_time']:.4f}s</strong></span><br>
    </div>

    <div style='margin:8px 0;'>
    <strong style='color:#1976D2;'>🎯 Learned Parameters (Θ):</strong><br>
    • Theta Shape = <code style='background:#87BAC3;padding:2px 6px;border-radius:4px;'>{theta_shape_str}</code> (Features+Bias x Classes)<br>
    </div>

    <div style='margin:8px 0;'>
    <strong style='color:#1976D2;'>🔮 Prediction for New Point:</strong><br>
    • Predicted Class: <span style='background:#FE6244;padding:2px 6px;border-radius:4px;font-size:1.1em;'><strong>Class {prediction_class}</strong></span><br>
    <div style='margin-top:8px;font-size:0.95em;'>
    <strong>Class Probabilities:</strong><br>
    {proba_str}
    </div>
    </div>
    </div>
    """

    return html_content