import pandas as pd
import numpy as np
from sklearn.datasets import load_iris, load_wine, make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import time
# Module-level cache holding the most recently trained model's parameters
_current_model_params = None
def _get_current_model():
return _current_model_params
def _set_current_model(params):
global _current_model_params
_current_model_params = params
def load_data(file_obj=None, dataset_choice="Iris"):
"""Load multi-class classification datasets"""
if file_obj is not None:
if file_obj.name.endswith(".csv"):
encodings = ["utf-8", "latin-1", "iso-8859-1", "cp1252"]
for encoding in encodings:
try:
return pd.read_csv(file_obj.name, encoding=encoding)
except UnicodeDecodeError:
continue
return pd.read_csv(file_obj.name, encoding="utf-8", errors="replace")
        elif file_obj.name.lower().endswith((".xlsx", ".xls")):
return pd.read_excel(file_obj.name)
else:
raise ValueError("Unsupported format. Upload CSV or Excel files.")
datasets = {
"Iris": lambda: _sklearn_to_df(load_iris()),
"Wine": lambda: _sklearn_to_df(load_wine()),
"Synthetic (3 classes)": lambda: _synthetic_multiclass(n_classes=3),
"Synthetic (5 classes)": lambda: _synthetic_multiclass(n_classes=5),
}
if dataset_choice not in datasets:
# Fallback if choice is invalid
return datasets["Iris"]()
return datasets[dataset_choice]()
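# Usage sketch: file_obj is assumed to expose a .name path (e.g. a
# Gradio-style upload wrapper); built-in choices match the keys above.
#   df = load_data(dataset_choice="Wine")
#   df = load_data(file_obj=uploaded_file)  # uploaded_file.name -> "data.csv"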
def _sklearn_to_df(data):
"""Convert sklearn dataset to DataFrame"""
    feature_names = getattr(data, "feature_names", None)
    if feature_names is None:
        # Fall back to generated names (passing columns=None would
        # silently produce integer column labels)
        feature_names = [f"feature_{i}" for i in range(data.data.shape[1])]
    df = pd.DataFrame(data.data, columns=feature_names)
    df["target"] = data.target
    return df
def _synthetic_multiclass(n_classes=3):
"""Generate synthetic multi-class classification dataset"""
X, y = make_classification(n_samples=1000, n_features=10, n_informative=8,
n_redundant=2, n_classes=n_classes, random_state=42)
df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
df["target"] = y
return df
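# Shape check (illustrative): 1000 rows, 10 feature columns plus "target":
#   _synthetic_multiclass(n_classes=5).shape  # -> (1000, 11)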
def create_input_components(df, target_col):
"""Create input components for feature values"""
feature_cols = [c for c in df.columns if c != target_col]
components = []
for col in feature_cols:
data = df[col]
val = pd.to_numeric(data, errors="coerce").dropna().mean()
val = 0.0 if pd.isna(val) else float(val)
components.append(
{
"name": col,
"type": "number",
"value": round(val, 3),
"minimum": None,
"maximum": None,
}
)
return components
def one_hot_encode(y, num_classes):
"""Convert integer labels to one-hot encoded vectors"""
return np.eye(num_classes)[y]
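# Worked example: integer labels select rows of the identity matrix, so
#   one_hot_encode(np.array([0, 2, 1]), 3)
#   -> [[1., 0., 0.],
#       [0., 0., 1.],
#       [0., 1., 0.]]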
def preprocess_data(df, target_col, new_point_dict):
"""Preprocess data for softmax regression"""
feature_cols = [c for c in df.columns if c != target_col]
X = df[feature_cols].copy()
y = df[target_col].copy()
# Convert to numeric
for col in feature_cols:
X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0.0)
    # Ensure target is numeric, then remap labels to 0..K-1 so one-hot
    # indexing with np.eye(K)[y] is safe even for non-contiguous labels
    y = pd.to_numeric(y, errors="coerce").fillna(0).astype(int)
    classes, y = np.unique(y.values, return_inverse=True)
    num_classes = len(classes)
if num_classes < 2:
raise ValueError(f"Target must have at least 2 classes. Found {num_classes}.")
# Prepare new point
new_point = []
for col in feature_cols:
if col in new_point_dict:
try:
new_point.append(float(new_point_dict[col]))
except Exception:
new_point.append(0.0)
else:
new_point.append(0.0)
new_point = np.array(new_point, dtype=float).reshape(1, -1)
    return X.values, y, num_classes, new_point, feature_cols
def add_bias(X):
"""Add bias column to feature matrix"""
return np.c_[np.ones(X.shape[0]), X]
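# Example: a (2, 3) matrix becomes (2, 4) with a leading column of ones,
# so the first row of Theta acts as the per-class intercept:
#   add_bias(np.zeros((2, 3)))  # -> [[1., 0., 0., 0.], [1., 0., 0., 0.]]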
def softmax(Z):
"""Softmax activation function: exp(z_k) / sum(exp(z_j))"""
# Shift Z for numerical stability to avoid overflow with exp()
Z_shifted = Z - np.max(Z, axis=1, keepdims=True)
exp_Z = np.exp(Z_shifted)
return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)
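# Sanity check: each row sums to 1 and larger logits get larger probabilities:
#   softmax(np.array([[1.0, 2.0, 3.0]]))
#   -> [[0.09003057, 0.24472847, 0.66524096]]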
def predict_proba(X, Theta):
"""Make probability predictions: Y_hat = softmax(X @ Theta)"""
Z = X.dot(Theta)
return softmax(Z)
def predict_class(X, Theta):
"""Make class predictions using argmax"""
proba = predict_proba(X, Theta)
return np.argmax(proba, axis=1)
def compute_loss(Y_hat, Y_one_hot):
    """Categorical cross-entropy: mean over samples of -sum_k y_k * log(y_hat_k)"""
    eps = 1e-15
    Y_hat = np.clip(Y_hat, eps, 1 - eps)  # keep log() finite at the boundaries
    return -np.mean(np.sum(Y_one_hot * np.log(Y_hat), axis=1))
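# Reference point: a uniform prediction over K classes scores exactly ln(K):
#   compute_loss(np.full((1, 3), 1/3), np.array([[1., 0., 0.]]))  # -> ~1.0986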
def compute_gradient(Y_hat, Y_one_hot, X):
"""Compute gradient: X.T @ (Y_hat - Y_one_hot) / N"""
N = X.shape[0]
return X.T.dot(Y_hat - Y_one_hot) / N
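# Why this is the whole gradient: for softmax outputs with cross-entropy
# loss, dL/dZ simplifies to (Y_hat - Y) per sample, so the chain rule gives
# dL/dTheta = X.T @ (Y_hat - Y) / N -- the softmax Jacobian cancels against
# the log in the loss and never needs to be formed explicitly.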
def update_theta(Theta, gradient, lr):
"""Update parameters using gradient descent"""
return Theta - lr * gradient
def compute_accuracy(y_true, y_pred):
"""Compute classification accuracy"""
return np.mean(y_true == y_pred)
def normalize_features(X_train, X_val=None, X_test=None):
"""Normalize features using standardization (zero mean, unit variance)"""
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
std[std == 0] = 1
X_train_norm = (X_train - mean) / std
X_val_norm = (X_val - mean) / std if X_val is not None else None
X_test_norm = (X_test - mean) / std if X_test is not None else None
return X_train_norm, X_val_norm, X_test_norm, mean, std
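# Example (verifiable by hand): a constant column centers to zero, and the
# std-zero guard above keeps the division well-defined:
#   normalize_features(np.array([[1., 5.], [1., 7.]]))[0]
#   -> [[0., -1.], [0., 1.]]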
def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size=None):
"""
Train softmax regression with mini-batch gradient descent
Returns:
Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std
"""
X_train_norm, X_val_norm, _, X_mean, X_std = normalize_features(X_train, X_val)
X_train_bias = add_bias(X_train_norm)
X_val_bias = add_bias(X_val_norm)
# Initialize Theta: (n_features + 1) x num_classes
np.random.seed(42)
Theta = np.random.randn(X_train_bias.shape[1], num_classes) * 0.01
# One-hot encode targets
Y_train_one_hot = one_hot_encode(y_train, num_classes)
Y_val_one_hot = one_hot_encode(y_val, num_classes)
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
n_samples = X_train_bias.shape[0]
if batch_size is None or batch_size == "Full Batch" or int(batch_size) >= n_samples:
actual_batch_size = n_samples
else:
actual_batch_size = int(batch_size)
for epoch in range(epochs):
# Shuffle training data
indices = np.random.permutation(n_samples)
X_train_shuffled = X_train_bias[indices]
Y_train_one_hot_shuffled = Y_train_one_hot[indices]
for i in range(0, n_samples, actual_batch_size):
X_batch = X_train_shuffled[i:i+actual_batch_size]
Y_batch = Y_train_one_hot_shuffled[i:i+actual_batch_size]
Y_batch_hat = predict_proba(X_batch, Theta)
gradient = compute_gradient(Y_batch_hat, Y_batch, X_batch)
Theta = update_theta(Theta, gradient, learning_rate)
# Compute metrics
Y_train_hat = predict_proba(X_train_bias, Theta)
train_loss = compute_loss(Y_train_hat, Y_train_one_hot)
train_losses.append(train_loss)
y_train_pred = predict_class(X_train_bias, Theta)
train_acc = compute_accuracy(y_train, y_train_pred)
train_accuracies.append(train_acc)
Y_val_hat = predict_proba(X_val_bias, Theta)
val_loss = compute_loss(Y_val_hat, Y_val_one_hot)
val_losses.append(val_loss)
y_val_pred = predict_class(X_val_bias, Theta)
val_acc = compute_accuracy(y_val, y_val_pred)
val_accuracies.append(val_acc)
return Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std, y_val, y_val_pred
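# Usage sketch (hypothetical arrays, just to show the expected shapes):
#   X_tr, y_tr = np.random.randn(120, 4), np.random.randint(0, 3, 120)
#   X_va, y_va = np.random.randn(30, 4), np.random.randint(0, 3, 30)
#   (Theta, tr_loss, va_loss, tr_acc, va_acc,
#    mu, sd, y_v, y_p) = train_softmax_regression_with_validation(
#       X_tr, y_tr, X_va, y_va, num_classes=3, epochs=50,
#       learning_rate=0.1, batch_size=32)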
def create_confusion_matrix_chart(y_true, y_pred, num_classes):
"""Create confusion matrix visualization using plotly"""
cm = confusion_matrix(y_true, y_pred)
labels = [f"Class {i}" for i in range(num_classes)]
fig = px.imshow(cm,
labels=dict(x="Predicted Label", y="True Label", color="Count"),
x=labels,
y=labels,
text_auto=True,
color_continuous_scale='Blues')
fig.update_layout(
title="Confusion Matrix (Validation Set)",
plot_bgcolor="white",
height=400,
margin=dict(l=40, r=40, t=80, b=40)
)
return fig
def run_softmax_regression_and_visualize(df, target_col, new_point_dict,
epochs, learning_rate, batch_size_str="Full Batch",
train_test_split_ratio=0.8):
"""Run softmax regression training and generate visualizations"""
    if epochs < 1:
        return None, None, "Number of epochs must be ≥ 1."
    if learning_rate <= 0:
        return None, None, "Learning rate must be > 0."
    X, y, num_classes, new_point, feature_cols = preprocess_data(df, target_col, new_point_dict)
    test_size = 1.0 - train_test_split_ratio
    # Stratified splitting needs at least one sample per class on each side;
    # for this demo we assume the datasets are large enough for that to hold.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=42, stratify=y)
start_time = time.time()
Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std, y_val_final, y_val_pred_final = train_softmax_regression_with_validation(
X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size_str
)
training_time = time.time() - start_time
_set_current_model({
"Theta": Theta,
"feature_cols": feature_cols,
"X_mean": X_mean,
"X_std": X_std,
"num_classes": num_classes
})
# Make prediction for new point
new_point_norm = (new_point - X_mean) / X_std
new_point_bias = add_bias(new_point_norm)
prediction_proba = predict_proba(new_point_bias, Theta)[0]
prediction_class = np.argmax(prediction_proba)
final_train_loss = train_losses[-1]
final_val_loss = val_losses[-1]
final_train_acc = train_accuracies[-1]
final_val_acc = val_accuracies[-1]
train_loss_fig = create_training_loss_chart(train_losses, train_accuracies)
val_loss_fig = create_validation_loss_chart(val_losses, val_accuracies)
# confusion_fig = create_confusion_matrix_chart(y_val_final, y_val_pred_final, num_classes)
results_display = create_results_display(
Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes,
split_info={
"train_size": len(X_train),
"val_size": len(X_val),
"train_ratio": train_test_split_ratio,
"val_ratio": 1.0 - train_test_split_ratio,
"train_loss": final_train_loss,
"val_loss": final_val_loss,
"train_acc": final_train_acc,
"val_acc": final_val_acc,
"batch_size": batch_size_str,
"training_time": training_time
}
)
return train_loss_fig, val_loss_fig, results_display
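# End-to-end sketch (hypothetical call; assumes `df` was produced by
# load_data and uses its "target" column):
#   fig_train, fig_val, html = run_softmax_regression_and_visualize(
#       df, target_col="target", new_point_dict={"feature_0": 0.5},
#       epochs=100, learning_rate=0.1)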
def create_training_loss_chart(train_losses, train_accuracies):
"""Create training loss and accuracy visualization"""
    if not train_losses:
return None
epochs = list(range(1, len(train_losses) + 1))
valid_losses = [loss if not (np.isinf(loss) or np.isnan(loss)) else None for loss in train_losses]
fig = make_subplots(
rows=2, cols=1,
subplot_titles=("Training Loss (Categorical Cross-Entropy)", "Training Accuracy"),
vertical_spacing=0.15,
row_heights=[0.5, 0.5]
)
fig.add_trace(
go.Scatter(
x=epochs,
y=valid_losses,
mode='lines+markers',
name='Training Loss',
line=dict(color='#1976D2', width=3),
marker=dict(size=6),
showlegend=True
),
row=1, col=1
)
if train_accuracies and len(train_accuracies) == len(train_losses):
valid_accuracies = [acc * 100 if not (np.isinf(acc) or np.isnan(acc)) else None for acc in train_accuracies]
fig.add_trace(
go.Scatter(
x=epochs,
y=valid_accuracies,
mode='lines+markers',
name='Training Accuracy',
line=dict(color='#42A5F5', width=3),
marker=dict(size=6),
showlegend=True
),
row=2, col=1
)
fig.update_xaxes(title_text="Epoch", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
fig.update_yaxes(title_text="Loss", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
fig.update_xaxes(title_text="Epoch", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
fig.update_yaxes(title_text="Accuracy (%)", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray', range=[0, 100])
fig.update_layout(
title="Training Metrics Over Epochs",
plot_bgcolor="white",
height=600,
margin=dict(l=40, r=40, t=80, b=40)
)
return fig
def create_validation_loss_chart(val_losses, val_accuracies):
"""Create validation loss and accuracy visualization"""
    if not val_losses:
return None
epochs = list(range(1, len(val_losses) + 1))
valid_losses = [loss if not (np.isinf(loss) or np.isnan(loss)) else None for loss in val_losses]
fig = make_subplots(
rows=2, cols=1,
subplot_titles=("Validation Loss (Categorical Cross-Entropy)", "Validation Accuracy"),
vertical_spacing=0.15,
row_heights=[0.5, 0.5]
)
fig.add_trace(
go.Scatter(
x=epochs,
y=valid_losses,
mode='lines+markers',
name='Validation Loss',
line=dict(color='#7B1FA2', width=3),
marker=dict(size=6),
showlegend=True
),
row=1, col=1
)
if val_accuracies and len(val_accuracies) == len(val_losses):
valid_accuracies = [acc * 100 if not (np.isinf(acc) or np.isnan(acc)) else None for acc in val_accuracies]
fig.add_trace(
go.Scatter(
x=epochs,
y=valid_accuracies,
mode='lines+markers',
name='Validation Accuracy',
line=dict(color='#BA68C8', width=3),
marker=dict(size=6),
showlegend=True
),
row=2, col=1
)
fig.update_xaxes(title_text="Epoch", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
fig.update_yaxes(title_text="Loss", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
fig.update_xaxes(title_text="Epoch", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
fig.update_yaxes(title_text="Accuracy (%)", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray', range=[0, 100])
fig.update_layout(
title="Validation Metrics Over Epochs",
plot_bgcolor="white",
height=600,
margin=dict(l=40, r=40, t=80, b=40)
)
return fig
def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes, split_info):
"""Create HTML display showing model results"""
    # Theta is usually too large to print in full for multi-class models, so show its shape
theta_shape_str = f"{Theta.shape[0]} x {Theta.shape[1]}"
# Format predicted probabilities for each class
proba_str = "
".join([f"• Class {i}: {p:.4f} ({p*100:.2f}%)" for i, p in enumerate(prediction_proba)])
html_content = f"""
{theta_shape_str} (Features+Bias x Classes)