plethegenuine1908 committed
Commit bd037d7 · verified · 1 Parent(s): 5dbb53f

Update src/softmax_regression.py

Files changed (1): src/softmax_regression.py +96 -731
src/softmax_regression.py CHANGED
@@ -1,508 +1,11 @@
- # import pandas as pd
- # import numpy as np
- # from sklearn.datasets import load_iris, load_wine, make_classification
- # from sklearn.model_selection import train_test_split
- # from plotly.subplots import make_subplots
- # import plotly.graph_objects as go
- # import time
- 
- # _current_model_params = None
- 
- # def _get_current_model():
- #     return _current_model_params
- 
- # def _set_current_model(params):
- #     global _current_model_params
- #     _current_model_params = params
- 
- 
- # def load_data(file_obj=None, dataset_choice="Breast Cancer"):
- #     """Load binary classification datasets"""
- #     if file_obj is not None:
- #         if file_obj.name.endswith(".csv"):
- #             encodings = ["utf-8", "latin-1", "iso-8859-1", "cp1252"]
- #             for encoding in encodings:
- #                 try:
- #                     return pd.read_csv(file_obj.name, encoding=encoding)
- #                 except UnicodeDecodeError:
- #                     continue
- #             return pd.read_csv(file_obj.name, encoding="utf-8", errors="replace")
- #         elif file_obj.name.endswith((".xlsx", ".xls")):
- #             return pd.read_excel(file_obj.name)
- #         else:
- #             raise ValueError("Unsupported format. Upload CSV or Excel files.")
- 
- #     datasets = {
- #         "Iris": lambda: _sklearn_to_df(load_iris()),
- #         "Wine": lambda: _sklearn_to_df(load_wine()),
- #         "Synthetic (3 classes)": lambda: _synthetic_multiclass(n_classes=3),
- #         "Synthetic (5 classes)": lambda: _synthetic_multiclass(n_classes=5),
- #     }
- #     if dataset_choice not in datasets:
- #         raise ValueError(f"Unknown dataset: {dataset_choice}")
- #     return datasets[dataset_choice]()
- 
- 
- # def _sklearn_to_df(data):
- #     """Convert sklearn dataset to DataFrame"""
- #     df = pd.DataFrame(data.data, columns=getattr(data, "feature_names", None))
- #     if df.columns.isnull().any():
- #         df.columns = [f"feature_{i}" for i in range(df.shape[1])]
- #     df["target"] = data.target
- #     return df
- 
- 
- # # def _wine_to_binary_df(wine_data):
- # #     """Convert wine dataset to binary classification (class 0 vs others)"""
- # #     df = pd.DataFrame(wine_data.data, columns=wine_data.feature_names)
- # #     df["target"] = (wine_data.target == 0).astype(int)
- # #     return df
- 
- 
- # def _synthetic_classification():
- #     """Generate synthetic binary classification dataset"""
- #     X, y = make_classification(n_samples=1000, n_features=20, n_informative=15,
- #                                n_redundant=5, n_classes=2, random_state=42)
- #     df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
- #     df["target"] = y
- #     return df
- 
- 
- # def create_input_components(df, target_col):
- #     """Create input components for feature values"""
- #     feature_cols = [c for c in df.columns if c != target_col]
- #     components = []
- #     for col in feature_cols:
- #         data = df[col]
- #         val = pd.to_numeric(data, errors="coerce").dropna().mean()
- #         val = 0.0 if pd.isna(val) else float(val)
- #         components.append(
- #             {
- #                 "name": col,
- #                 "type": "number",
- #                 "value": round(val, 3),
- #                 "minimum": None,
- #                 "maximum": None,
- #             }
- #         )
- #     return components
- 
- 
- # def preprocess_data(df, target_col, new_point_dict):
- #     """Preprocess data for logistic regression"""
- #     feature_cols = [c for c in df.columns if c != target_col]
- #     X = df[feature_cols].copy()
- #     y = df[target_col].copy()
- 
- #     # Convert to numeric
- #     for col in feature_cols:
- #         X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0.0)
- 
- #     # Ensure binary target (0 or 1)
- #     unique_vals = sorted(y.unique())
- #     if len(unique_vals) != 2:
- #         raise ValueError(f"Target must be binary (0/1). Found {len(unique_vals)} unique values: {unique_vals}")
- 
- #     # Map to 0/1 if needed
- #     y_mapped = y.copy()
- #     if set(unique_vals) != {0, 1}:
- #         mapping = {unique_vals[0]: 0, unique_vals[1]: 1}
- #         y_mapped = y.map(mapping)
- 
- #     # Prepare new point
- #     new_point = []
- #     for col in feature_cols:
- #         if col in new_point_dict:
- #             try:
- #                 new_point.append(float(new_point_dict[col]))
- #             except Exception:
- #                 new_point.append(0.0)
- #         else:
- #             new_point.append(0.0)
- 
- #     new_point = np.array(new_point, dtype=float).reshape(1, -1)
- 
- #     return X.values, np.array(y_mapped, dtype=int), new_point, feature_cols
- 
- 
- # def add_bias(X):
- #     """Add bias column to feature matrix"""
- #     return np.c_[np.ones(X.shape[0]), X]
- 
- 
- # # def sigmoid(z):
- # #     """Sigmoid activation function: σ(z) = 1 / (1 + exp(-z))"""
- # #     z = np.clip(z, -500, 500)
- # #     return 1 / (1 + np.exp(-z))
- 
- # def softmax(Z):
- #     Z_shifted = Z - np.max(Z, axis=1, keepdims=True)  # Numerical stability
- #     exp_Z = np.exp(Z_shifted)
- #     return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)
- 
- # def predict_proba(X, theta):
- #     """Make probability predictions: y_hat = softmax(X @ theta)"""
- #     z = X.dot(theta)
- #     return softmax(z)
- 
- 
- # def predict_class(X, theta, threshold=0.5):
- #     """Make binary class predictions using threshold"""
- #     proba = predict_proba(X, theta)
- #     return (proba >= threshold).astype(int)
- 
- 
- # def compute_loss(y_hat, y):
- #     """Compute Binary Cross-Entropy loss: -[y*log(ŷ) + (1-y)*log(1-ŷ)]"""
- #     eps = 1e-15
- #     y_hat = np.clip(y_hat, eps, 1 - eps)
- #     loss = -(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
- #     return np.mean(loss)
- 
- 
- # def compute_gradient(y_hat, y, X):
- #     """Compute gradient: X.T @ (y_hat - y) / N"""
- #     N = len(y)
- #     return X.T.dot(y_hat - y) / N
- 
- 
- # def update_theta(theta, gradient, lr):
- #     """Update parameters using gradient descent"""
- #     return theta - lr * gradient
- 
- 
- # def compute_accuracy(y_true, y_pred):
- #     """Compute classification accuracy"""
- #     return np.mean(y_true == y_pred)
- 
- 
- # def normalize_features(X_train, X_val=None, X_test=None):
- #     """Normalize features using standardization (zero mean, unit variance)"""
- #     mean = np.mean(X_train, axis=0)
- #     std = np.std(X_train, axis=0)
- #     std[std == 0] = 1
- 
- #     X_train_norm = (X_train - mean) / std
- #     X_val_norm = (X_val - mean) / std if X_val is not None else None
- #     X_test_norm = (X_test - mean) / std if X_test is not None else None
- 
- #     return X_train_norm, X_val_norm, X_test_norm, mean, std
- 
- 
- # def train_logistic_regression_with_validation(X_train, y_train, X_val, y_val, epochs, learning_rate, batch_size=None):
- #     """
- #     Train logistic regression with mini-batch gradient descent
- 
- #     Returns:
- #         theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std
- #     """
- #     X_train_norm, X_val_norm, _, X_mean, X_std = normalize_features(X_train, X_val)
- 
- #     X_train_bias = add_bias(X_train_norm)
- #     X_val_bias = add_bias(X_val_norm)
- 
- #     np.random.seed(42)
- #     theta = np.random.randn(X_train_bias.shape[1]) * 0.01
- 
- #     train_losses = []
- #     val_losses = []
- #     train_accuracies = []
- #     val_accuracies = []
- 
- #     n_samples = X_train_bias.shape[0]
- 
- #     if batch_size is None or batch_size >= n_samples:
- #         actual_batch_size = n_samples
- #     else:
- #         actual_batch_size = batch_size
- 
- #     for epoch in range(epochs):
- #         if actual_batch_size < n_samples:
- #             indices = np.random.permutation(n_samples)
- #             X_train_shuffled = X_train_bias[indices]
- #             y_train_shuffled = y_train[indices]
- #         else:
- #             X_train_shuffled = X_train_bias
- #             y_train_shuffled = y_train
- 
- #         for i in range(0, n_samples, actual_batch_size):
- #             X_batch = X_train_shuffled[i:i+actual_batch_size]
- #             y_batch = y_train_shuffled[i:i+actual_batch_size]
- 
- #             y_batch_hat = predict_proba(X_batch, theta)
- #             gradient = compute_gradient(y_batch_hat, y_batch, X_batch)
- #             theta = update_theta(theta, gradient, learning_rate)
- 
- #         y_train_hat = predict_proba(X_train_bias, theta)
- #         train_loss = compute_loss(y_train_hat, y_train)
- #         train_losses.append(train_loss)
- 
- #         y_train_pred = predict_class(X_train_bias, theta)
- #         train_acc = compute_accuracy(y_train, y_train_pred)
- #         train_accuracies.append(train_acc)
- 
- #         y_val_hat = predict_proba(X_val_bias, theta)
- #         val_loss = compute_loss(y_val_hat, y_val)
- #         val_losses.append(val_loss)
- 
- #         y_val_pred = predict_class(X_val_bias, theta)
- #         val_acc = compute_accuracy(y_val, y_val_pred)
- #         val_accuracies.append(val_acc)
- 
- #     return theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std
- 
- 
- # def run_logistic_regression_and_visualize(df, target_col, new_point_dict,
- #                                           epochs, learning_rate, batch_size_str="Full Batch",
- #                                           train_test_split_ratio=0.8, threshold=0.5):
- #     """Run logistic regression training and generate visualizations"""
- #     X, y, new_point, feature_cols = preprocess_data(df, target_col, new_point_dict)
- 
- #     if epochs < 1:
- #         return None, None, None, "Number of epochs must be ≥ 1.", None
- #     if learning_rate <= 0:
- #         return None, None, None, "Learning rate must be > 0.", None
- 
- #     test_size = 1.0 - train_test_split_ratio
- #     X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=42, stratify=y)
- 
- #     if batch_size_str == "Full Batch":
- #         batch_size = None
- #     else:
- #         batch_size = int(batch_size_str)
- 
- #     start_time = time.time()
- #     theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std = train_logistic_regression_with_validation(
- #         X_train, y_train, X_val, y_val, epochs, learning_rate, batch_size
- #     )
- #     training_time = time.time() - start_time
- 
- #     _set_current_model({
- #         "theta": theta,
- #         "feature_cols": feature_cols,
- #         "X_mean": X_mean,
- #         "X_std": X_std
- #     })
- 
- #     # Prepare normalized data for prediction with threshold
- #     X_train_norm, X_val_norm, _, _, _ = normalize_features(X_train, X_val)
- #     X_train_bias = add_bias(X_train_norm)
- #     X_val_bias = add_bias(X_val_norm)
- 
- #     # Make prediction with threshold
- #     new_point_norm = (new_point - X_mean) / X_std
- #     new_point_bias = add_bias(new_point_norm)
- #     prediction_proba = predict_proba(new_point_bias, theta)[0]
- #     prediction_class = predict_class(new_point_bias, theta, threshold)[0]
- 
- #     # Compute metrics with threshold
- #     y_train_pred_thresh = predict_class(X_train_bias, theta, threshold)
- #     y_val_pred_thresh = predict_class(X_val_bias, theta, threshold)
- #     train_acc_thresh = compute_accuracy(y_train, y_train_pred_thresh)
- #     val_acc_thresh = compute_accuracy(y_val, y_val_pred_thresh)
- 
- #     final_train_loss = train_losses[-1]
- #     final_val_loss = val_losses[-1]
- #     final_train_acc = train_accuracies[-1]
- #     final_val_acc = val_accuracies[-1]
- 
- #     train_loss_fig = create_training_loss_chart(train_losses, train_accuracies)
- #     val_loss_fig = create_validation_loss_chart(val_losses, val_accuracies)
- 
- #     results_display = create_results_display(
- #         theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, threshold,
- #         split_info={
- #             "train_size": len(X_train),
- #             "val_size": len(X_val),
- #             "train_ratio": train_test_split_ratio,
- #             "val_ratio": 1.0 - train_test_split_ratio,
- #             "train_loss": final_train_loss,
- #             "val_loss": final_val_loss,
- #             "train_acc": final_train_acc,
- #             "val_acc": final_val_acc,
- #             "train_acc_thresh": train_acc_thresh,
- #             "val_acc_thresh": val_acc_thresh,
- #             "batch_size": batch_size_str,
- #             "training_time": training_time
- #         }
- #     )
- 
- #     return train_loss_fig, val_loss_fig, results_display, prediction_proba
- 
- 
- # def create_training_loss_chart(train_losses, train_accuracies):
- #     """Create training loss and accuracy visualization"""
- #     if not train_losses or len(train_losses) == 0:
- #         return None
- 
- #     epochs = list(range(1, len(train_losses) + 1))
- #     valid_losses = [loss if not (np.isinf(loss) or np.isnan(loss)) else None for loss in train_losses]
- 
- #     fig = make_subplots(
- #         rows=2, cols=1,
- #         subplot_titles=("Training Loss (Binary Cross-Entropy)", "Training Accuracy"),
- #         vertical_spacing=0.15,
- #         row_heights=[0.5, 0.5]
- #     )
- 
- #     fig.add_trace(
- #         go.Scatter(
- #             x=epochs,
- #             y=valid_losses,
- #             mode='lines+markers',
- #             name='Training Loss',
- #             line=dict(color='#1976D2', width=3),
- #             marker=dict(size=6),
- #             showlegend=True
- #         ),
- #         row=1, col=1
- #     )
- 
- #     if train_accuracies and len(train_accuracies) == len(train_losses):
- #         valid_accuracies = [acc * 100 if not (np.isinf(acc) or np.isnan(acc)) else None for acc in train_accuracies]
- #         fig.add_trace(
- #             go.Scatter(
- #                 x=epochs,
- #                 y=valid_accuracies,
- #                 mode='lines+markers',
- #                 name='Training Accuracy',
- #                 line=dict(color='#42A5F5', width=3),
- #                 marker=dict(size=6),
- #                 showlegend=True
- #             ),
- #             row=2, col=1
- #         )
- 
- #     fig.update_xaxes(title_text="Epoch", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
- #     fig.update_yaxes(title_text="Loss", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
- #     fig.update_xaxes(title_text="Epoch", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
- #     fig.update_yaxes(title_text="Accuracy (%)", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray', range=[0, 100])
- 
- #     fig.update_layout(
- #         title="Training Metrics Over Epochs",
- #         plot_bgcolor="white",
- #         height=600,
- #         margin=dict(l=40, r=40, t=80, b=40)
- #     )
- 
- #     return fig
- 
- 
- # def create_validation_loss_chart(val_losses, val_accuracies):
- #     """Create validation loss and accuracy visualization"""
- #     if not val_losses or len(val_losses) == 0:
- #         return None
- 
- #     epochs = list(range(1, len(val_losses) + 1))
- #     valid_losses = [loss if not (np.isinf(loss) or np.isnan(loss)) else None for loss in val_losses]
- 
- #     fig = make_subplots(
- #         rows=2, cols=1,
- #         subplot_titles=("Validation Loss (Binary Cross-Entropy)", "Validation Accuracy"),
- #         vertical_spacing=0.15,
- #         row_heights=[0.5, 0.5]
- #     )
- 
- #     fig.add_trace(
- #         go.Scatter(
- #             x=epochs,
- #             y=valid_losses,
- #             mode='lines+markers',
- #             name='Validation Loss',
- #             line=dict(color='#7B1FA2', width=3),
- #             marker=dict(size=6),
- #             showlegend=True
- #         ),
- #         row=1, col=1
- #     )
- 
- #     if val_accuracies and len(val_accuracies) == len(val_losses):
- #         valid_accuracies = [acc * 100 if not (np.isinf(acc) or np.isnan(acc)) else None for acc in val_accuracies]
- #         fig.add_trace(
- #             go.Scatter(
- #                 x=epochs,
- #                 y=valid_accuracies,
- #                 mode='lines+markers',
- #                 name='Validation Accuracy',
- #                 line=dict(color='#BA68C8', width=3),
- #                 marker=dict(size=6),
- #                 showlegend=True
- #             ),
- #             row=2, col=1
- #         )
- 
- #     fig.update_xaxes(title_text="Epoch", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
- #     fig.update_yaxes(title_text="Loss", row=1, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
- #     fig.update_xaxes(title_text="Epoch", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray')
- #     fig.update_yaxes(title_text="Accuracy (%)", row=2, col=1, showgrid=True, gridwidth=1, gridcolor='lightgray', range=[0, 100])
- 
- #     fig.update_layout(
- #         title="Validation Metrics Over Epochs",
- #         plot_bgcolor="white",
- #         height=600,
- #         margin=dict(l=40, r=40, t=80, b=40)
- #     )
- 
- #     return fig
- 
- 
- # def create_results_display(theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, threshold, split_info):
- #     """Create HTML display showing model results"""
- 
- #     theta_str = f"[{theta[0]:.4f}"
- #     for i, w in enumerate(theta[1:]):
- #         theta_str += f", {w:.4f}"
- #     theta_str += "]"
- 
- #     html_content = f"""
- #     <div style='background:#E3F2FD;border-left:6px solid #1976D2;padding:14px 16px;border-radius:10px;'>
- #         <strong style='color:#0D47A1;'>📊 Logistic Regression Results</strong><br><br>
- 
- #         <div style='margin:8px 0;'>
- #             <strong style='color:#1976D2;'>🔧 Model Configuration:</strong><br>
- #             • Epochs: {epochs} | Learning Rate: {learning_rate}<br>
- #             • Batch Size: {split_info.get('batch_size', 'Full Batch')} | Features: {len(feature_cols)}<br>
- #             • Normalization: Standardized | Activation: Sigmoid | Loss: Binary Cross-Entropy<br>
- #         </div>
- 
- #         <div style='margin:8px 0;'>
- #             <strong style='color:#1976D2;'>📊 Data Split:</strong><br>
- #             • Training: {split_info['train_size']} samples ({split_info['train_ratio']:.1%})<br>
- #             • Validation: {split_info['val_size']} samples ({split_info['val_ratio']:.1%})<br>
- #         </div>
- 
- #         <div style='margin:8px 0;'>
- #             <strong style='color:#1976D2;'>📈 Performance Metrics:</strong><br>
- #             • Training Loss (BCE): <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_loss']:.4f}</strong></span><br>
- #             • Validation Loss (BCE): <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_loss']:.4f}</strong></span><br>
- #             • Training Accuracy (threshold={threshold:.2f}): <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_acc_thresh']*100:.2f}%</strong></span><br>
- #             • Validation Accuracy (threshold={threshold:.2f}): <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_acc_thresh']*100:.2f}%</strong></span><br>
- #             • Training Time: <span style='background:#E1BEE7;padding:2px 6px;border-radius:4px;'><strong>{split_info['training_time']:.4f}s</strong></span><br>
- #         </div>
- 
- #         <div style='margin:8px 0;'>
- #             <strong style='color:#1976D2;'>🎯 Learned Parameters (θ):</strong><br>
- #             • Theta = <code style='background:#F3E5F5;padding:2px 6px;border-radius:4px;'>{theta_str}</code><br>
- #             • Bias (θ₀) = {theta[0]:.4f}<br>
- #         </div>
- 
- #         <div style='margin:8px 0;'>
- #             <strong style='color:#1976D2;'>🔮 Prediction (Threshold = {threshold:.2f}):</strong><br>
- #             • Probability: <span style='background:#DCEDC8;padding:2px 6px;border-radius:4px;'><strong>{prediction_proba:.4f}</strong></span> ({(prediction_proba*100):.2f}%)<br>
- #             • Predicted Class: <span style='background:#DCEDC8;padding:2px 6px;border-radius:4px;'><strong>{prediction_class}</strong></span> (0 = Class 0, 1 = Class 1)<br>
- #             <em style='font-size:0.9em;color:#424242;'>* Adjust threshold to see how predictions change. Lower threshold → more predictions of class 1</em><br>
- #         </div>
- #     </div>
- #     """
- 
- #     return html_content
- 
  import pandas as pd
  import numpy as np
  from sklearn.datasets import load_iris, load_wine, make_classification
  from sklearn.model_selection import train_test_split
+ from sklearn.metrics import confusion_matrix
  from plotly.subplots import make_subplots
  import plotly.graph_objects as go
+ import plotly.express as px
  import time
 
  _current_model_params = None
@@ -514,9 +17,8 @@ def _set_current_model(params):
      global _current_model_params
      _current_model_params = params
 
- 
  def load_data(file_obj=None, dataset_choice="Iris"):
-     """Load multiclass classification datasets"""
+     """Load multi-class classification datasets"""
      if file_obj is not None:
          if file_obj.name.endswith(".csv"):
              encodings = ["utf-8", "latin-1", "iso-8859-1", "cp1252"]
@@ -537,11 +39,12 @@ def load_data(file_obj=None, dataset_choice="Iris"):
          "Synthetic (3 classes)": lambda: _synthetic_multiclass(n_classes=3),
          "Synthetic (5 classes)": lambda: _synthetic_multiclass(n_classes=5),
      }
+ 
      if dataset_choice not in datasets:
-         raise ValueError(f"Unknown dataset: {dataset_choice}")
+         # Fallback if choice is invalid
+         return datasets["Iris"]()
      return datasets[dataset_choice]()
 
- 
  def _sklearn_to_df(data):
      """Convert sklearn dataset to DataFrame"""
      df = pd.DataFrame(data.data, columns=getattr(data, "feature_names", None))
@@ -550,23 +53,14 @@ def _sklearn_to_df(data):
      df["target"] = data.target
      return df
 
- 
  def _synthetic_multiclass(n_classes=3):
-     """Generate synthetic multiclass classification dataset"""
-     X, y = make_classification(
-         n_samples=1000,
-         n_features=10,
-         n_informative=8,
-         n_redundant=2,
-         n_classes=n_classes,
-         n_clusters_per_class=1,
-         random_state=42
-     )
+     """Generate synthetic multi-class classification dataset"""
+     X, y = make_classification(n_samples=1000, n_features=10, n_informative=8,
+                                n_redundant=2, n_classes=n_classes, random_state=42)
      df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(X.shape[1])])
      df["target"] = y
      return df
 
- 
  def create_input_components(df, target_col):
      """Create input components for feature values"""
      feature_cols = [c for c in df.columns if c != target_col]
@@ -586,6 +80,9 @@ def create_input_components(df, target_col):
          )
      return components
 
+ def one_hot_encode(y, num_classes):
+     """Convert integer labels to one-hot encoded vectors"""
+     return np.eye(num_classes)[y]
 
  def preprocess_data(df, target_col, new_point_dict):
      """Preprocess data for softmax regression"""
@@ -597,19 +94,13 @@ def preprocess_data(df, target_col, new_point_dict):
      for col in feature_cols:
          X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0.0)
 
-     # Get unique classes
-     unique_vals = sorted(y.unique())
-     n_classes = len(unique_vals)
+     # Ensure target is numeric and get number of classes
+     y = pd.to_numeric(y, errors="coerce").fillna(0).astype(int)
+     num_classes = len(np.unique(y))
 
-     if n_classes < 2:
-         raise ValueError(f"Need at least 2 classes. Found {n_classes}")
+     if num_classes < 2:
+         raise ValueError(f"Target must have at least 2 classes. Found {num_classes}.")
 
-     # Map to 0, 1, 2, ... if needed
-     y_mapped = y.copy()
-     if list(unique_vals) != list(range(n_classes)):
-         mapping = {val: i for i, val in enumerate(unique_vals)}
-         y_mapped = y.map(mapping)
- 
      # Prepare new point
      new_point = []
      for col in feature_cols:
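
One behavioral consequence of the new target handling worth noting: `pd.to_numeric(..., errors="coerce").fillna(0)` maps any non-numeric label to 0, so string-labeled targets all collapse into class 0. A small illustration, with `pd.factorize` as a hedged alternative (not in the diff) when class identity must survive:

```python
import pandas as pd

y = pd.Series(["0", "2", "setosa", "1"])
# Non-numeric "setosa" becomes NaN, then 0 after fillna.
print(pd.to_numeric(y, errors="coerce").fillna(0).astype(int).tolist())  # [0, 2, 0, 1]

# factorize keeps distinct string labels distinct (alternative approach).
codes, uniques = pd.factorize(pd.Series(["a", "b", "a"]))
print(codes.tolist())  # [0, 1, 0]
```
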
@@ -622,112 +113,49 @@ def preprocess_data(df, target_col, new_point_dict):
              new_point.append(0.0)
 
      new_point = np.array(new_point, dtype=float).reshape(1, -1)
- 
-     return X.values, np.array(y_mapped, dtype=int), new_point, feature_cols, n_classes
- 
+ 
+     return X.values, y.values, num_classes, new_point, feature_cols
 
  def add_bias(X):
      """Add bias column to feature matrix"""
      return np.c_[np.ones(X.shape[0]), X]
 
- 
  def softmax(Z):
-     """
-     Softmax activation function: σ(z_i) = exp(z_i) / Σ exp(z_j)
- 
-     Args:
-         Z: (N, K) matrix where N = samples, K = classes
- 
-     Returns:
-         Probabilities (N, K) where each row sums to 1
-     """
-     # Numerical stability: subtract max
+     """Softmax activation function: exp(z_k) / sum(exp(z_j))"""
+     # Shift Z for numerical stability to avoid overflow with exp()
      Z_shifted = Z - np.max(Z, axis=1, keepdims=True)
      exp_Z = np.exp(Z_shifted)
      return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)
 
- 
  def predict_proba(X, Theta):
-     """
-     Make probability predictions using softmax
- 
-     Args:
-         X: (N, d+1) feature matrix with bias
-         Theta: (d+1, K) parameter matrix
- 
-     Returns:
-         Probabilities (N, K)
-     """
-     Z = X.dot(Theta)  # (N, K)
+     """Make probability predictions: Y_hat = softmax(X @ Theta)"""
+     Z = X.dot(Theta)
      return softmax(Z)
 
- 
  def predict_class(X, Theta):
-     """Make class predictions (argmax of probabilities)"""
+     """Make class predictions using argmax"""
      proba = predict_proba(X, Theta)
      return np.argmax(proba, axis=1)
 
- 
- def one_hot_encode(y, n_classes):
-     """
-     Convert class labels to one-hot encoding
- 
-     Args:
-         y: (N,) array of class labels [0, 1, 2, ...]
-         n_classes: number of classes K
- 
-     Returns:
-         (N, K) one-hot matrix
-     """
-     N = len(y)
-     Y_onehot = np.zeros((N, n_classes))
-     Y_onehot[np.arange(N), y] = 1
-     return Y_onehot
- 
- 
- def compute_loss(Y_hat, Y_onehot):
-     """
-     Compute Categorical Cross-Entropy loss: -Σ y_k * log(ŷ_k)
- 
-     Args:
-         Y_hat: (N, K) predicted probabilities
-         Y_onehot: (N, K) one-hot encoded true labels
- 
-     Returns:
-         Scalar loss
-     """
+ def compute_loss(Y_hat, Y_one_hot):
+     """Compute Categorical Cross-Entropy loss: -sum(y_k * log(y_hat_k))"""
      eps = 1e-15
      Y_hat = np.clip(Y_hat, eps, 1 - eps)
-     loss = -np.sum(Y_onehot * np.log(Y_hat))
-     return loss / len(Y_onehot)
- 
- 
- def compute_gradient(Y_hat, Y_onehot, X):
-     """
-     Compute gradient: X.T @ (Y_hat - Y_onehot) / N
- 
-     Args:
-         Y_hat: (N, K) predicted probabilities
-         Y_onehot: (N, K) one-hot encoded labels
-         X: (N, d+1) feature matrix
- 
-     Returns:
-         (d+1, K) gradient matrix
-     """
-     N = len(Y_onehot)
-     return X.T.dot(Y_hat - Y_onehot) / N
+     return -np.mean(np.sum(Y_one_hot * np.log(Y_hat), axis=1))
+ 
+ def compute_gradient(Y_hat, Y_one_hot, X):
+     """Compute gradient: X.T @ (Y_hat - Y_one_hot) / N"""
+     N = X.shape[0]
+     return X.T.dot(Y_hat - Y_one_hot) / N
 
  def update_theta(Theta, gradient, lr):
      """Update parameters using gradient descent"""
      return Theta - lr * gradient
 
- 
  def compute_accuracy(y_true, y_pred):
      """Compute classification accuracy"""
      return np.mean(y_true == y_pred)
 
- 
  def normalize_features(X_train, X_val=None, X_test=None):
      """Normalize features using standardization (zero mean, unit variance)"""
      mean = np.mean(X_train, axis=0)
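
A minimal standalone sanity check for the shifted softmax and the rewritten cross-entropy in the hunk above, using the same formulas: subtracting the row max changes nothing mathematically but keeps `np.exp` from overflowing, and for a one-hot target the loss reduces to `-log` of the true-class probability.

```python
import numpy as np

def softmax(Z):
    Z_shifted = Z - np.max(Z, axis=1, keepdims=True)  # stability shift
    exp_Z = np.exp(Z_shifted)
    return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

Z = np.array([[1000.0, 1001.0, 1002.0]])  # naive exp(1000) would overflow
P = softmax(Z)
print(P.sum(axis=1))  # [1.]

Y = np.array([[0.0, 0.0, 1.0]])  # one-hot target
eps = 1e-15
loss = -np.mean(np.sum(Y * np.log(np.clip(P, eps, 1 - eps)), axis=1))
print(np.isclose(loss, -np.log(P[0, 2])))  # True
```
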
@@ -740,11 +168,9 @@ def normalize_features(X_train, X_val=None, X_test=None):
 
      return X_train_norm, X_val_norm, X_test_norm, mean, std
 
- 
- def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, n_classes, epochs, learning_rate, batch_size=None):
+ def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size=None):
      """
      Train softmax regression with mini-batch gradient descent
- 
      Returns:
          Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std
      """
@@ -753,13 +179,13 @@ def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size=None):
      X_train_bias = add_bias(X_train_norm)
      X_val_bias = add_bias(X_val_norm)
 
-     # Initialize Theta: (d+1, K) matrix
+     # Initialize Theta: (n_features + 1) x num_classes
      np.random.seed(42)
-     Theta = np.random.randn(X_train_bias.shape[1], n_classes) * 0.01
+     Theta = np.random.randn(X_train_bias.shape[1], num_classes) * 0.01
 
      # One-hot encode targets
-     Y_train_onehot = one_hot_encode(y_train, n_classes)
-     Y_val_onehot = one_hot_encode(y_val, n_classes)
+     Y_train_one_hot = one_hot_encode(y_train, num_classes)
+     Y_val_one_hot = one_hot_encode(y_val, num_classes)
 
      train_losses = []
      val_losses = []
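
Shape bookkeeping for the initialization above, as a standalone sketch: with d features plus the bias column, `Theta` is `(d + 1, K)`, so `X_bias @ Theta` yields one row of K logits per sample.

```python
import numpy as np

rng = np.random.default_rng(42)
N, d, K = 5, 4, 3
X_bias = np.c_[np.ones(N), rng.normal(size=(N, d))]  # (N, d + 1)
Theta = rng.normal(size=(d + 1, K)) * 0.01           # (d + 1, K)
print((X_bias @ Theta).shape)                        # (5, 3) logits
```
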
@@ -768,74 +194,83 @@ def train_softmax_regression_with_validation(X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size=None):
 
      n_samples = X_train_bias.shape[0]
 
-     if batch_size is None or batch_size >= n_samples:
+     if batch_size is None or batch_size == "Full Batch" or int(batch_size) >= n_samples:
          actual_batch_size = n_samples
      else:
-         actual_batch_size = batch_size
+         actual_batch_size = int(batch_size)
 
      for epoch in range(epochs):
-         if actual_batch_size < n_samples:
-             indices = np.random.permutation(n_samples)
-             X_train_shuffled = X_train_bias[indices]
-             Y_train_shuffled = Y_train_onehot[indices]
-             y_train_shuffled = y_train[indices]
-         else:
-             X_train_shuffled = X_train_bias
-             Y_train_shuffled = Y_train_onehot
-             y_train_shuffled = y_train
+         # Shuffle training data
+         indices = np.random.permutation(n_samples)
+         X_train_shuffled = X_train_bias[indices]
+         Y_train_one_hot_shuffled = Y_train_one_hot[indices]
 
-         # Mini-batch gradient descent
          for i in range(0, n_samples, actual_batch_size):
              X_batch = X_train_shuffled[i:i+actual_batch_size]
-             Y_batch = Y_train_shuffled[i:i+actual_batch_size]
+             Y_batch = Y_train_one_hot_shuffled[i:i+actual_batch_size]
 
              Y_batch_hat = predict_proba(X_batch, Theta)
              gradient = compute_gradient(Y_batch_hat, Y_batch, X_batch)
              Theta = update_theta(Theta, gradient, learning_rate)
 
-         # Compute training metrics
+         # Compute metrics
          Y_train_hat = predict_proba(X_train_bias, Theta)
-         train_loss = compute_loss(Y_train_hat, Y_train_onehot)
+         train_loss = compute_loss(Y_train_hat, Y_train_one_hot)
          train_losses.append(train_loss)
 
          y_train_pred = predict_class(X_train_bias, Theta)
          train_acc = compute_accuracy(y_train, y_train_pred)
          train_accuracies.append(train_acc)
 
-         # Compute validation metrics
          Y_val_hat = predict_proba(X_val_bias, Theta)
-         val_loss = compute_loss(Y_val_hat, Y_val_onehot)
+         val_loss = compute_loss(Y_val_hat, Y_val_one_hot)
          val_losses.append(val_loss)
 
          y_val_pred = predict_class(X_val_bias, Theta)
          val_acc = compute_accuracy(y_val, y_val_pred)
          val_accuracies.append(val_acc)
 
-     return Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std
+     return Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std, y_val, y_val_pred
 
+ def create_confusion_matrix_chart(y_true, y_pred, num_classes):
+     """Create confusion matrix visualization using plotly"""
+     cm = confusion_matrix(y_true, y_pred)
+     labels = [f"Class {i}" for i in range(num_classes)]
+ 
+     fig = px.imshow(cm,
+                     labels=dict(x="Predicted Label", y="True Label", color="Count"),
+                     x=labels,
+                     y=labels,
+                     text_auto=True,
+                     color_continuous_scale='Blues')
+ 
+     fig.update_layout(
+         title="Confusion Matrix (Validation Set)",
+         plot_bgcolor="white",
+         height=400,
+         margin=dict(l=40, r=40, t=80, b=40)
+     )
+     return fig
 
  def run_softmax_regression_and_visualize(df, target_col, new_point_dict,
                                           epochs, learning_rate, batch_size_str="Full Batch",
                                           train_test_split_ratio=0.8):
      """Run softmax regression training and generate visualizations"""
-     X, y, new_point, feature_cols, n_classes = preprocess_data(df, target_col, new_point_dict)
+     X, y, num_classes, new_point, feature_cols = preprocess_data(df, target_col, new_point_dict)
 
      if epochs < 1:
-         return None, None, None, "Number of epochs must be ≥ 1.", None, None
+         return None, None, None, "Number of epochs must be ≥ 1.", None
      if learning_rate <= 0:
-         return None, None, None, "Learning rate must be > 0.", None, None
+         return None, None, None, "Learning rate must be > 0.", None
 
      test_size = 1.0 - train_test_split_ratio
+     # Ensure stratify works even with small classes by checking counts if needed,
+     # but for simplicity we'll assume data is sufficient for demo.
      X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=42, stratify=y)
 
-     if batch_size_str == "Full Batch":
-         batch_size = None
-     else:
-         batch_size = int(batch_size_str)
- 
      start_time = time.time()
-     Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std = train_softmax_regression_with_validation(
-         X_train, y_train, X_val, y_val, n_classes, epochs, learning_rate, batch_size
+     Theta, train_losses, val_losses, train_accuracies, val_accuracies, X_mean, X_std, y_val_final, y_val_pred_final = train_softmax_regression_with_validation(
+         X_train, y_train, X_val, y_val, num_classes, epochs, learning_rate, batch_size_str
      )
      training_time = time.time() - start_time
 
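
A hedged usage sketch for the updated trainer above (the `src.softmax_regression` import path is an assumption; adjust it to wherever the module actually lives). Note that `batch_size` may now be `None`, the string `"Full Batch"`, or anything `int()` accepts:

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from src.softmax_regression import train_softmax_regression_with_validation  # assumed path

data = load_iris()
X_tr, X_va, y_tr, y_va = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42, stratify=data.target)

# The trainer now returns a 9-tuple ending with (y_val, y_val_pred).
Theta, tr_loss, va_loss, *rest = train_softmax_regression_with_validation(
    X_tr, y_tr, X_va, y_va, num_classes=3, epochs=200, learning_rate=0.1, batch_size=32)
print(f"final val loss: {va_loss[-1]:.4f}")
```
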
@@ -844,26 +279,15 @@ def run_softmax_regression_and_visualize(df, target_col, new_point_dict,
          "feature_cols": feature_cols,
          "X_mean": X_mean,
          "X_std": X_std,
-         "n_classes": n_classes
+         "num_classes": num_classes
      })
 
-     # Prepare normalized data for prediction
-     X_train_norm, X_val_norm, _, _, _ = normalize_features(X_train, X_val)
-     X_train_bias = add_bias(X_train_norm)
-     X_val_bias = add_bias(X_val_norm)
- 
-     # Make prediction
+     # Make prediction for new point
      new_point_norm = (new_point - X_mean) / X_std
      new_point_bias = add_bias(new_point_norm)
-     prediction_proba = predict_proba(new_point_bias, Theta)[0]  # (K,)
+     prediction_proba = predict_proba(new_point_bias, Theta)[0]
      prediction_class = np.argmax(prediction_proba)
 
-     # Compute final metrics
-     y_train_pred = predict_class(X_train_bias, Theta)
-     y_val_pred = predict_class(X_val_bias, Theta)
-     train_acc_final = compute_accuracy(y_train, y_train_pred)
-     val_acc_final = compute_accuracy(y_val, y_val_pred)
- 
      final_train_loss = train_losses[-1]
      final_val_loss = val_losses[-1]
      final_train_acc = train_accuracies[-1]
@@ -871,9 +295,10 @@ def run_softmax_regression_and_visualize(df, target_col, new_point_dict,
 
      train_loss_fig = create_training_loss_chart(train_losses, train_accuracies)
      val_loss_fig = create_validation_loss_chart(val_losses, val_accuracies)
+     # confusion_fig = create_confusion_matrix_chart(y_val_final, y_val_pred_final, num_classes)
 
      results_display = create_results_display(
-         Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, n_classes,
+         Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes,
          split_info={
              "train_size": len(X_train),
              "val_size": len(X_val),
@@ -883,18 +308,12 @@ def run_softmax_regression_and_visualize(df, target_col, new_point_dict,
              "val_loss": final_val_loss,
              "train_acc": final_train_acc,
              "val_acc": final_val_acc,
-             "train_acc_final": train_acc_final,
-             "val_acc_final": val_acc_final,
              "batch_size": batch_size_str,
              "training_time": training_time
          }
      )
 
-     # Create confusion matrix visualization
-     confusion_fig = create_confusion_matrix(y_val, y_val_pred, n_classes)
- 
-     return train_loss_fig, val_loss_fig, results_display, prediction_proba, prediction_class, confusion_fig
- 
+     return train_loss_fig, val_loss_fig, results_display
 
  def create_training_loss_chart(train_losses, train_accuracies):
      """Create training loss and accuracy visualization"""
@@ -953,7 +372,6 @@ def create_training_loss_chart(train_losses, train_accuracies):
 
      return fig
 
- 
  def create_validation_loss_chart(val_losses, val_accuracies):
      """Create validation loss and accuracy visualization"""
      if not val_losses or len(val_losses) == 0:
@@ -1011,60 +429,15 @@ def create_validation_loss_chart(val_losses, val_accuracies):
 
      return fig
 
- 
- def create_confusion_matrix(y_true, y_pred, n_classes):
-     """Create confusion matrix heatmap"""
-     # Compute confusion matrix
-     cm = np.zeros((n_classes, n_classes), dtype=int)
-     for true, pred in zip(y_true, y_pred):
-         cm[true, pred] += 1
- 
-     # Create heatmap
-     fig = go.Figure(data=go.Heatmap(
-         z=cm,
-         x=[f"Pred {i}" for i in range(n_classes)],
-         y=[f"True {i}" for i in range(n_classes)],
-         colorscale='Blues',
-         text=cm,
-         texttemplate="%{text}",
-         textfont={"size": 16},
-         showscale=True,
-         hovertemplate='True: %{y}<br>Predicted: %{x}<br>Count: %{z}<extra></extra>'
-     ))
- 
-     fig.update_layout(
-         title="Confusion Matrix (Validation Set)",
-         xaxis_title="Predicted Class",
-         yaxis_title="True Class",
-         height=500,
-         width=500,
-         plot_bgcolor="white"
-     )
- 
-     return fig
- 
- 
- def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, n_classes, split_info):
+ def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes, split_info):
      """Create HTML display showing model results"""
 
-     # Format Theta matrix (only show first few rows if too large)
-     max_display_rows = 5
-     theta_rows = []
-     for i in range(min(Theta.shape[0], max_display_rows)):
-         row_str = ", ".join([f"{w:.4f}" for w in Theta[i]])
-         theta_rows.append(f"[{row_str}]")
- 
-     if Theta.shape[0] > max_display_rows:
-         theta_rows.append("...")
- 
-     theta_str = "<br>&nbsp;&nbsp;&nbsp;&nbsp;".join(theta_rows)
- 
-     # Format prediction probabilities
-     proba_str = "<br>".join([
-         f"&nbsp;&nbsp;• Class {i}: <span style='background:#E8F5E9;padding:2px 6px;border-radius:4px;'><strong>{prob:.4f}</strong></span> ({prob*100:.2f}%)"
-         for i, prob in enumerate(prediction_proba)
-     ])
+     # Format Theta for display (just showing shape or first few parameters if needed, usually too large for multi-class)
+     theta_shape_str = f"{Theta.shape[0]} x {Theta.shape[1]}"
+ 
+     # Format predicted probabilities for each class
+     proba_str = "<br>".join([f"• Class {i}: <strong>{p:.4f}</strong> ({p*100:.2f}%)" for i, p in enumerate(prediction_proba)])
+ 
      html_content = f"""
      <div style='background:#E3F2FD;border-left:6px solid #1976D2;padding:14px 16px;border-radius:10px;'>
          <strong style='color:#0D47A1;'>📊 Softmax Regression Results</strong><br><br>
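
The commit replaces the hand-rolled `create_confusion_matrix` heatmap removed above with `sklearn.metrics.confusion_matrix` plus `px.imshow`. A quick standalone check that the two counting schemes agree (`cm[i, j]` = samples with true class i predicted as class j):

```python
import numpy as np
from sklearn.metrics import confusion_matrix

y_true = np.array([0, 0, 1, 2, 2, 2])
y_pred = np.array([0, 1, 1, 2, 2, 0])
K = 3

cm_manual = np.zeros((K, K), dtype=int)  # the removed function's loop
for t, p in zip(y_true, y_pred):
    cm_manual[t, p] += 1

print(np.array_equal(cm_manual, confusion_matrix(y_true, y_pred, labels=range(K))))  # True
```
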
@@ -1072,9 +445,8 @@ def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes, split_info):
          <div style='margin:8px 0;'>
              <strong style='color:#1976D2;'>🔧 Model Configuration:</strong><br>
              • Epochs: {epochs} | Learning Rate: {learning_rate}<br>
-             • Batch Size: {split_info.get('batch_size', 'Full Batch')} | Features: {len(feature_cols)}<br>
-             • Classes: {n_classes} | Normalization: Standardized<br>
-             • Activation: Softmax | Loss: Categorical Cross-Entropy<br>
+             • Batch Size: {split_info.get('batch_size', 'Full Batch')} | Features: {len(feature_cols)} | Classes: {num_classes}<br>
+             • Normalization: Standardized | Activation: Softmax | Loss: Categorical Cross-Entropy<br>
          </div>
 
          <div style='margin:8px 0;'>
@@ -1087,32 +459,25 @@ def create_results_display(Theta, prediction_proba, prediction_class, feature_cols, epochs, learning_rate, num_classes, split_info):
              <strong style='color:#1976D2;'>📈 Performance Metrics:</strong><br>
              • Training Loss (CCE): <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_loss']:.4f}</strong></span><br>
              • Validation Loss (CCE): <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_loss']:.4f}</strong></span><br>
-             • Training Accuracy: <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_acc_final']*100:.2f}%</strong></span><br>
-             • Validation Accuracy: <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_acc_final']*100:.2f}%</strong></span><br>
+             • Training Accuracy: <span style='background:#BBDEFB;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_acc']*100:.2f}%</strong></span><br>
+             • Validation Accuracy: <span style='background:#C5CAE9;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_acc']*100:.2f}%</strong></span><br>
              • Training Time: <span style='background:#E1BEE7;padding:2px 6px;border-radius:4px;'><strong>{split_info['training_time']:.4f}s</strong></span><br>
          </div>
 
          <div style='margin:8px 0;'>
              <strong style='color:#1976D2;'>🎯 Learned Parameters (Θ):</strong><br>
-             • Theta shape = ({Theta.shape[0]}, {Theta.shape[1]}) - (features+1, classes)<br>
-             • First {min(Theta.shape[0], max_display_rows)} rows:<br>
-             <code style='background:#F3E5F5;padding:6px;border-radius:4px;display:block;margin-top:4px;font-size:0.85em;'>
-             &nbsp;&nbsp;{theta_str}
-             </code>
+             • Theta Shape = <code style='background:#F3E5F5;padding:2px 6px;border-radius:4px;'>{theta_shape_str}</code> (Features+Bias x Classes)<br>
          </div>
 
          <div style='margin:8px 0;'>
-             <strong style='color:#1976D2;'>🔮 Prediction for New Data Point:</strong><br>
+             <strong style='color:#1976D2;'>🔮 Prediction for New Point:</strong><br>
+             • Predicted Class: <span style='background:#DCEDC8;padding:2px 6px;border-radius:4px;font-size:1.1em;'><strong>Class {prediction_class}</strong></span><br>
+             <div style='margin-top:8px;font-size:0.95em;'>
              <strong>Class Probabilities:</strong><br>
-             {proba_str}<br><br>
-             <strong>Predicted Class:</strong> <span style='background:#81C784;padding:4px 10px;border-radius:6px;font-size:1.1em;'><strong>Class {prediction_class}</strong></span><br>
-             <em style='font-size:0.9em;color:#424242;margin-top:4px;display:block;'>
-             * The model outputs probabilities for all {n_classes} classes using softmax activation<br>
-             * Prediction is the class with highest probability (argmax)
-             </em>
+             {proba_str}
+             </div>
          </div>
      </div>
      """
 
-     return html_content
- 
+     return html_content
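
Finally, a hedged end-to-end sketch of the new three-value entry point (again assuming the module is importable as `src.softmax_regression`; `feature_means` here simply fills the new-point inputs with column averages):

```python
from src.softmax_regression import load_data, run_softmax_regression_and_visualize  # assumed path

df = load_data(dataset_choice="Iris")
feature_means = {c: float(df[c].mean()) for c in df.columns if c != "target"}

train_fig, val_fig, results_html = run_softmax_regression_and_visualize(
    df, target_col="target", new_point_dict=feature_means,
    epochs=300, learning_rate=0.1, batch_size_str="Full Batch",
    train_test_split_ratio=0.8)
train_fig.show()  # plotly figure; results_html is the HTML summary panel
```
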