Spaces:
Sleeping
Sleeping
Commit ·
59ebef0
1
Parent(s): f95a877
Add app.py, backend, and model for HF Space
Browse files- backend/train_model.py +61 -0
backend/train_model.py
CHANGED
|
@@ -127,5 +127,66 @@ def train_model():
|
|
| 127 |
print(f"[OK] Best model ({best_name}) saved with F1={best_f1:.4f}")
|
| 128 |
print(f"[OK] All plots saved -> {PLOTS_DIR}")
|
| 129 |
print(f"[OK] Reports saved -> {REPORTS_DIR}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
return best_estimator
|
|
|
|
| 127 |
print(f"[OK] Best model ({best_name}) saved with F1={best_f1:.4f}")
|
| 128 |
print(f"[OK] All plots saved -> {PLOTS_DIR}")
|
| 129 |
print(f"[OK] Reports saved -> {REPORTS_DIR}")
|
| 130 |
+
# --- Gradient-descent diagnostics: scaled features + stratified split ---
# NOTE(review): these imports sit mid-function; harmless in Python but
# conventionally they belong at the top of the module.
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score
import numpy as np
import os

# Scale data: the saga solver converges poorly on unscaled features,
# so standardize to zero mean / unit variance first.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_clean)

# Stratified 80/20 split with a fixed seed for reproducibility.
# NOTE(review): X_test_g / y_test_g are never used by the curve-tracking
# code below — confirm whether a held-out evaluation was intended.
X_train_g, X_test_g, y_train_g, y_test_g = train_test_split(
    X_scaled, Y_clean, test_size=0.2, random_state=42, stratify=Y_clean
)
|
| 142 |
+
|
| 143 |
+
def track_training(penalty, max_iter=50, X=None, y=None):
    """Fit a saga LogisticRegression one optimizer step at a time and
    record the training loss/accuracy after each step.

    Args:
        penalty: regularization passed to LogisticRegression ("l1" or "l2";
            saga supports both).
        max_iter: number of single-iteration fit() calls to perform.
        X, y: optional training data. Default to the enclosing scope's
            X_train_g / y_train_g so existing call sites are unchanged.

    Returns:
        (losses, accs): lists of per-iteration training log-loss and
        training accuracy, each of length max_iter.
    """
    if X is None:
        X = X_train_g
    if y is None:
        y = y_train_g

    clf = LogisticRegression(
        penalty=penalty,
        solver="saga",
        warm_start=True,  # keep coefficients between fit() calls
        max_iter=1,       # advance the optimizer one step per call
        random_state=42,
    )

    losses, accs = [], []
    for _ in range(max_iter):
        # warm_start=True makes each fit() continue from the previous
        # coefficients, so this loop trains one iteration at a time.
        # (Each call emits a ConvergenceWarning; expected here.)
        clf.fit(X, y)
        proba = clf.predict_proba(X)
        # Pin the label order so log_loss columns match predict_proba.
        losses.append(log_loss(y, proba, labels=clf.classes_))
        # BUG FIX: np.argmax returns *column indices* into predict_proba,
        # which equal the labels only when classes are exactly 0..k-1.
        # Map indices through clf.classes_ so arbitrary labels score
        # correctly (previously accuracy was wrong for non-0-based labels).
        accs.append(accuracy_score(y, clf.classes_[np.argmax(proba, axis=1)]))

    return losses, accs
|
| 160 |
+
|
| 161 |
+
# Collect curves
|
| 162 |
+
loss_curves, acc_curves = {}, {}
|
| 163 |
+
loss_curves["L2"], acc_curves["L2"] = track_training("l2", max_iter=50)
|
| 164 |
+
loss_curves["L1"], acc_curves["L1"] = track_training("l1", max_iter=50)
|
| 165 |
+
|
| 166 |
+
# Plot curves
|
| 167 |
+
lineplot_curves(
|
| 168 |
+
loss_curves,
|
| 169 |
+
ylabel="Log Loss",
|
| 170 |
+
title="Logistic Regression – Loss vs Iterations",
|
| 171 |
+
save_path=os.path.join(PLOTS_DIR, "logreg_loss_curves.png")
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
lineplot_curves(
|
| 175 |
+
acc_curves,
|
| 176 |
+
ylabel="Training Accuracy",
|
| 177 |
+
title="Logistic Regression – Accuracy vs Iterations",
|
| 178 |
+
save_path=os.path.join(PLOTS_DIR, "logreg_accuracy_curves.png")
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
print(f"[OK] Reports saved under: {REPORTS_DIR}")
|
| 182 |
+
# Accuracy and F1 bar plots
|
| 183 |
+
# barplot_metric(results_df, "Accuracy", os.path.join(PLOTS_DIR, "model_accuracy.png"), "Model Accuracy (tuned)")
|
| 184 |
+
# barplot_metric(results_df, "F1", os.path.join(PLOTS_DIR, "model_f1.png"), "Model F1 (tuned)")
|
| 185 |
+
# plt.savefig(os.path.join(PLOTS_DIR, "variance_comparison.png"), bbox_inches='tight')
|
| 186 |
+
# plt.close()
|
| 187 |
+
barplot_metric(results_df, "Accuracy", os.path.join(PLOTS_DIR, "model_accuracy.png"), "Model Accuracy (tuned)")
|
| 188 |
+
barplot_metric(results_df, "F1", os.path.join(PLOTS_DIR, "model_f1.png"), "Model F1 (tuned)")
|
| 189 |
+
|
| 190 |
+
print(f"[OK] Plots saved -> {PLOTS_DIR}")
|
| 191 |
|
| 192 |
return best_estimator
|