sathishleo committed on
Commit
59ebef0
·
1 Parent(s): f95a877

Add app.py, backend, and model for HF Space

Browse files
Files changed (1) hide show
  1. backend/train_model.py +61 -0
backend/train_model.py CHANGED
@@ -127,5 +127,66 @@ def train_model():
127
  print(f"[OK] Best model ({best_name}) saved with F1={best_f1:.4f}")
128
  print(f"[OK] All plots saved -> {PLOTS_DIR}")
129
  print(f"[OK] Reports saved -> {REPORTS_DIR}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  return best_estimator
 
127
  print(f"[OK] Best model ({best_name}) saved with F1={best_f1:.4f}")
128
  print(f"[OK] All plots saved -> {PLOTS_DIR}")
129
  print(f"[OK] Reports saved -> {REPORTS_DIR}")
130
+ from sklearn.preprocessing import StandardScaler
131
+ from sklearn.linear_model import LogisticRegression
132
+ from sklearn.metrics import log_loss, accuracy_score
133
+ import numpy as np
134
+ import os
135
+
136
+ # Scale data
137
+ scaler = StandardScaler()
138
+ X_scaled = scaler.fit_transform(X_clean)
139
+ X_train_g, X_test_g, y_train_g, y_test_g = train_test_split(
140
+ X_scaled, Y_clean, test_size=0.2, random_state=42, stratify=Y_clean
141
+ )
142
+
143
def track_training(penalty, max_iter=50):
    """Record per-iteration training loss and accuracy for logistic regression.

    The classifier is created with ``max_iter=1`` and ``warm_start=True`` and
    is re-fitted ``max_iter`` times, so each ``fit`` call starts from the
    coefficients left by the previous call. Metrics are measured on the
    training split (``X_train_g`` / ``y_train_g`` from the enclosing scope)
    after every call.

    Args:
        penalty: Regularisation passed to ``LogisticRegression`` (the caller
            uses ``"l1"`` and ``"l2"``); it must be supported by the
            ``saga`` solver.
        max_iter: Number of single-iteration ``fit`` calls to perform.

    Returns:
        A ``(losses, accs)`` tuple of lists, each of length ``max_iter``,
        holding the log loss and the accuracy after each call.
    """
    clf = LogisticRegression(
        penalty=penalty,
        solver="saga",
        warm_start=True,  # reuse the previous solution as initialisation
        max_iter=1,  # one solver iteration per fit() call
        random_state=42,
    )

    losses, accs = [], []
    # NOTE: with max_iter=1 scikit-learn is expected to emit a
    # ConvergenceWarning on every fit() call; that is inherent to this
    # step-by-step tracking approach.
    for _ in range(max_iter):
        clf.fit(X_train_g, y_train_g)
        proba = clf.predict_proba(X_train_g)
        losses.append(log_loss(y_train_g, proba))
        # argmax yields column indices into clf.classes_, not class labels.
        # Map them back so accuracy is correct even when the labels are not
        # exactly 0..K-1 (e.g. strings, {1, 2} or {-1, 1}).
        predicted = clf.classes_[np.argmax(proba, axis=1)]
        accs.append(accuracy_score(y_train_g, predicted))

    return losses, accs
160
+
161
+ # Collect curves
162
+ loss_curves, acc_curves = {}, {}
163
+ loss_curves["L2"], acc_curves["L2"] = track_training("l2", max_iter=50)
164
+ loss_curves["L1"], acc_curves["L1"] = track_training("l1", max_iter=50)
165
+
166
+ # Plot curves
167
+ lineplot_curves(
168
+ loss_curves,
169
+ ylabel="Log Loss",
170
+ title="Logistic Regression – Loss vs Iterations",
171
+ save_path=os.path.join(PLOTS_DIR, "logreg_loss_curves.png")
172
+ )
173
+
174
+ lineplot_curves(
175
+ acc_curves,
176
+ ylabel="Training Accuracy",
177
+ title="Logistic Regression – Accuracy vs Iterations",
178
+ save_path=os.path.join(PLOTS_DIR, "logreg_accuracy_curves.png")
179
+ )
180
+
181
+ print(f"[OK] Reports saved under: {REPORTS_DIR}")
182
+ # Accuracy and F1 bar plots
183
+ # barplot_metric(results_df, "Accuracy", os.path.join(PLOTS_DIR, "model_accuracy.png"), "Model Accuracy (tuned)")
184
+ # barplot_metric(results_df, "F1", os.path.join(PLOTS_DIR, "model_f1.png"), "Model F1 (tuned)")
185
+ # plt.savefig(os.path.join(PLOTS_DIR, "variance_comparison.png"), bbox_inches='tight')
186
+ # plt.close()
187
+ barplot_metric(results_df, "Accuracy", os.path.join(PLOTS_DIR, "model_accuracy.png"), "Model Accuracy (tuned)")
188
+ barplot_metric(results_df, "F1", os.path.join(PLOTS_DIR, "model_f1.png"), "Model F1 (tuned)")
189
+
190
+ print(f"[OK] Plots saved -> {PLOTS_DIR}")
191
 
192
  return best_estimator