Spaces:
Sleeping
Sleeping
Commit ·
59ebef0
1
Parent(s): f95a877
Add app.py, backend, and model for HF Space
Browse files- backend/train_model.py +61 -0
backend/train_model.py
CHANGED
|
@@ -127,5 +127,66 @@ def train_model():
|
|
| 127 |
print(f"[OK] Best model ({best_name}) saved with F1={best_f1:.4f}")
|
| 128 |
print(f"[OK] All plots saved -> {PLOTS_DIR}")
|
| 129 |
print(f"[OK] Reports saved -> {REPORTS_DIR}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
return best_estimator
|
|
|
|
| 127 |
print(f"[OK] Best model ({best_name}) saved with F1={best_f1:.4f}")
|
| 128 |
print(f"[OK] All plots saved -> {PLOTS_DIR}")
|
| 129 |
print(f"[OK] Reports saved -> {REPORTS_DIR}")
|
| 130 |
+
# --- Gradient-descent diagnostics: scaled features + stratified split ---
# NOTE(review): these imports sit mid-function; harmless in Python but
# conventionally they belong at the top of the module.
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, accuracy_score
import numpy as np
import os

# Scale data: the saga solver converges poorly on unscaled features,
# so standardize to zero mean / unit variance first.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_clean)

# Stratified 80/20 split with a fixed seed for reproducibility.
# NOTE(review): X_test_g / y_test_g are never used by the curve-tracking
# code below — confirm whether a held-out evaluation was intended.
X_train_g, X_test_g, y_train_g, y_test_g = train_test_split(
    X_scaled, Y_clean, test_size=0.2, random_state=42, stratify=Y_clean
)
|
| 142 |
+
|
| 143 |
+
def track_training(penalty, max_iter=50, X=None, y=None):
    """Fit a saga LogisticRegression one optimizer step at a time and
    record the training loss/accuracy after each step.

    Args:
        penalty: regularization passed to LogisticRegression ("l1" or "l2";
            saga supports both).
        max_iter: number of single-iteration fit() calls to perform.
        X, y: optional training data. Default to the enclosing scope's
            X_train_g / y_train_g so existing call sites are unchanged.

    Returns:
        (losses, accs): lists of per-iteration training log-loss and
        training accuracy, each of length max_iter.
    """
    if X is None:
        X = X_train_g
    if y is None:
        y = y_train_g

    clf = LogisticRegression(
        penalty=penalty,
        solver="saga",
        warm_start=True,  # keep coefficients between fit() calls
        max_iter=1,       # advance the optimizer one step per call
        random_state=42,
    )

    losses, accs = [], []
    for _ in range(max_iter):
        # warm_start=True makes each fit() continue from the previous
        # coefficients, so this loop trains one iteration at a time.
        # (Each call emits a ConvergenceWarning; expected here.)
        clf.fit(X, y)
        proba = clf.predict_proba(X)
        # Pin the label order so log_loss columns match predict_proba.
        losses.append(log_loss(y, proba, labels=clf.classes_))
        # BUG FIX: np.argmax returns *column indices* into predict_proba,
        # which equal the labels only when classes are exactly 0..k-1.
        # Map indices through clf.classes_ so arbitrary labels score
        # correctly (previously accuracy was wrong for non-0-based labels).
        accs.append(accuracy_score(y, clf.classes_[np.argmax(proba, axis=1)]))

    return losses, accs
|
| 160 |
+
|
| 161 |
+
# Collect curves
|
| 162 |
+
loss_curves, acc_curves = {}, {}
|
| 163 |
+
loss_curves["L2"], acc_curves["L2"] = track_training("l2", max_iter=50)
|
| 164 |
+
loss_curves["L1"], acc_curves["L1"] = track_training("l1", max_iter=50)
|
| 165 |
+
|
| 166 |
+
# Plot curves
|
| 167 |
+
lineplot_curves(
|
| 168 |
+
loss_curves,
|
| 169 |
+
ylabel="Log Loss",
|
| 170 |
+
title="Logistic Regression – Loss vs Iterations",
|
| 171 |
+
save_path=os.path.join(PLOTS_DIR, "logreg_loss_curves.png")
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
lineplot_curves(
|
| 175 |
+
acc_curves,
|
| 176 |
+
ylabel="Training Accuracy",
|
| 177 |
+
title="Logistic Regression – Accuracy vs Iterations",
|
| 178 |
+
save_path=os.path.join(PLOTS_DIR, "logreg_accuracy_curves.png")
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
print(f"[OK] Reports saved under: {REPORTS_DIR}")
|
| 182 |
+
# Accuracy and F1 bar plots
|
| 183 |
+
# barplot_metric(results_df, "Accuracy", os.path.join(PLOTS_DIR, "model_accuracy.png"), "Model Accuracy (tuned)")
|
| 184 |
+
# barplot_metric(results_df, "F1", os.path.join(PLOTS_DIR, "model_f1.png"), "Model F1 (tuned)")
|
| 185 |
+
# plt.savefig(os.path.join(PLOTS_DIR, "variance_comparison.png"), bbox_inches='tight')
|
| 186 |
+
# plt.close()
|
| 187 |
+
barplot_metric(results_df, "Accuracy", os.path.join(PLOTS_DIR, "model_accuracy.png"), "Model Accuracy (tuned)")
|
| 188 |
+
barplot_metric(results_df, "F1", os.path.join(PLOTS_DIR, "model_f1.png"), "Model F1 (tuned)")
|
| 189 |
+
|
| 190 |
+
print(f"[OK] Plots saved -> {PLOTS_DIR}")
|
| 191 |
|
| 192 |
return best_estimator
|