Spaces:

ganeshkonapalli
/

logreg__

Sleeping

App Files Files Community

ganeshkonapalli committed on Jun 20, 2025

Commit

89e4a53

verified ·

1 Parent(s): fb5b58a

Create train_utils.py

Browse files

Files changed (1) hide show

train_utils.py +111 -0

train_utils.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import os
+import numpy as np
+import pandas as pd
+from sklearn.metrics import classification_report
+from tqdm import tqdm
+import joblib
+from config import LABEL_COLUMNS, MODEL_SAVE_DIR
def train_logreg_models(X, y, label_encoders, model_class):
    """Fit one independent estimator for every column in LABEL_COLUMNS.

    Args:
        X (array-like): Feature matrix passed unchanged to every ``fit`` call.
        y (DataFrame): Targets, indexed by label column name (``y[col]``).
        label_encoders (dict): Accepted for interface compatibility; this
            function does not read it.
        model_class: Zero-argument callable returning an unfitted estimator
            that exposes ``fit(X, y)``.

    Returns:
        dict: Fitted estimators keyed by label column name.
    """

    def _fit_for(label):
        # Announce, build a fresh estimator, then fit it on this label only.
        print(f"Training Logistic Regression model for {label}...")
        estimator = model_class()
        estimator.fit(X, y[label])
        return estimator

    return {label: _fit_for(label) for label in LABEL_COLUMNS}
def evaluate_logreg_models(models, X_val, y_val, label_encoders):
    """Score each per-label model against the validation set.

    Args:
        models (dict): Fitted estimators keyed by label column name.
        X_val (array-like): Validation features passed to every ``predict``.
        y_val (DataFrame): Validation targets, indexed by label column name.
        label_encoders (dict): Accepted for interface compatibility; this
            function does not read it (no decoding is performed here).

    Returns:
        tuple: ``(reports, truths, predictions)`` where ``reports`` maps each
        label to its ``classification_report`` dict, and ``truths`` /
        ``predictions`` are lists (in LABEL_COLUMNS order) of plain Python
        lists of true and predicted values.
    """
    per_label_reports = {}
    all_true, all_pred = [], []

    for label in LABEL_COLUMNS:
        estimator = models[label]
        expected = y_val[label]
        predicted = estimator.predict(X_val)

        all_true.append(expected.tolist())
        all_pred.append(predicted.tolist())

        # zero_division=0 scores classes with no predictions as 0 instead of
        # emitting a warning.
        per_label_reports[label] = classification_report(
            expected,
            predicted,
            output_dict=True,
            zero_division=0,
        )

    return per_label_reports, all_true, all_pred
def summarize_metrics(metrics):
    """Flatten per-field classification reports into one summary table.

    Args:
        metrics (dict): Maps a field name to a report dict. Each report may
            contain a ``'weighted avg'`` sub-dict (with ``'precision'``,
            ``'recall'``, ``'f1-score'``, ``'support'``) and a top-level
            ``'accuracy'`` entry.

    Returns:
        pandas.DataFrame: One row per field with the columns ``Field``,
        ``Precision``, ``Recall``, ``F1-Score``, ``Accuracy`` and ``Support``.
        Any value missing from a report is reported as 0. The columns are
        present even when ``metrics`` is empty.
    """
    columns = ["Field", "Precision", "Recall", "F1-Score", "Accuracy", "Support"]
    rows = []
    for field, report in metrics.items():
        # Treat a missing 'weighted avg' section the same way as a missing
        # individual metric: fall back to 0 rather than raising KeyError.
        weighted = report.get("weighted avg") or {}
        rows.append({
            "Field": field,
            "Precision": weighted.get("precision", 0),
            "Recall": weighted.get("recall", 0),
            "F1-Score": weighted.get("f1-score", 0),
            "Accuracy": report.get("accuracy", 0),
            "Support": weighted.get("support", 0),
        })
    # Passing the column list keeps the schema stable for empty input, so
    # callers can always select columns by name.
    return pd.DataFrame(rows, columns=columns)
def save_logreg_models(models, model_name):
    """Serialize the models object to ``<MODEL_SAVE_DIR>/<model_name>.pkl``.

    Args:
        models: Object to persist with ``joblib.dump`` (the dict of fitted
            per-label models in this module's workflow).
        model_name (str): File stem; ``.pkl`` is appended.

    The destination directory is created if it does not exist yet, so a
    first-time save does not fail on a missing folder.
    """
    model_path = os.path.join(MODEL_SAVE_DIR, f"{model_name}.pkl")
    # joblib.dump does not create missing directories; do it up front.
    # dirname is empty when the path has no directory part, and
    # os.makedirs("") would raise, so guard against that.
    parent_dir = os.path.dirname(model_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    joblib.dump(models, model_path)
    print(f"Saved Logistic Regression models to {model_path}")
def load_logreg_models(model_name):
    """Read back the object stored at ``<MODEL_SAVE_DIR>/<model_name>.pkl``.

    Args:
        model_name (str): File stem; ``.pkl`` is appended.

    Returns:
        Whatever object ``joblib.load`` deserializes from the file.

    Raises:
        FileNotFoundError: If no file exists at the computed path.
    """
    pickle_file = os.path.join(MODEL_SAVE_DIR, "{}.pkl".format(model_name))
    if os.path.exists(pickle_file):
        # NOTE: joblib.load unpickles the file; only load files you trust.
        loaded = joblib.load(pickle_file)
        print(f"Loaded Logistic Regression models from {pickle_file}")
        return loaded
    raise FileNotFoundError(f"Model not found at {pickle_file}")
def predict_logreg_probabilities(models, X):
    """Collect ``predict_proba`` output from every per-label model.

    Args:
        models (dict): Fitted estimators keyed by label column name.
        X (array-like): Features passed unchanged to each ``predict_proba``.

    Returns:
        list: One ``predict_proba`` result per label, in LABEL_COLUMNS order.
    """
    return [models[label].predict_proba(X) for label in LABEL_COLUMNS]