Spaces:

saherPervaiz
/

Depression

Sleeping

App Files Files Community

saherPervaiz committed on Jan 14, 2025

Commit

72276ea

verified ·

1 Parent(s): 34ceabf

Update utils/model_training.py

Browse files

Files changed (1) hide show

utils/model_training.py +49 -61

utils/model_training.py CHANGED Viewed

@@ -1,85 +1,73 @@
 import pandas as pd
 from sklearn.model_selection import train_test_split
-from sklearn.linear_model import LogisticRegression, LinearRegression
-from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn.metrics import accuracy_score, mean_squared_error, r2_score
-# Function for classification model training
-def train_classification_models(X, y):
     """
-    Train various classification models and return their performance metrics.
     """
     models = {
-        'Logistic Regression': LogisticRegression(),
-        'Decision Tree': DecisionTreeClassifier(),
-        'Random Forest': RandomForestClassifier()
     }
-    results = []
-    # Split the data into training and testing sets
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     for model_name, model in models.items():
-        # Train the model
         model.fit(X_train, y_train)
-        # Predict the test set results
         y_pred = model.predict(X_test)
-        # Calculate metrics
-        accuracy = accuracy_score(y_test, y_pred)
-        precision = (y_pred == 1).sum() / len(y_pred)  # Simplified precision for binary classification
-        recall = (y_pred == 1).sum() / (y_test == 1).sum()  # Simplified recall for binary classification
-        f1 = 2 * (precision * recall) / (precision + recall) if precision + recall != 0 else 0
-        results.append({
-            'Model': model_name,
-            'Accuracy': accuracy,
-            'Precision': precision,
-            'Recall': recall,
-            'F1 Score': f1
-        })
-    # Convert results to DataFrame for easy comparison
-    results_df = pd.DataFrame(results)
     return results_df
-# Function for regression model training
-def train_regression_models(X, y):
     """
-    Train various regression models and return their performance metrics.
     """
     models = {
-        'Linear Regression': LinearRegression(),
-        'Decision Tree Regressor': DecisionTreeRegressor(),
-        'Random Forest Regressor': RandomForestRegressor()
     }
-    results = []
-    # Split the data into training and testing sets
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     for model_name, model in models.items():
-        # Train the model
         model.fit(X_train, y_train)
-        # Predict the test set results
         y_pred = model.predict(X_test)
-        # Calculate metrics
-        mse = mean_squared_error(y_test, y_pred)
-        r2 = r2_score(y_test, y_pred)
-        results.append({
-            'Model': model_name,
-            'MSE': mse,
-            'R2': r2
-        })
-    # Convert results to DataFrame for easy comparison
-    results_df = pd.DataFrame(results)
     return results_df

+# utils/model_training.py
 import pandas as pd
 from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression, LogisticRegression
+from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score
+from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+from sklearn.svm import SVC
+# Function to train a regression model
+def train_regression_model(X, y):
     """
+    Train a regression model on the given data and return metrics.
     """
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     models = {
+        'Linear Regression': LinearRegression(),
+        'Random Forest Regressor': RandomForestRegressor(),
     }
+    model_results = {}
     for model_name, model in models.items():
         model.fit(X_train, y_train)
         y_pred = model.predict(X_test)
+        mse = mean_squared_error(y_test, y_pred)
+        r2 = r2_score(y_test, y_pred)
+        model_results[model_name] = {'MSE': mse, 'R2': r2}
+    # Return results as a DataFrame
+    results_df = pd.DataFrame(model_results).T
     return results_df
+# Function to train a classification model
+def train_classification_model(X, y):
     """
+    Train a classification model on the given data and return metrics.
     """
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     models = {
+        'Logistic Regression': LogisticRegression(),
+        'Random Forest Classifier': RandomForestClassifier(),
+        'SVM': SVC(),
     }
+    model_results = {}
     for model_name, model in models.items():
         model.fit(X_train, y_train)
         y_pred = model.predict(X_test)
+        accuracy = accuracy_score(y_test, y_pred)
+        precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
+        recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
+        f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)
+        model_results[model_name] = {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1 Score': f1}
+    results_df = pd.DataFrame(model_results).T
     return results_df
+# Function to train either classification or regression models, chosen from the target's dtype and number of unique values
+def train_all_models(X, y):
+    """
+    Train classification models when y has object dtype or at most 10 unique values, otherwise train regression models, and return the metrics.
+    """
+    if y.dtype == 'object' or len(y.unique()) <= 10:  # Classification
+        return train_classification_model(X, y)
+    else:  # Regression
+        return train_regression_model(X, y)