# Depression / utils/model_training.py
# saherPervaiz's picture
# Update utils/model_training.py
# 4ccd84b verified
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.svm import SVC, SVR
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error, r2_score
def train_all_models(X, y, test_size=0.3, random_state=42):
    """
    Train every candidate model on one shared split and tabulate its metrics.

    The data is split once into train and test partitions. Each model is then
    fitted on the training partition and scored on the held-out partition.
    Classification models report Accuracy, Precision, Recall and F1 Score
    (weighted averages, with undefined values reported as 0). Regression
    models report MSE, MAE and R2 Score.

    Args:
        X: Feature matrix, forwarded to ``train_test_split`` and the estimators.
        y: Target values. The classifiers and the regressors are all fitted on
            this same target.
            NOTE(review): this presumably requires numerically encoded labels
            so that both model families accept it -- confirm against callers.
        test_size: Share of the data held out for evaluation, forwarded to
            ``train_test_split``. Defaults to 0.3.
        random_state: Seed used for the split and for the estimators whose
            fitting is randomized (random forests and decision trees), so
            repeated calls on the same data give the same table. Pass ``None``
            for unseeded behavior. Defaults to 42.

    Returns:
        pandas.DataFrame: One row per model, in the order the models are
        listed below. Metric columns that do not apply to a model's task type
        are left empty (NaN) for that row.

    Raises:
        Any exception raised by scikit-learn while splitting, fitting,
        predicting or scoring propagates to the caller unchanged.
    """
    # Fresh estimator instances are built on every call so no fitted state
    # leaks between invocations.
    models = [
        ("Logistic Regression", LogisticRegression(), 'classification'),
        ("Random Forest", RandomForestClassifier(random_state=random_state), 'classification'),
        ("SVM", SVC(), 'classification'),
        ("KNN", KNeighborsClassifier(), 'classification'),
        ("Decision Tree", DecisionTreeClassifier(random_state=random_state), 'classification'),
        ("Naive Bayes", GaussianNB(), 'classification'),
        ("Linear Regression", LinearRegression(), 'regression'),
        ("Random Forest Regressor", RandomForestRegressor(random_state=random_state), 'regression'),
        ("SVR", SVR(), 'regression'),
        ("KNN Regressor", KNeighborsRegressor(), 'regression'),
        ("Decision Tree Regressor", DecisionTreeRegressor(random_state=random_state), 'regression'),
    ]

    # The split does not depend on the model, so compute it once instead of
    # once per loop iteration; every model is compared on the same partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    model_results = []
    for model_name, model, task_type in models:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Pick the metric set that matches the model's task type.
        if task_type == 'classification':
            metrics = {
                "Model": model_name,
                "Accuracy": accuracy_score(y_test, y_pred),
                "Precision": precision_score(y_test, y_pred, average='weighted', zero_division=0),
                "Recall": recall_score(y_test, y_pred, average='weighted', zero_division=0),
                "F1 Score": f1_score(y_test, y_pred, average='weighted', zero_division=0),
            }
        else:
            metrics = {
                "Model": model_name,
                "MSE": mean_squared_error(y_test, y_pred),
                "MAE": mean_absolute_error(y_test, y_pred),
                "R2 Score": r2_score(y_test, y_pred),
            }
        model_results.append(metrics)

    # Convert to a DataFrame for display.
    return pd.DataFrame(model_results)