Spaces:
Sleeping
Sleeping
Update utils/model_training.py
Browse files
- utils/model_training.py +18 -6
utils/model_training.py
CHANGED
|
@@ -1,10 +1,19 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
metrics = []
|
|
|
|
| 8 |
if y.dtype == 'object' or len(y.unique()) <= 10: # Classification
|
| 9 |
classifiers = {
|
| 10 |
'Logistic Regression': LogisticRegression(max_iter=5000),
|
|
@@ -14,11 +23,12 @@ def train_models(df, target, features):
|
|
| 14 |
'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
|
| 15 |
'Naive Bayes': GaussianNB()
|
| 16 |
}
|
| 17 |
-
|
| 18 |
for name, classifier in classifiers.items():
|
| 19 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
|
| 20 |
classifier.fit(X_train, y_train)
|
| 21 |
y_pred = classifier.predict(X_test)
|
|
|
|
| 22 |
metrics.append({
|
| 23 |
'Model': name,
|
| 24 |
'Accuracy': round(accuracy_score(y_test, y_pred), 2),
|
|
@@ -41,6 +51,7 @@ def train_models(df, target, features):
|
|
| 41 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
| 42 |
regressor.fit(X_train, y_train)
|
| 43 |
y_pred = regressor.predict(X_test)
|
|
|
|
| 44 |
metrics.append({
|
| 45 |
'Model': name,
|
| 46 |
'Mean Squared Error (MSE)': round(mean_squared_error(y_test, y_pred), 2),
|
|
@@ -49,5 +60,6 @@ def train_models(df, target, features):
|
|
| 49 |
'Cross-Validated R² Score': round(cross_val_score(regressor, X, y, cv=5, scoring='r2').mean(), 2)
|
| 50 |
})
|
| 51 |
|
|
|
|
| 52 |
metrics_df = pd.DataFrame(metrics)
|
| 53 |
return metrics_df
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from sklearn.model_selection import train_test_split, cross_val_score
|
| 3 |
+
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
|
| 4 |
+
from sklearn.linear_model import LogisticRegression, LinearRegression
|
| 5 |
+
from sklearn.svm import SVC, SVR
|
| 6 |
+
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
|
| 7 |
+
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
|
| 8 |
+
from sklearn.naive_bayes import GaussianNB
|
| 9 |
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error, r2_score
|
| 10 |
|
| 11 |
+
def train_all_models(X, y):
|
| 12 |
+
"""
|
| 13 |
+
Train all models and return a DataFrame with evaluation metrics.
|
| 14 |
+
"""
|
| 15 |
metrics = []
|
| 16 |
+
|
| 17 |
if y.dtype == 'object' or len(y.unique()) <= 10: # Classification
|
| 18 |
classifiers = {
|
| 19 |
'Logistic Regression': LogisticRegression(max_iter=5000),
|
|
|
|
| 23 |
'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
|
| 24 |
'Naive Bayes': GaussianNB()
|
| 25 |
}
|
| 26 |
+
|
| 27 |
for name, classifier in classifiers.items():
|
| 28 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
|
| 29 |
classifier.fit(X_train, y_train)
|
| 30 |
y_pred = classifier.predict(X_test)
|
| 31 |
+
|
| 32 |
metrics.append({
|
| 33 |
'Model': name,
|
| 34 |
'Accuracy': round(accuracy_score(y_test, y_pred), 2),
|
|
|
|
| 51 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
| 52 |
regressor.fit(X_train, y_train)
|
| 53 |
y_pred = regressor.predict(X_test)
|
| 54 |
+
|
| 55 |
metrics.append({
|
| 56 |
'Model': name,
|
| 57 |
'Mean Squared Error (MSE)': round(mean_squared_error(y_test, y_pred), 2),
|
|
|
|
| 60 |
'Cross-Validated R² Score': round(cross_val_score(regressor, X, y, cv=5, scoring='r2').mean(), 2)
|
| 61 |
})
|
| 62 |
|
| 63 |
+
# Return metrics as a DataFrame for easy display
|
| 64 |
metrics_df = pd.DataFrame(metrics)
|
| 65 |
return metrics_df
|