Spaces:
Sleeping
Sleeping
Update utils/model_training.py
Browse files- utils/model_training.py +45 -50
utils/model_training.py
CHANGED
|
@@ -12,54 +12,49 @@ def train_all_models(X, y):
|
|
| 12 |
"""
|
| 13 |
Train all models and return a DataFrame with evaluation metrics.
|
| 14 |
"""
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
'
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
}
|
| 49 |
-
|
| 50 |
-
for name, regressor in regressors.items():
|
| 51 |
-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
| 52 |
-
regressor.fit(X_train, y_train)
|
| 53 |
-
y_pred = regressor.predict(X_test)
|
| 54 |
-
|
| 55 |
-
metrics.append({
|
| 56 |
-
'Model': name,
|
| 57 |
-
'Mean Squared Error (MSE)': round(mean_squared_error(y_test, y_pred), 2),
|
| 58 |
-
'Mean Absolute Error (MAE)': round(mean_absolute_error(y_test, y_pred), 2),
|
| 59 |
-
'R² Score': round(r2_score(y_test, y_pred), 2),
|
| 60 |
-
'Cross-Validated R² Score': round(cross_val_score(regressor, X, y, cv=5, scoring='r2').mean(), 2)
|
| 61 |
-
})
|
| 62 |
-
|
| 63 |
-
# Return metrics as a DataFrame for easy display
|
| 64 |
-
metrics_df = pd.DataFrame(metrics)
|
| 65 |
-
return metrics_df
|
|
|
|
| 12 |
def train_all_models(X, y):
    """
    Train all models and return a DataFrame with evaluation metrics.

    Each configured estimator is fitted on the same 70/30 train/test split
    of ``X`` and ``y`` and scored on the held-out part.

    Args:
        X: Feature matrix accepted by ``train_test_split`` and the estimators.
        y: Target values aligned with ``X``.

    Returns:
        A ``pandas.DataFrame`` with one row per model. Classification rows
        carry "Accuracy", "Precision", "Recall" and "F1 Score"; regression
        rows carry "MSE", "MAE" and "R2 Score". Because the rows are built
        from dicts with different keys, columns that do not apply to a row
        are filled with NaN by the DataFrame constructor.

    NOTE(review): classifiers and regressors are all fitted on the same
    ``y``. Confirm that callers pass a target that is valid for both
    families; a target unsuitable for one family will presumably make that
    estimator's ``fit`` or its metrics raise and abort the whole run.
    """
    models = [
        ("Logistic Regression", LogisticRegression(), 'classification'),
        ("Random Forest", RandomForestClassifier(), 'classification'),
        ("SVM", SVC(), 'classification'),
        ("KNN", KNeighborsClassifier(), 'classification'),
        ("Decision Tree", DecisionTreeClassifier(), 'classification'),
        ("Naive Bayes", GaussianNB(), 'classification'),
        ("Linear Regression", LinearRegression(), 'regression'),
        ("Random Forest Regressor", RandomForestRegressor(), 'regression'),
        ("SVR", SVR(), 'regression'),
        ("KNN Regressor", KNeighborsRegressor(), 'regression'),
        ("Decision Tree Regressor", DecisionTreeRegressor(), 'regression'),
    ]

    # The split is seeded (random_state=42), so it is identical for every
    # model; compute it once instead of once per loop iteration.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    model_results = []
    for model_name, model, task_type in models:
        # Train the model and predict on the held-out set.
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Evaluate based on task type.
        if task_type == 'classification':
            # Weighted averaging handles multi-class targets;
            # zero_division=0 scores classes that were never predicted as 0
            # instead of emitting a warning.
            metrics = {
                "Model": model_name,
                "Accuracy": accuracy_score(y_test, y_pred),
                "Precision": precision_score(
                    y_test, y_pred, average='weighted', zero_division=0
                ),
                "Recall": recall_score(
                    y_test, y_pred, average='weighted', zero_division=0
                ),
                "F1 Score": f1_score(
                    y_test, y_pred, average='weighted', zero_division=0
                ),
            }
        else:
            metrics = {
                "Model": model_name,
                "MSE": mean_squared_error(y_test, y_pred),
                "MAE": mean_absolute_error(y_test, y_pred),
                "R2 Score": r2_score(y_test, y_pred),
            }
        model_results.append(metrics)

    # Convert to DataFrame for display.
    return pd.DataFrame(model_results)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|