saherPervaiz committed on
Commit
4ccd84b
·
verified ·
1 Parent(s): 98aea7e

Update utils/model_training.py

Browse files
Files changed (1) hide show
  1. utils/model_training.py +45 -50
utils/model_training.py CHANGED
@@ -12,54 +12,49 @@ def train_all_models(X, y):
12
  """
13
  Train all models and return a DataFrame with evaluation metrics.
14
  """
15
- metrics = []
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- if y.dtype == 'object' or len(y.unique()) <= 10: # Classification
18
- classifiers = {
19
- 'Logistic Regression': LogisticRegression(max_iter=5000),
20
- 'Decision Tree': DecisionTreeClassifier(),
21
- 'Random Forest': RandomForestClassifier(),
22
- 'Support Vector Machine (SVM)': SVC(),
23
- 'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
24
- 'Naive Bayes': GaussianNB()
25
- }
26
-
27
- for name, classifier in classifiers.items():
28
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
29
- classifier.fit(X_train, y_train)
30
- y_pred = classifier.predict(X_test)
31
-
32
- metrics.append({
33
- 'Model': name,
34
- 'Accuracy': round(accuracy_score(y_test, y_pred), 2),
35
- 'Precision': round(precision_score(y_test, y_pred, zero_division=1, average='macro'), 2),
36
- 'Recall': round(recall_score(y_test, y_pred, zero_division=1, average='macro'), 2),
37
- 'F1-Score': round(f1_score(y_test, y_pred, zero_division=1, average='macro'), 2),
38
- 'Cross-Validated Accuracy': round(cross_val_score(classifier, X, y, cv=5, scoring='accuracy').mean(), 2)
39
- })
40
-
41
- else: # Regression
42
- regressors = {
43
- 'Linear Regression': LinearRegression(),
44
- 'Decision Tree Regressor': DecisionTreeRegressor(),
45
- 'Random Forest Regressor': RandomForestRegressor(),
46
- 'Support Vector Regressor (SVR)': SVR(),
47
- 'K-Nearest Neighbors Regressor (k-NN)': KNeighborsRegressor()
48
- }
49
-
50
- for name, regressor in regressors.items():
51
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
52
- regressor.fit(X_train, y_train)
53
- y_pred = regressor.predict(X_test)
54
-
55
- metrics.append({
56
- 'Model': name,
57
- 'Mean Squared Error (MSE)': round(mean_squared_error(y_test, y_pred), 2),
58
- 'Mean Absolute Error (MAE)': round(mean_absolute_error(y_test, y_pred), 2),
59
- 'R² Score': round(r2_score(y_test, y_pred), 2),
60
- 'Cross-Validated R² Score': round(cross_val_score(regressor, X, y, cv=5, scoring='r2').mean(), 2)
61
- })
62
-
63
- # Return metrics as a DataFrame for easy display
64
- metrics_df = pd.DataFrame(metrics)
65
- return metrics_df
 
12
  """
13
  Train all models and return a DataFrame with evaluation metrics.
14
  """
15
+ model_results = []
16
+ models = [
17
+ ("Logistic Regression", LogisticRegression(), 'classification'),
18
+ ("Random Forest", RandomForestClassifier(), 'classification'),
19
+ ("SVM", SVC(), 'classification'),
20
+ ("KNN", KNeighborsClassifier(), 'classification'),
21
+ ("Decision Tree", DecisionTreeClassifier(), 'classification'),
22
+ ("Naive Bayes", GaussianNB(), 'classification'),
23
+ ("Linear Regression", LinearRegression(), 'regression'),
24
+ ("Random Forest Regressor", RandomForestRegressor(), 'regression'),
25
+ ("SVR", SVR(), 'regression'),
26
+ ("KNN Regressor", KNeighborsRegressor(), 'regression'),
27
+ ("Decision Tree Regressor", DecisionTreeRegressor(), 'regression')
28
+ ]
29
 
30
+ for model_name, model, task_type in models:
31
+ # Split data into train and test sets
32
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
33
+
34
+ # Train the model
35
+ model.fit(X_train, y_train)
36
+
37
+ # Make predictions
38
+ y_pred = model.predict(X_test)
39
+
40
+ # Evaluate based on task type
41
+ if task_type == 'classification':
42
+ metrics = {
43
+ "Model": model_name,
44
+ "Accuracy": accuracy_score(y_test, y_pred),
45
+ "Precision": precision_score(y_test, y_pred, average='weighted', zero_division=0),
46
+ "Recall": recall_score(y_test, y_pred, average='weighted', zero_division=0),
47
+ "F1 Score": f1_score(y_test, y_pred, average='weighted', zero_division=0)
48
+ }
49
+ else:
50
+ metrics = {
51
+ "Model": model_name,
52
+ "MSE": mean_squared_error(y_test, y_pred),
53
+ "MAE": mean_absolute_error(y_test, y_pred),
54
+ "R2 Score": r2_score(y_test, y_pred)
55
+ }
56
+ model_results.append(metrics)
57
+
58
+ # Convert to DataFrame for display
59
+ model_results_df = pd.DataFrame(model_results)
60
+ return model_results_df