saherPervaiz committed on
Commit
72276ea
·
verified ·
1 Parent(s): 34ceabf

Update utils/model_training.py

Browse files
Files changed (1) hide show
  1. utils/model_training.py +49 -61
utils/model_training.py CHANGED
@@ -1,85 +1,73 @@
 
1
  import pandas as pd
2
  from sklearn.model_selection import train_test_split
3
- from sklearn.linear_model import LogisticRegression, LinearRegression
4
- from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
5
- from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
6
- from sklearn.metrics import accuracy_score, mean_squared_error, r2_score
7
 
8
- # Function for classification model training
9
def train_classification_models(X, y):
    """
    Train several classifiers and return their test-set metrics.

    The data is split 80/20 with a fixed seed; each model is fitted on the
    training part and evaluated on the held-out part.

    Precision, recall and F1 are computed for a binary target and treat the
    label ``1`` as the positive class.

    Returns:
        A DataFrame with one row per model and the columns ``Model``,
        ``Accuracy``, ``Precision``, ``Recall`` and ``F1 Score``.
    """
    # Seed the tree-based models as well, so that repeated calls on the same
    # data give the same metrics (the split below is already seeded).
    models = {
        'Logistic Regression': LogisticRegression(),
        'Decision Tree': DecisionTreeClassifier(random_state=42),
        'Random Forest': RandomForestClassifier(random_state=42)
    }

    results = []

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Positional boolean mask of the actual positives; going through
    # to_numpy() avoids index alignment between a Series and the predictions.
    actual_positive = pd.Series(y_test).to_numpy() == 1
    actual_positive_count = int(actual_positive.sum())

    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)

        predicted_positive = pd.Series(y_pred).to_numpy() == 1
        predicted_positive_count = int(predicted_positive.sum())
        true_positive_count = int((actual_positive & predicted_positive).sum())

        # precision = TP / predicted positives, recall = TP / actual positives.
        # Both fall back to 0.0 when their denominator is empty instead of
        # dividing by zero.
        precision = (
            true_positive_count / predicted_positive_count
            if predicted_positive_count else 0.0
        )
        recall = (
            true_positive_count / actual_positive_count
            if actual_positive_count else 0.0
        )
        f1 = 2 * (precision * recall) / (precision + recall) if precision + recall != 0 else 0.0

        results.append({
            'Model': model_name,
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1 Score': f1
        })

    # Convert results to DataFrame for easy comparison
    results_df = pd.DataFrame(results)
    return results_df
48
 
49
-
50
- # Function for regression model training
51
def train_regression_models(X, y):
    """
    Train several regressors and return their test-set metrics.

    The data is split 80/20 with a fixed seed; each model is fitted on the
    training part and evaluated on the held-out part.

    Returns:
        A DataFrame with one row per model and the columns ``Model``,
        ``MSE`` and ``R2``.
    """
    # Seed the tree-based models as well, so that repeated calls on the same
    # data give the same metrics (the split below is already seeded).
    models = {
        'Linear Regression': LinearRegression(),
        'Decision Tree Regressor': DecisionTreeRegressor(random_state=42),
        'Random Forest Regressor': RandomForestRegressor(random_state=42)
    }

    results = []

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        results.append({
            'Model': model_name,
            'MSE': mean_squared_error(y_test, y_pred),
            'R2': r2_score(y_test, y_pred)
        })

    # Convert results to DataFrame for easy comparison
    results_df = pd.DataFrame(results)
    return results_df
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils/model_training.py
2
  import pandas as pd
3
  from sklearn.model_selection import train_test_split
4
+ from sklearn.linear_model import LinearRegression, LogisticRegression
5
+ from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score
6
+ from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
7
+ from sklearn.svm import SVC
8
 
9
+ # Function to train a regression model
10
def train_regression_model(X, y):
    """
    Train several regression models on the given data and return metrics.

    The data is split 80/20 with a fixed seed; each model is fitted on the
    training part and evaluated on the held-out part.

    Returns:
        A DataFrame indexed by model name with the columns ``MSE`` and
        ``R2``.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # The forest is seeded like the split, so repeated calls on the same data
    # report the same metrics.
    models = {
        'Linear Regression': LinearRegression(),
        'Random Forest Regressor': RandomForestRegressor(random_state=42),
    }

    model_results = {}

    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        model_results[model_name] = {
            'MSE': mean_squared_error(y_test, y_pred),
            'R2': r2_score(y_test, y_pred),
        }

    # The dict is keyed by model name, so transpose to get one row per model.
    results_df = pd.DataFrame(model_results).T
    return results_df
35
 
36
+ # Function to train a classification model
37
def train_classification_model(X, y):
    """
    Train several classification models on the given data and return metrics.

    The data is split 80/20 with a fixed seed; each model is fitted on the
    training part and evaluated on the held-out part. Precision, recall and
    F1 are weighted averages over the classes, with 0 reported for classes
    where a score would otherwise be undefined.

    Returns:
        A DataFrame indexed by model name with the columns ``Accuracy``,
        ``Precision``, ``Recall`` and ``F1 Score``.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    models = {
        # The default iteration budget of 100 can stop the solver before it
        # converges on unscaled features; allow more iterations.
        'Logistic Regression': LogisticRegression(max_iter=1000),
        # Seeded like the split, so repeated calls report the same metrics.
        'Random Forest Classifier': RandomForestClassifier(random_state=42),
        'SVM': SVC(),
    }

    model_results = {}

    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        model_results[model_name] = {
            'Accuracy': accuracy_score(y_test, y_pred),
            'Precision': precision_score(y_test, y_pred, average='weighted', zero_division=0),
            'Recall': recall_score(y_test, y_pred, average='weighted', zero_division=0),
            'F1 Score': f1_score(y_test, y_pred, average='weighted', zero_division=0),
        }

    # The dict is keyed by model name, so transpose to get one row per model.
    results_df = pd.DataFrame(model_results).T
    return results_df
64
+
65
+ # Function to train either classification or regression models, chosen from the target's data type
66
def train_all_models(X, y, max_classes=10):
    """
    Train classification or regression models, chosen from the target.

    The target is treated as a classification label when it is not numeric
    (object, string, categorical, ...) or when it has at most ``max_classes``
    distinct values; otherwise it is treated as a regression target.

    Args:
        X: Feature matrix, passed through unchanged to the trainer.
        y: Target values; a pandas Series or any 1-D array-like.
        max_classes: Largest number of distinct values for which a numeric
            target is still treated as a set of class labels.

    Returns:
        The metrics DataFrame produced by ``train_classification_model`` or
        ``train_regression_model``.
    """
    # Wrap only for inspection, so plain arrays and lists work too; the
    # trainers still receive the caller's original object.
    target = y if isinstance(y, pd.Series) else pd.Series(y)

    if not pd.api.types.is_numeric_dtype(target):
        return train_classification_model(X, y)

    # dropna=False counts a missing value as its own distinct value.
    if target.nunique(dropna=False) <= max_classes:
        return train_classification_model(X, y)

    return train_regression_model(X, y)