atodorov284 committed on
Commit
20c0381
·
1 Parent(s): 4f659cb

Created a predictor class to encapsulate and wrap the loaded models. Made a manual prediction to verify that the MLflow tracking was correct, which is indeed the case. The predictor can be accessed through prediction.py.

Browse files
air-quality-forecast/prediction.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import pandas as pd
4
+ from sklearn.base import BaseEstimator
5
+ from sklearn.metrics import root_mean_squared_error, mean_squared_error
6
+ import pickle
7
+ import xgboost
8
+
9
class PredictorModels:
    """
    Wraps the pre-trained regressors stored in the saved_models directory.

    On construction the Decision Tree, Random Forest and XGBoost models are
    loaded from disk; each one is then exposed through its own
    ``*_predictions`` method.
    """

    def __init__(self) -> None:
        """
        Initializes the predictor models by loading the pre-trained models
        from the saved_models directory.

        The XGBoost booster is created empty here and filled in by
        ``_load_models``; the Decision Tree and Random Forest attributes
        start as ``None`` until they are unpickled.
        """
        self._xgboost: xgboost.Booster = xgboost.Booster()
        self._d_tree: BaseEstimator = None
        self._random_forest: BaseEstimator = None
        self._load_models()

    def _load_models(self) -> None:
        """
        Loads the pre-trained models from the saved_models directory.

        The models are loaded in the following order:

        1. Decision Tree Regressor
        2. Random Forest Regressor
        3. XGBoost Regressor

        The models are loaded from the following paths, relative to the
        parent of the directory containing this file:

        - Decision Tree Regressor: saved_models/decision_tree.pkl
        - Random Forest Regressor: saved_models/random_forest.pkl
        - XGBoost Regressor: saved_models/xgboost.xgb
        """
        # abspath keeps the lookup independent of the working directory even
        # when __file__ happens to be a relative path.
        project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        models_path = os.path.join(project_root, "saved_models")

        # NOTE: pickle executes arbitrary code while loading; only load model
        # files that come from a trusted source (here: the repository itself).
        # The context managers make sure the file handles are closed.
        with open(os.path.join(models_path, "decision_tree.pkl"), "rb") as model_file:
            self._d_tree = pickle.load(model_file)
        with open(os.path.join(models_path, "random_forest.pkl"), "rb") as model_file:
            self._random_forest = pickle.load(model_file)
        self._xgboost.load_model(os.path.join(models_path, "xgboost.xgb"))

    @staticmethod
    def _validate_input(x_test: pd.DataFrame) -> None:
        """
        Checks that the input is present and two dimensional.

        Parameters
        ----------
        x_test : pd.DataFrame
            Data points about to be passed to a model.

        Raises
        ------
        ValueError
            If ``x_test`` is None or its ``ndim`` is not 2.
        """
        if x_test is None:
            raise ValueError("x_test is None")
        if x_test.ndim != 2:
            raise ValueError("x_test must be 2 dimensional, got {}".format(x_test.ndim))

    def xgb_predictions(self, x_test: pd.DataFrame) -> np.ndarray:
        """
        Makes predictions using the loaded XGBoost regressor.

        Parameters
        ----------
        x_test : pd.DataFrame
            Data points to make predictions on.

        Returns
        -------
        y_pred : np.ndarray
            Predicted values for the input data points.

        Raises
        ------
        ValueError
            If ``x_test`` is None or not 2 dimensional.
        """
        self._validate_input(x_test)
        # The Booster API predicts on a DMatrix rather than on a DataFrame.
        xgb_test = xgboost.DMatrix(x_test)
        return self._xgboost.predict(xgb_test)

    def random_forest_predictions(self, x_test: pd.DataFrame) -> np.ndarray:
        """
        Makes predictions using the loaded Random Forest regressor.

        Parameters
        ----------
        x_test : pd.DataFrame
            Data points to make predictions on.

        Returns
        -------
        y_pred : np.ndarray
            Predicted values for the input data points.

        Raises
        ------
        ValueError
            If ``x_test`` is None or not 2 dimensional.
        """
        self._validate_input(x_test)
        return self._random_forest.predict(x_test)

    def decision_tree_predictions(self, x_test: pd.DataFrame) -> np.ndarray:
        """
        Makes predictions using the loaded decision tree regressor.

        Parameters
        ----------
        x_test : pd.DataFrame
            Input data to make predictions on.

        Returns
        -------
        y_pred : np.ndarray
            Predicted values.

        Raises
        ------
        ValueError
            If ``x_test`` is None or not 2 dimensional.
        """
        self._validate_input(x_test)
        return self._d_tree.predict(x_test)
111
+
112
+
113
if __name__ == "__main__":
    # Manual sanity check: score every loaded model on the processed train
    # and test splits and print MSE and RMSE for each.
    # The CSV paths are relative, so this is expected to be run from the
    # directory that contains data/processed.
    predictor = PredictorModels()

    model_predictors = (
        ("Decision Tree", predictor.decision_tree_predictions),
        ("Random Forest", predictor.random_forest_predictions),
        ("XGBoost", predictor.xgb_predictions),
    )
    metrics = (
        ("MSE", mean_squared_error),
        ("RMSE", root_mean_squared_error),
    )

    for split_label, split_file in (("Train", "train"), ("Test", "test")):
        features = pd.read_csv(f"data/processed/x_{split_file}.csv", index_col=0)
        targets = pd.read_csv(f"data/processed/y_{split_file}.csv", index_col=0)

        # Predict once per model, then reuse the predictions for every metric.
        predictions = [
            (model_name, predict(features)) for model_name, predict in model_predictors
        ]

        # Metric-major order keeps the output grouped as before: all MSE
        # lines for the split first, then all RMSE lines.
        for metric_label, metric in metrics:
            for model_name, y_pred in predictions:
                print(f"{split_label} {model_name} {metric_label}: ", metric(targets, y_pred))
saved_models/decision_tree.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c525bd86eb05bbcbc4c47b376e6ad56a6709211508dc252937cca563f5224cc8
3
+ size 6132
saved_models/random_forest.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62ded2e5b233ca79f47b9313038de84a350758eca872d9a1e4bbba14805b8cd0
3
+ size 2055582
saved_models/xgboost.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f5802d83d3041b537e54d20e594e0382901643af2443e4ae62b961233e95775
3
+ size 93202
saved_models/xgboost.xgb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e0bb78e1d807c9ce321d3e5cec9dbb377ad7740991c432ef711cdc9673c637d
3
+ size 6828485