atodorov284 committed on
Commit
69a67ba
·
1 Parent(s): 58f52be

data wasn't normalized, predictions were wrong

Browse files
air_quality_forecast/get_prediction_data.py CHANGED
@@ -14,7 +14,7 @@ def main():
14
  caller = APICaller()
15
  all_predictions = caller.lag_data()
16
 
17
- df = pd.DataFrame(predictor.xgb_predictions(all_predictions))
18
  df.columns = [
19
  "NO2 + day 1",
20
  "O3 + day 1",
 
14
  caller = APICaller()
15
  all_predictions = caller.lag_data()
16
 
17
+ df = pd.DataFrame(predictor.xgb_predictions(all_predictions, normalized=False))
18
  df.columns = [
19
  "NO2 + day 1",
20
  "O3 + day 1",
air_quality_forecast/parser_ui.py CHANGED
@@ -184,7 +184,7 @@ def main():
184
  if model == "random_forest":
185
  y_pred = predictor.random_forest_predictions(predict_dataset)
186
  if model == "xgboost":
187
- y_pred = predictor.xgb_predictions(predict_dataset)
188
 
189
  print(pd.DataFrame(y_pred).head())
190
 
 
184
  if model == "random_forest":
185
  y_pred = predictor.random_forest_predictions(predict_dataset)
186
  if model == "xgboost":
187
+ y_pred = predictor.xgb_predictions(predict_dataset, normalized=True)
188
 
189
  print(pd.DataFrame(y_pred).head())
190
 
air_quality_forecast/prediction.py CHANGED
@@ -5,6 +5,7 @@ from sklearn.base import BaseEstimator
5
  from sklearn.metrics import root_mean_squared_error, mean_squared_error
6
  import pickle
7
  import xgboost
 
8
 
9
 
10
  class PredictorModels:
@@ -50,7 +51,7 @@ class PredictorModels:
50
  )
51
  self._xgboost.load_model(os.path.join(models_path, "xgboost.xgb"))
52
 
53
- def xgb_predictions(self, x_test: pd.DataFrame) -> np.ndarray:
54
  """
55
  Makes predictions using the loaded XGBoost regressor.
56
 
@@ -59,6 +60,9 @@ class PredictorModels:
59
  x_test : pd.DataFrame
60
  Data points to make predictions on.
61
 
 
 
 
62
  Returns
63
  -------
64
  y_pred : np.ndarray
@@ -68,6 +72,15 @@ class PredictorModels:
68
  raise ValueError("x_test is None")
69
  if x_test.ndim != 2:
70
  raise ValueError("x_test must be 2 dimensional, got {}".format(x_test.ndim))
 
 
 
 
 
 
 
 
 
71
  xgb_test = xgboost.DMatrix(x_test)
72
  y_pred = self._xgboost.predict(xgb_test)
73
  return y_pred
@@ -123,7 +136,7 @@ if __name__ == "__main__":
123
 
124
  y_test_pred_dtree = predictor.decision_tree_predictions(x_train)
125
  y_test_pred_rf = predictor.random_forest_predictions(x_train)
126
- y_test_pred_xgb = predictor.xgb_predictions(x_train)
127
 
128
  print("Train Decision Tree MSE: ", mean_squared_error(y_train, y_test_pred_dtree))
129
  print("Train Random Forest MSE: ", mean_squared_error(y_train, y_test_pred_rf))
@@ -143,7 +156,7 @@ if __name__ == "__main__":
143
 
144
  y_test_pred_dtree = predictor.decision_tree_predictions(x_test)
145
  y_test_pred_rf = predictor.random_forest_predictions(x_test)
146
- y_test_pred_xgb = predictor.xgb_predictions(x_test)
147
 
148
  print("Test Decision Tree MSE: ", mean_squared_error(y_test, y_test_pred_dtree))
149
  print("Test Random Forest MSE: ", mean_squared_error(y_test, y_test_pred_rf))
 
5
  from sklearn.metrics import root_mean_squared_error, mean_squared_error
6
  import pickle
7
  import xgboost
8
+ import joblib
9
 
10
 
11
  class PredictorModels:
 
51
  )
52
  self._xgboost.load_model(os.path.join(models_path, "xgboost.xgb"))
53
 
54
+ def xgb_predictions(self, x_test: pd.DataFrame, normalized: bool) -> np.ndarray:
55
  """
56
  Makes predictions using the loaded XGBoost regressor.
57
 
 
60
  x_test : pd.DataFrame
61
  Data points to make predictions on.
62
 
63
+ normalized : bool
64
+ Whether the data is normalized or not.
65
+
66
  Returns
67
  -------
68
  y_pred : np.ndarray
 
72
  raise ValueError("x_test is None")
73
  if x_test.ndim != 2:
74
  raise ValueError("x_test must be 2 dimensional, got {}".format(x_test.ndim))
75
+
76
+ if not normalized:
77
+ project_root = os.path.dirname(os.path.dirname(__file__))
78
+ saved_models_path = os.path.join(project_root, "saved_models")
79
+ normalizer = joblib.load(
80
+ os.path.join(saved_models_path, "normalizer.joblib")
81
+ )
82
+ x_test = normalizer.transform(x_test)
83
+
84
  xgb_test = xgboost.DMatrix(x_test)
85
  y_pred = self._xgboost.predict(xgb_test)
86
  return y_pred
 
136
 
137
  y_test_pred_dtree = predictor.decision_tree_predictions(x_train)
138
  y_test_pred_rf = predictor.random_forest_predictions(x_train)
139
+ y_test_pred_xgb = predictor.xgb_predictions(x_train, normalized=True)
140
 
141
  print("Train Decision Tree MSE: ", mean_squared_error(y_train, y_test_pred_dtree))
142
  print("Train Random Forest MSE: ", mean_squared_error(y_train, y_test_pred_rf))
 
156
 
157
  y_test_pred_dtree = predictor.decision_tree_predictions(x_test)
158
  y_test_pred_rf = predictor.random_forest_predictions(x_test)
159
+ y_test_pred_xgb = predictor.xgb_predictions(x_test, normalized=True)
160
 
161
  print("Test Decision Tree MSE: ", mean_squared_error(y_test, y_test_pred_dtree))
162
  print("Test Random Forest MSE: ", mean_squared_error(y_test, y_test_pred_rf))
streamlit_src/controllers/admin_controller.py CHANGED
@@ -59,11 +59,48 @@ class AdminController(UserController):
59
  dataset = self._view.upload_dataset()
60
  if dataset is not None:
61
  data = pd.read_csv(dataset)
 
62
  self._perform_data_validation(data)
63
  if "date" in data.columns or "datetime" in data.columns:
64
  data.set_index(
65
  "date" if "date" in data.columns else "datetime", inplace=True
66
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  self._view.display_datatable(data, message="### User Data")
68
 
69
  prediction = self._make_prediction(data)
@@ -90,7 +127,7 @@ class AdminController(UserController):
90
  pd.DataFrame: The prediction.
91
  """
92
  predictor = PredictorModels()
93
- prediction = predictor.xgb_predictions(data)
94
  prediction = pd.DataFrame(
95
  prediction,
96
  columns=[
 
59
  dataset = self._view.upload_dataset()
60
  if dataset is not None:
61
  data = pd.read_csv(dataset)
62
+
63
  self._perform_data_validation(data)
64
  if "date" in data.columns or "datetime" in data.columns:
65
  data.set_index(
66
  "date" if "date" in data.columns else "datetime", inplace=True
67
  )
68
+
69
+ data.columns = [
70
+ "pm25 - day 1",
71
+ "pm10 - day 1",
72
+ "o3 - day 1",
73
+ "no2 - day 1",
74
+ "temp - day 1",
75
+ "humidity - day 1",
76
+ "visibility - day 1",
77
+ "solarradiation - day 1",
78
+ "precip - day 1",
79
+ "windspeed - day 1",
80
+ "winddir - day 1",
81
+ "pm25 - day 2",
82
+ "pm10 - day 2",
83
+ "o3 - day 2",
84
+ "no2 - day 2",
85
+ "temp - day 2",
86
+ "humidity - day 2",
87
+ "visibility - day 2",
88
+ "solarradiation - day 2",
89
+ "precip - day 2",
90
+ "windspeed - day 2",
91
+ "winddir - day 2",
92
+ "pm25 - day 3",
93
+ "pm10 - day 3",
94
+ "o3 - day 3",
95
+ "no2 - day 3",
96
+ "temp - day 3",
97
+ "humidity - day 3",
98
+ "visibility - day 3",
99
+ "solarradiation - day 3",
100
+ "precip - day 3",
101
+ "windspeed - day 3",
102
+ "winddir - day 3",
103
+ ]
104
  self._view.display_datatable(data, message="### User Data")
105
 
106
  prediction = self._make_prediction(data)
 
127
  pd.DataFrame: The prediction.
128
  """
129
  predictor = PredictorModels()
130
+ prediction = predictor.xgb_predictions(data, normalized=False)
131
  prediction = pd.DataFrame(
132
  prediction,
133
  columns=[