Spaces:

03chrisk
/

air-quality-forecasting

Sleeping

App Files Community

atodorov284 committed on Oct 18, 2024

Commit

69a67ba

1 Parent(s): 58f52be

data wasn't normalized, predictions were wrong

Browse files

Files changed (4) hide show

air_quality_forecast/get_prediction_data.py +1 -1
air_quality_forecast/parser_ui.py +1 -1
air_quality_forecast/prediction.py +16 -3
streamlit_src/controllers/admin_controller.py +38 -1

air_quality_forecast/get_prediction_data.py CHANGED Viewed

@@ -14,7 +14,7 @@ def main():
     caller = APICaller()
     all_predictions = caller.lag_data()
-    df = pd.DataFrame(predictor.xgb_predictions(all_predictions))
     df.columns = [
         "NO2 + day 1",
         "O3 + day 1",

     caller = APICaller()
     all_predictions = caller.lag_data()
+    df = pd.DataFrame(predictor.xgb_predictions(all_predictions, normalized=False))
     df.columns = [
         "NO2 + day 1",
         "O3 + day 1",

air_quality_forecast/parser_ui.py CHANGED Viewed

@@ -184,7 +184,7 @@ def main():
         if model == "random_forest":
             y_pred = predictor.random_forest_predictions(predict_dataset)
         if model == "xgboost":
-            y_pred = predictor.xgb_predictions(predict_dataset)
         print(pd.DataFrame(y_pred).head())

         if model == "random_forest":
             y_pred = predictor.random_forest_predictions(predict_dataset)
         if model == "xgboost":
+            y_pred = predictor.xgb_predictions(predict_dataset, normalized=True)
         print(pd.DataFrame(y_pred).head())

air_quality_forecast/prediction.py CHANGED Viewed

@@ -5,6 +5,7 @@ from sklearn.base import BaseEstimator
 from sklearn.metrics import root_mean_squared_error, mean_squared_error
 import pickle
 import xgboost
 class PredictorModels:
@@ -50,7 +51,7 @@ class PredictorModels:
         )
         self._xgboost.load_model(os.path.join(models_path, "xgboost.xgb"))
-    def xgb_predictions(self, x_test: pd.DataFrame) -> np.ndarray:
         """
         Makes predictions using the loaded XGBoost regressor.
@@ -59,6 +60,9 @@ class PredictorModels:
         x_test : pd.DataFrame
             Data points to make predictions on.
         Returns
         -------
         y_pred : np.ndarray
@@ -68,6 +72,15 @@ class PredictorModels:
             raise ValueError("x_test is None")
         if x_test.ndim != 2:
             raise ValueError("x_test must be 2 dimensional, got {}".format(x_test.ndim))
         xgb_test = xgboost.DMatrix(x_test)
         y_pred = self._xgboost.predict(xgb_test)
         return y_pred
@@ -123,7 +136,7 @@ if __name__ == "__main__":
     y_test_pred_dtree = predictor.decision_tree_predictions(x_train)
     y_test_pred_rf = predictor.random_forest_predictions(x_train)
-    y_test_pred_xgb = predictor.xgb_predictions(x_train)
     print("Train Decision Tree MSE: ", mean_squared_error(y_train, y_test_pred_dtree))
     print("Train Random Forest MSE: ", mean_squared_error(y_train, y_test_pred_rf))
@@ -143,7 +156,7 @@ if __name__ == "__main__":
     y_test_pred_dtree = predictor.decision_tree_predictions(x_test)
     y_test_pred_rf = predictor.random_forest_predictions(x_test)
-    y_test_pred_xgb = predictor.xgb_predictions(x_test)
     print("Test Decision Tree MSE: ", mean_squared_error(y_test, y_test_pred_dtree))
     print("Test Random Forest MSE: ", mean_squared_error(y_test, y_test_pred_rf))

 from sklearn.metrics import root_mean_squared_error, mean_squared_error
 import pickle
 import xgboost
+import joblib
 class PredictorModels:
         )
         self._xgboost.load_model(os.path.join(models_path, "xgboost.xgb"))
+    def xgb_predictions(self, x_test: pd.DataFrame, normalized: bool) -> np.ndarray:
         """
         Makes predictions using the loaded XGBoost regressor.
         x_test : pd.DataFrame
             Data points to make predictions on.
+        normalized : bool
+            Whether the data is normalized or not.
         Returns
         -------
         y_pred : np.ndarray
             raise ValueError("x_test is None")
         if x_test.ndim != 2:
             raise ValueError("x_test must be 2 dimensional, got {}".format(x_test.ndim))
+        if not normalized:
+            project_root = os.path.dirname(os.path.dirname(__file__))
+            saved_models_path = os.path.join(project_root, "saved_models")
+            normalizer = joblib.load(
+                os.path.join(saved_models_path, "normalizer.joblib")
+            )
+            x_test = normalizer.transform(x_test)
         xgb_test = xgboost.DMatrix(x_test)
         y_pred = self._xgboost.predict(xgb_test)
         return y_pred
     y_test_pred_dtree = predictor.decision_tree_predictions(x_train)
     y_test_pred_rf = predictor.random_forest_predictions(x_train)
+    y_test_pred_xgb = predictor.xgb_predictions(x_train, normalized=True)
     print("Train Decision Tree MSE: ", mean_squared_error(y_train, y_test_pred_dtree))
     print("Train Random Forest MSE: ", mean_squared_error(y_train, y_test_pred_rf))
     y_test_pred_dtree = predictor.decision_tree_predictions(x_test)
     y_test_pred_rf = predictor.random_forest_predictions(x_test)
+    y_test_pred_xgb = predictor.xgb_predictions(x_test, normalized=True)
     print("Test Decision Tree MSE: ", mean_squared_error(y_test, y_test_pred_dtree))
     print("Test Random Forest MSE: ", mean_squared_error(y_test, y_test_pred_rf))

streamlit_src/controllers/admin_controller.py CHANGED Viewed

@@ -59,11 +59,48 @@ class AdminController(UserController):
             dataset = self._view.upload_dataset()
             if dataset is not None:
                 data = pd.read_csv(dataset)
                 self._perform_data_validation(data)
                 if "date" in data.columns or "datetime" in data.columns:
                     data.set_index(
                         "date" if "date" in data.columns else "datetime", inplace=True
                     )
                 self._view.display_datatable(data, message="### User Data")
                 prediction = self._make_prediction(data)
@@ -90,7 +127,7 @@ class AdminController(UserController):
             pd.DataFrame: The prediction.
         """
         predictor = PredictorModels()
-        prediction = predictor.xgb_predictions(data)
         prediction = pd.DataFrame(
             prediction,
             columns=[

             dataset = self._view.upload_dataset()
             if dataset is not None:
                 data = pd.read_csv(dataset)
                 self._perform_data_validation(data)
                 if "date" in data.columns or "datetime" in data.columns:
                     data.set_index(
                         "date" if "date" in data.columns else "datetime", inplace=True
                     )
+                data.columns = [
+                    "pm25 - day 1",
+                    "pm10 - day 1",
+                    "o3 - day 1",
+                    "no2 - day 1",
+                    "temp - day 1",
+                    "humidity - day 1",
+                    "visibility - day 1",
+                    "solarradiation - day 1",
+                    "precip - day 1",
+                    "windspeed - day 1",
+                    "winddir - day 1",
+                    "pm25 - day 2",
+                    "pm10 - day 2",
+                    "o3 - day 2",
+                    "no2 - day 2",
+                    "temp - day 2",
+                    "humidity - day 2",
+                    "visibility - day 2",
+                    "solarradiation - day 2",
+                    "precip - day 2",
+                    "windspeed - day 2",
+                    "winddir - day 2",
+                    "pm25 - day 3",
+                    "pm10 - day 3",
+                    "o3 - day 3",
+                    "no2 - day 3",
+                    "temp - day 3",
+                    "humidity - day 3",
+                    "visibility - day 3",
+                    "solarradiation - day 3",
+                    "precip - day 3",
+                    "windspeed - day 3",
+                    "winddir - day 3",
+                ]
                 self._view.display_datatable(data, message="### User Data")
                 prediction = self._make_prediction(data)
             pd.DataFrame: The prediction.
         """
         predictor = PredictorModels()
+        prediction = predictor.xgb_predictions(data, normalized=False)
         prediction = pd.DataFrame(
             prediction,
             columns=[