Spaces:
Sleeping
Sleeping
atodorov284
committed on
Commit
·
69a67ba
1
Parent(s):
58f52be
data wasn't normalized, predictions were wrong
Browse files
air_quality_forecast/get_prediction_data.py
CHANGED
|
@@ -14,7 +14,7 @@ def main():
|
|
| 14 |
caller = APICaller()
|
| 15 |
all_predictions = caller.lag_data()
|
| 16 |
|
| 17 |
-
df = pd.DataFrame(predictor.xgb_predictions(all_predictions))
|
| 18 |
df.columns = [
|
| 19 |
"NO2 + day 1",
|
| 20 |
"O3 + day 1",
|
|
|
|
| 14 |
caller = APICaller()
|
| 15 |
all_predictions = caller.lag_data()
|
| 16 |
|
| 17 |
+
df = pd.DataFrame(predictor.xgb_predictions(all_predictions, normalized=False))
|
| 18 |
df.columns = [
|
| 19 |
"NO2 + day 1",
|
| 20 |
"O3 + day 1",
|
air_quality_forecast/parser_ui.py
CHANGED
|
@@ -184,7 +184,7 @@ def main():
|
|
| 184 |
if model == "random_forest":
|
| 185 |
y_pred = predictor.random_forest_predictions(predict_dataset)
|
| 186 |
if model == "xgboost":
|
| 187 |
-
y_pred = predictor.xgb_predictions(predict_dataset)
|
| 188 |
|
| 189 |
print(pd.DataFrame(y_pred).head())
|
| 190 |
|
|
|
|
| 184 |
if model == "random_forest":
|
| 185 |
y_pred = predictor.random_forest_predictions(predict_dataset)
|
| 186 |
if model == "xgboost":
|
| 187 |
+
y_pred = predictor.xgb_predictions(predict_dataset, normalized=True)
|
| 188 |
|
| 189 |
print(pd.DataFrame(y_pred).head())
|
| 190 |
|
air_quality_forecast/prediction.py
CHANGED
|
@@ -5,6 +5,7 @@ from sklearn.base import BaseEstimator
|
|
| 5 |
from sklearn.metrics import root_mean_squared_error, mean_squared_error
|
| 6 |
import pickle
|
| 7 |
import xgboost
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
class PredictorModels:
|
|
@@ -50,7 +51,7 @@ class PredictorModels:
|
|
| 50 |
)
|
| 51 |
self._xgboost.load_model(os.path.join(models_path, "xgboost.xgb"))
|
| 52 |
|
| 53 |
-
def xgb_predictions(self, x_test: pd.DataFrame) -> np.ndarray:
|
| 54 |
"""
|
| 55 |
Makes predictions using the loaded XGBoost regressor.
|
| 56 |
|
|
@@ -59,6 +60,9 @@ class PredictorModels:
|
|
| 59 |
x_test : pd.DataFrame
|
| 60 |
Data points to make predictions on.
|
| 61 |
|
|
|
|
|
|
|
|
|
|
| 62 |
Returns
|
| 63 |
-------
|
| 64 |
y_pred : np.ndarray
|
|
@@ -68,6 +72,15 @@ class PredictorModels:
|
|
| 68 |
raise ValueError("x_test is None")
|
| 69 |
if x_test.ndim != 2:
|
| 70 |
raise ValueError("x_test must be 2 dimensional, got {}".format(x_test.ndim))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
xgb_test = xgboost.DMatrix(x_test)
|
| 72 |
y_pred = self._xgboost.predict(xgb_test)
|
| 73 |
return y_pred
|
|
@@ -123,7 +136,7 @@ if __name__ == "__main__":
|
|
| 123 |
|
| 124 |
y_test_pred_dtree = predictor.decision_tree_predictions(x_train)
|
| 125 |
y_test_pred_rf = predictor.random_forest_predictions(x_train)
|
| 126 |
-
y_test_pred_xgb = predictor.xgb_predictions(x_train)
|
| 127 |
|
| 128 |
print("Train Decision Tree MSE: ", mean_squared_error(y_train, y_test_pred_dtree))
|
| 129 |
print("Train Random Forest MSE: ", mean_squared_error(y_train, y_test_pred_rf))
|
|
@@ -143,7 +156,7 @@ if __name__ == "__main__":
|
|
| 143 |
|
| 144 |
y_test_pred_dtree = predictor.decision_tree_predictions(x_test)
|
| 145 |
y_test_pred_rf = predictor.random_forest_predictions(x_test)
|
| 146 |
-
y_test_pred_xgb = predictor.xgb_predictions(x_test)
|
| 147 |
|
| 148 |
print("Test Decision Tree MSE: ", mean_squared_error(y_test, y_test_pred_dtree))
|
| 149 |
print("Test Random Forest MSE: ", mean_squared_error(y_test, y_test_pred_rf))
|
|
|
|
| 5 |
from sklearn.metrics import root_mean_squared_error, mean_squared_error
|
| 6 |
import pickle
|
| 7 |
import xgboost
|
| 8 |
+
import joblib
|
| 9 |
|
| 10 |
|
| 11 |
class PredictorModels:
|
|
|
|
| 51 |
)
|
| 52 |
self._xgboost.load_model(os.path.join(models_path, "xgboost.xgb"))
|
| 53 |
|
| 54 |
+
def xgb_predictions(self, x_test: pd.DataFrame, normalized: bool) -> np.ndarray:
|
| 55 |
"""
|
| 56 |
Makes predictions using the loaded XGBoost regressor.
|
| 57 |
|
|
|
|
| 60 |
x_test : pd.DataFrame
|
| 61 |
Data points to make predictions on.
|
| 62 |
|
| 63 |
+
normalized : bool
|
| 64 |
+
Whether the data is normalized or not.
|
| 65 |
+
|
| 66 |
Returns
|
| 67 |
-------
|
| 68 |
y_pred : np.ndarray
|
|
|
|
| 72 |
raise ValueError("x_test is None")
|
| 73 |
if x_test.ndim != 2:
|
| 74 |
raise ValueError("x_test must be 2 dimensional, got {}".format(x_test.ndim))
|
| 75 |
+
|
| 76 |
+
if not normalized:
|
| 77 |
+
project_root = os.path.dirname(os.path.dirname(__file__))
|
| 78 |
+
saved_models_path = os.path.join(project_root, "saved_models")
|
| 79 |
+
normalizer = joblib.load(
|
| 80 |
+
os.path.join(saved_models_path, "normalizer.joblib")
|
| 81 |
+
)
|
| 82 |
+
x_test = normalizer.transform(x_test)
|
| 83 |
+
|
| 84 |
xgb_test = xgboost.DMatrix(x_test)
|
| 85 |
y_pred = self._xgboost.predict(xgb_test)
|
| 86 |
return y_pred
|
|
|
|
| 136 |
|
| 137 |
y_test_pred_dtree = predictor.decision_tree_predictions(x_train)
|
| 138 |
y_test_pred_rf = predictor.random_forest_predictions(x_train)
|
| 139 |
+
y_test_pred_xgb = predictor.xgb_predictions(x_train, normalized=True)
|
| 140 |
|
| 141 |
print("Train Decision Tree MSE: ", mean_squared_error(y_train, y_test_pred_dtree))
|
| 142 |
print("Train Random Forest MSE: ", mean_squared_error(y_train, y_test_pred_rf))
|
|
|
|
| 156 |
|
| 157 |
y_test_pred_dtree = predictor.decision_tree_predictions(x_test)
|
| 158 |
y_test_pred_rf = predictor.random_forest_predictions(x_test)
|
| 159 |
+
y_test_pred_xgb = predictor.xgb_predictions(x_test, normalized=True)
|
| 160 |
|
| 161 |
print("Test Decision Tree MSE: ", mean_squared_error(y_test, y_test_pred_dtree))
|
| 162 |
print("Test Random Forest MSE: ", mean_squared_error(y_test, y_test_pred_rf))
|
streamlit_src/controllers/admin_controller.py
CHANGED
|
@@ -59,11 +59,48 @@ class AdminController(UserController):
|
|
| 59 |
dataset = self._view.upload_dataset()
|
| 60 |
if dataset is not None:
|
| 61 |
data = pd.read_csv(dataset)
|
|
|
|
| 62 |
self._perform_data_validation(data)
|
| 63 |
if "date" in data.columns or "datetime" in data.columns:
|
| 64 |
data.set_index(
|
| 65 |
"date" if "date" in data.columns else "datetime", inplace=True
|
| 66 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
self._view.display_datatable(data, message="### User Data")
|
| 68 |
|
| 69 |
prediction = self._make_prediction(data)
|
|
@@ -90,7 +127,7 @@ class AdminController(UserController):
|
|
| 90 |
pd.DataFrame: The prediction.
|
| 91 |
"""
|
| 92 |
predictor = PredictorModels()
|
| 93 |
-
prediction = predictor.xgb_predictions(data)
|
| 94 |
prediction = pd.DataFrame(
|
| 95 |
prediction,
|
| 96 |
columns=[
|
|
|
|
| 59 |
dataset = self._view.upload_dataset()
|
| 60 |
if dataset is not None:
|
| 61 |
data = pd.read_csv(dataset)
|
| 62 |
+
|
| 63 |
self._perform_data_validation(data)
|
| 64 |
if "date" in data.columns or "datetime" in data.columns:
|
| 65 |
data.set_index(
|
| 66 |
"date" if "date" in data.columns else "datetime", inplace=True
|
| 67 |
)
|
| 68 |
+
|
| 69 |
+
data.columns = [
|
| 70 |
+
"pm25 - day 1",
|
| 71 |
+
"pm10 - day 1",
|
| 72 |
+
"o3 - day 1",
|
| 73 |
+
"no2 - day 1",
|
| 74 |
+
"temp - day 1",
|
| 75 |
+
"humidity - day 1",
|
| 76 |
+
"visibility - day 1",
|
| 77 |
+
"solarradiation - day 1",
|
| 78 |
+
"precip - day 1",
|
| 79 |
+
"windspeed - day 1",
|
| 80 |
+
"winddir - day 1",
|
| 81 |
+
"pm25 - day 2",
|
| 82 |
+
"pm10 - day 2",
|
| 83 |
+
"o3 - day 2",
|
| 84 |
+
"no2 - day 2",
|
| 85 |
+
"temp - day 2",
|
| 86 |
+
"humidity - day 2",
|
| 87 |
+
"visibility - day 2",
|
| 88 |
+
"solarradiation - day 2",
|
| 89 |
+
"precip - day 2",
|
| 90 |
+
"windspeed - day 2",
|
| 91 |
+
"winddir - day 2",
|
| 92 |
+
"pm25 - day 3",
|
| 93 |
+
"pm10 - day 3",
|
| 94 |
+
"o3 - day 3",
|
| 95 |
+
"no2 - day 3",
|
| 96 |
+
"temp - day 3",
|
| 97 |
+
"humidity - day 3",
|
| 98 |
+
"visibility - day 3",
|
| 99 |
+
"solarradiation - day 3",
|
| 100 |
+
"precip - day 3",
|
| 101 |
+
"windspeed - day 3",
|
| 102 |
+
"winddir - day 3",
|
| 103 |
+
]
|
| 104 |
self._view.display_datatable(data, message="### User Data")
|
| 105 |
|
| 106 |
prediction = self._make_prediction(data)
|
|
|
|
| 127 |
pd.DataFrame: The prediction.
|
| 128 |
"""
|
| 129 |
predictor = PredictorModels()
|
| 130 |
+
prediction = predictor.xgb_predictions(data, normalized=False)
|
| 131 |
prediction = pd.DataFrame(
|
| 132 |
prediction,
|
| 133 |
columns=[
|