Spaces:
Build error
Build error
atodorov284 committed on
Commit ·
c285c60
1
Parent(s): a5cacc1
Workflow fix for flake8 and reproducibility.
Browse files
.github/workflows/running.yml
CHANGED
|
@@ -26,4 +26,4 @@ jobs:
|
|
| 26 |
- name: Run main.py
|
| 27 |
id: run_main
|
| 28 |
run: |
|
| 29 |
-
python3 main.py
|
|
|
|
| 26 |
- name: Run main.py
|
| 27 |
id: run_main
|
| 28 |
run: |
|
| 29 |
+
python3 air-quality-forecast/main.py
|
air-quality-forecast/data_pipeline.py
CHANGED
|
@@ -224,10 +224,7 @@ class PreprocessingPipeline:
|
|
| 224 |
self.normalizer = MinMaxScaler()
|
| 225 |
|
| 226 |
def train_test_split(
|
| 227 |
-
self,
|
| 228 |
-
x: pd.DataFrame,
|
| 229 |
-
y: pd.DataFrame,
|
| 230 |
-
test_size: float = 0.2
|
| 231 |
) -> Tuple[
|
| 232 |
pd.DataFrame,
|
| 233 |
pd.DataFrame,
|
|
@@ -245,9 +242,9 @@ class PreprocessingPipeline:
|
|
| 245 |
InputValidator.validate_type(x, pd.DataFrame, "data")
|
| 246 |
InputValidator.validate_type(y, pd.DataFrame, "data")
|
| 247 |
InputValidator.validate_type(test_size, float, "test_size")
|
| 248 |
-
|
| 249 |
x_train, x_test, y_train, y_test = train_test_split(
|
| 250 |
-
x, y, test_size=
|
| 251 |
)
|
| 252 |
|
| 253 |
return x_train, x_test, y_train, y_test
|
|
@@ -283,16 +280,14 @@ class PreprocessingPipeline:
|
|
| 283 |
]
|
| 284 |
x = preprocessed_data.drop(columns_to_predict, axis=1)
|
| 285 |
y = preprocessed_data[columns_to_predict]
|
| 286 |
-
x_train, x_test, y_train, y_test = (
|
| 287 |
-
self.train_test_split(x, y)
|
| 288 |
-
)
|
| 289 |
|
| 290 |
# Step 5: Normalize data for 2 sets (x_train, x_test)
|
| 291 |
x_train[x_train.columns] = self.normalizer.fit_transform(
|
| 292 |
x_train[x_train.columns]
|
| 293 |
)
|
| 294 |
x_test[x_test.columns] = self.normalizer.transform(x_test[x_test.columns])
|
| 295 |
-
|
| 296 |
# Convert the normalized NumPy array back to a DataFrame
|
| 297 |
# normalized_x_train = pd.DataFrame(x_train, columns=preprocessed_data.columns, index=preprocessed_data.index)
|
| 298 |
|
|
@@ -307,6 +302,7 @@ class PreprocessingPipeline:
|
|
| 307 |
|
| 308 |
return preprocessed_data
|
| 309 |
|
|
|
|
| 310 |
if __name__ == "__main__":
|
| 311 |
pipeline = PreprocessingPipeline()
|
| 312 |
pipeline.run_pipeline()
|
|
|
|
| 224 |
self.normalizer = MinMaxScaler()
|
| 225 |
|
| 226 |
def train_test_split(
|
| 227 |
+
self, x: pd.DataFrame, y: pd.DataFrame, test_size: float = 0.2
|
|
|
|
|
|
|
|
|
|
| 228 |
) -> Tuple[
|
| 229 |
pd.DataFrame,
|
| 230 |
pd.DataFrame,
|
|
|
|
| 242 |
InputValidator.validate_type(x, pd.DataFrame, "data")
|
| 243 |
InputValidator.validate_type(y, pd.DataFrame, "data")
|
| 244 |
InputValidator.validate_type(test_size, float, "test_size")
|
| 245 |
+
|
| 246 |
x_train, x_test, y_train, y_test = train_test_split(
|
| 247 |
+
x, y, test_size=test_size, shuffle=False
|
| 248 |
)
|
| 249 |
|
| 250 |
return x_train, x_test, y_train, y_test
|
|
|
|
| 280 |
]
|
| 281 |
x = preprocessed_data.drop(columns_to_predict, axis=1)
|
| 282 |
y = preprocessed_data[columns_to_predict]
|
| 283 |
+
x_train, x_test, y_train, y_test = self.train_test_split(x, y)
|
|
|
|
|
|
|
| 284 |
|
| 285 |
# Step 5: Normalize data for 2 sets (x_train, x_test)
|
| 286 |
x_train[x_train.columns] = self.normalizer.fit_transform(
|
| 287 |
x_train[x_train.columns]
|
| 288 |
)
|
| 289 |
x_test[x_test.columns] = self.normalizer.transform(x_test[x_test.columns])
|
| 290 |
+
|
| 291 |
# Convert the normalized NumPy array back to a DataFrame
|
| 292 |
# normalized_x_train = pd.DataFrame(x_train, columns=preprocessed_data.columns, index=preprocessed_data.index)
|
| 293 |
|
|
|
|
| 302 |
|
| 303 |
return preprocessed_data
|
| 304 |
|
| 305 |
+
|
| 306 |
if __name__ == "__main__":
|
| 307 |
pipeline = PreprocessingPipeline()
|
| 308 |
pipeline.run_pipeline()
|