atodorov284 committed on
Commit
c285c60
·
1 Parent(s): a5cacc1

Workflow fix for flake8 and reproducibility.

Browse files
.github/workflows/running.yml CHANGED
@@ -26,4 +26,4 @@ jobs:
26
  - name: Run main.py
27
  id: run_main
28
  run: |
29
- python3 main.py
 
26
  - name: Run main.py
27
  id: run_main
28
  run: |
29
+ python3 air-quality-forecast/main.py
air-quality-forecast/data_pipeline.py CHANGED
@@ -224,10 +224,7 @@ class PreprocessingPipeline:
224
  self.normalizer = MinMaxScaler()
225
 
226
  def train_test_split(
227
- self,
228
- x: pd.DataFrame,
229
- y: pd.DataFrame,
230
- test_size: float = 0.2
231
  ) -> Tuple[
232
  pd.DataFrame,
233
  pd.DataFrame,
@@ -245,9 +242,9 @@ class PreprocessingPipeline:
245
  InputValidator.validate_type(x, pd.DataFrame, "data")
246
  InputValidator.validate_type(y, pd.DataFrame, "data")
247
  InputValidator.validate_type(test_size, float, "test_size")
248
-
249
  x_train, x_test, y_train, y_test = train_test_split(
250
- x, y, test_size= test_size , shuffle=False
251
  )
252
 
253
  return x_train, x_test, y_train, y_test
@@ -283,16 +280,14 @@ class PreprocessingPipeline:
283
  ]
284
  x = preprocessed_data.drop(columns_to_predict, axis=1)
285
  y = preprocessed_data[columns_to_predict]
286
- x_train, x_test, y_train, y_test = (
287
- self.train_test_split(x, y)
288
- )
289
 
290
  # Step 5: Normalize data for 2 sets (x_train, x_test)
291
  x_train[x_train.columns] = self.normalizer.fit_transform(
292
  x_train[x_train.columns]
293
  )
294
  x_test[x_test.columns] = self.normalizer.transform(x_test[x_test.columns])
295
-
296
  # Convert the normalized NumPy array back to a DataFrame
297
  # normalized_x_train = pd.DataFrame(x_train, columns=preprocessed_data.columns, index=preprocessed_data.index)
298
 
@@ -307,6 +302,7 @@ class PreprocessingPipeline:
307
 
308
  return preprocessed_data
309
 
 
310
  if __name__ == "__main__":
311
  pipeline = PreprocessingPipeline()
312
  pipeline.run_pipeline()
 
224
  self.normalizer = MinMaxScaler()
225
 
226
  def train_test_split(
227
+ self, x: pd.DataFrame, y: pd.DataFrame, test_size: float = 0.2
 
 
 
228
  ) -> Tuple[
229
  pd.DataFrame,
230
  pd.DataFrame,
 
242
  InputValidator.validate_type(x, pd.DataFrame, "data")
243
  InputValidator.validate_type(y, pd.DataFrame, "data")
244
  InputValidator.validate_type(test_size, float, "test_size")
245
+
246
  x_train, x_test, y_train, y_test = train_test_split(
247
+ x, y, test_size=test_size, shuffle=False
248
  )
249
 
250
  return x_train, x_test, y_train, y_test
 
280
  ]
281
  x = preprocessed_data.drop(columns_to_predict, axis=1)
282
  y = preprocessed_data[columns_to_predict]
283
+ x_train, x_test, y_train, y_test = self.train_test_split(x, y)
 
 
284
 
285
  # Step 5: Normalize data for 2 sets (x_train, x_test)
286
  x_train[x_train.columns] = self.normalizer.fit_transform(
287
  x_train[x_train.columns]
288
  )
289
  x_test[x_test.columns] = self.normalizer.transform(x_test[x_test.columns])
290
+
291
  # Convert the normalized NumPy array back to a DataFrame
292
  # normalized_x_train = pd.DataFrame(x_train, columns=preprocessed_data.columns, index=preprocessed_data.index)
293
 
 
302
 
303
  return preprocessed_data
304
 
305
+
306
  if __name__ == "__main__":
307
  pipeline = PreprocessingPipeline()
308
  pipeline.run_pipeline()