Spaces: Sleeping
feat: standardize feature engineering and push new production model
Browse files
models/rossmann_production_model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 53873380
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59d3bda897c6d9678abfce7bd5b631183319efbdf275113d83463ad3d52f706e
|
| 3 |
size 53873380
|
scripts/train_production_model.py
CHANGED
|
@@ -33,17 +33,16 @@ def run_production_training():
|
|
| 33 |
df_feat = pipeline.run_feature_engineering(df_raw)
|
| 34 |
|
| 35 |
# 3. Define Final Feature Set
|
| 36 |
-
#
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
df_feat[col] = le.fit_transform(df_feat[col].astype(str))
|
| 42 |
|
| 43 |
feature_cols = [
|
| 44 |
'Store', 'DayOfWeek', 'Promo', 'StateHoliday', 'SchoolHoliday',
|
| 45 |
'Year', 'Month', 'Day', 'IsWeekend', 'DayOfMonth',
|
| 46 |
-
'CompetitionDistance', '
|
| 47 |
] + [c for c in df_feat.columns if 'fourier' in c or 'easter' in c]
|
| 48 |
|
| 49 |
# 4. Final Training (using all available data to create the 'Gold' model)
|
|
|
|
| 33 |
df_feat = pipeline.run_feature_engineering(df_raw)
|
| 34 |
|
| 35 |
# 3. Define Final Feature Set
|
| 36 |
+
# Consistent Encoding with app.py
|
| 37 |
+
if 'StoreType' in df_feat.columns:
|
| 38 |
+
df_feat['StoreType'] = df_feat['StoreType'].astype(str).map({'a':1, 'b':2, 'c':3, 'd':4}).fillna(0)
|
| 39 |
+
if 'Assortment' in df_feat.columns:
|
| 40 |
+
df_feat['Assortment'] = df_feat['Assortment'].astype(str).map({'a':1, 'b':2, 'c':3}).fillna(0)
|
|
|
|
| 41 |
|
| 42 |
feature_cols = [
|
| 43 |
'Store', 'DayOfWeek', 'Promo', 'StateHoliday', 'SchoolHoliday',
|
| 44 |
'Year', 'Month', 'Day', 'IsWeekend', 'DayOfMonth',
|
| 45 |
+
'CompetitionDistance', 'StoreType', 'Assortment'
|
| 46 |
] + [c for c in df_feat.columns if 'fourier' in c or 'easter' in c]
|
| 47 |
|
| 48 |
# 4. Final Training (using all available data to create the 'Gold' model)
|
src/features.py
CHANGED
|
@@ -22,7 +22,7 @@ class DateTransformation(FeatureEngineeringStrategy):
|
|
| 22 |
df_transformed['Year'] = df_transformed[date_col].dt.year
|
| 23 |
df_transformed['Month'] = df_transformed[date_col].dt.month
|
| 24 |
df_transformed['Day'] = df_transformed[date_col].dt.day
|
| 25 |
-
df_transformed['DayOfWeek'] = df_transformed[date_col].dt.dayofweek
|
| 26 |
df_transformed['IsWeekend'] = (df_transformed[date_col].dt.dayofweek >= 5).astype(int)
|
| 27 |
df_transformed['DayOfMonth'] = df_transformed[date_col].dt.day
|
| 28 |
return df_transformed
|
|
|
|
| 22 |
df_transformed['Year'] = df_transformed[date_col].dt.year
|
| 23 |
df_transformed['Month'] = df_transformed[date_col].dt.month
|
| 24 |
df_transformed['Day'] = df_transformed[date_col].dt.day
|
| 25 |
+
df_transformed['DayOfWeek'] = df_transformed[date_col].dt.dayofweek + 1
|
| 26 |
df_transformed['IsWeekend'] = (df_transformed[date_col].dt.dayofweek >= 5).astype(int)
|
| 27 |
df_transformed['DayOfMonth'] = df_transformed[date_col].dt.day
|
| 28 |
return df_transformed
|