Spaces:
Build error
Build error
| import pandas as pd | |
| from src.utils.helper_functions import save_parquet, load_parquet | |
| from config import Config | |
# Mapping view of the attributes defined on Config, so that settings can be
# looked up by key (this module reads config["fold_input_directory"]).
config = vars(Config)
def prepare_data(
    dataframe,
    data,
    split_local_test,
    add_datetime_features=True,
    add_lag_features=True
):
    """Run the enabled feature-building steps on ``dataframe``.

    Args:
        dataframe: Frame to enrich; handed to ``datetime_features`` and then
            to ``lag_features`` when the matching flag is truthy.
        data: Forwarded unchanged to ``lag_features``.
        split_local_test: Forwarded unchanged to ``lag_features``.
        add_datetime_features: When truthy, apply ``datetime_features``.
        add_lag_features: When truthy, apply ``lag_features``.

    Returns:
        The result of the last step that ran, or ``dataframe`` itself when
        both flags are falsy.
    """
    print('Building features...')

    features = dataframe
    if add_datetime_features:
        features = datetime_features(features)

    # Nothing left to do once the lag step is switched off.
    if not add_lag_features:
        return features

    return lag_features(features, data, split_local_test)
def lag_features(dataframe, data, split_local_test):
    """Left-merge lagged ``*_backlog`` columns onto ``dataframe``.

    When ``split_local_test`` is truthy the lag columns are built from
    ``data``: for every column whose name ends in ``_backlog`` and for each
    shift of 9, 10, 11 and 12 rows, a column ``{col}_shift_{shift}`` is
    written onto ``data`` (in place) holding that column shifted within its
    ``product_id`` group. Those columns, together with ``product_id`` and
    ``date``, are saved to ``shift_features.parquet`` under
    ``config["fold_input_directory"]``. Otherwise the same file is loaded
    instead of being rebuilt.

    Args:
        dataframe: Left side of the merge; needs ``product_id`` and ``date``.
        data: Source of the ``*_backlog`` columns; only read when
            ``split_local_test`` is truthy, and mutated in that case.
        split_local_test: Truthy to build and save the lag features, falsy to
            load the previously saved ones.

    Returns:
        ``dataframe`` left-merged with the lag columns on
        ``['product_id', 'date']``.
    """
    join_keys = ['product_id', 'date']

    if not split_local_test:
        lagged = load_parquet(f'{config["fold_input_directory"]}/shift_features.parquet')
    else:
        source_cols = list(filter(lambda name: name.endswith('_backlog'), data.columns))
        lagged_names = []
        for source_col in source_cols:
            for periods in range(9, 13):
                lagged_name = f'{source_col}_shift_{periods}'
                # Shift is positional within each product group.
                # NOTE(review): presumably `data` is ordered by date per
                # product so that a row shift equals a time lag - confirm.
                per_product = data.groupby('product_id')[source_col]
                data.loc[:, lagged_name] = per_product.shift(periods)
                lagged_names.append(lagged_name)

        selected = lagged_names + join_keys
        save_parquet(
            dataframe=data[selected],
            path=f'{config["fold_input_directory"]}/shift_features.parquet'
        )
        lagged = data[selected]

    return pd.merge(dataframe, lagged, how='left', on=join_keys)
def datetime_features(dataframe, date='date', suffix=''):
    """Add calendar columns derived from a datetime column, in place.

    Four columns are written onto ``dataframe``: month, year, quarter and
    ISO week of year. Each is named ``f'{suffix}_<part>'``; note that
    ``suffix`` comes *before* the underscore, so the default empty string
    yields ``_month``, ``_year``, ``_quarter`` and ``_weekofyear``.

    Args:
        dataframe: Frame to extend; it is modified in place.
        date: Name of the column read through the ``.dt`` accessor.
        suffix: Text placed in front of ``_<part>`` in each new column name.

    Returns:
        The same ``dataframe`` object, now carrying the new columns.
    """
    # These parts are plain attributes of the ``.dt`` accessor.
    for part in ('month', 'year', 'quarter'):
        dataframe[f'{suffix}_{part}'] = getattr(dataframe[date].dt, part)

    # The ISO week comes from the isocalendar() frame rather than an attribute.
    iso_calendar = dataframe[date].dt.isocalendar()
    dataframe[f'{suffix}_weekofyear'] = iso_calendar.week
    return dataframe