Spaces:
Build error
Build error
| from abc import ABC, abstractmethod | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder | |
| from src.core import setup_logger | |
| logger = setup_logger(__name__) | |
class FeatureEngineeringStrategy(ABC):
    """Interface shared by every feature-engineering strategy.

    Concrete strategies implement :meth:`apply_transformation`; the method is
    marked abstract so a subclass that forgets to override it fails at
    instantiation instead of silently returning ``None``.
    """

    @abstractmethod
    def apply_transformation(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a frame derived from ``df`` with the strategy's features.

        Args:
            df: Input frame to transform.

        Returns:
            The transformed frame.
        """
class DateTransformation(FeatureEngineeringStrategy):
    """Expand the frame's date column into calendar-part columns."""

    def apply_transformation(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add Year/Month/Day/DayOfWeek/IsWeekend/DayOfMonth columns.

        The date column is ``'date'`` when present, otherwise ``'Date'``.
        When neither exists the input frame itself is returned untouched.

        Args:
            df: Input frame; it is not modified.

        Returns:
            A copy of ``df`` with the date column parsed to datetimes and the
            calendar columns appended, or ``df`` itself when there is no date
            column.
        """
        logger.info("Applying date transformation.")

        # Resolve which spelling of the date column is available, if any.
        column = 'Date'
        if 'date' in df.columns:
            column = 'date'
        elif 'Date' not in df.columns:
            return df

        result = df.copy()
        result[column] = pd.to_datetime(df[column])
        calendar = result[column].dt

        # DayOfWeek is 1-based (Monday=1); IsWeekend flags Saturday/Sunday.
        # DayOfMonth carries the same values as Day.
        derived = {
            'Year': calendar.year,
            'Month': calendar.month,
            'Day': calendar.day,
            'DayOfWeek': calendar.dayofweek + 1,
            'IsWeekend': (calendar.dayofweek >= 5).astype(int),
            'DayOfMonth': calendar.day,
        }
        for name, values in derived.items():
            result[name] = values
        return result
class FourierSeriesSeasonality(FeatureEngineeringStrategy):
    """Append sine/cosine seasonality terms computed from the date column."""

    def __init__(self, period: float = 365.25, order: int = 3):
        """Store the Fourier configuration.

        Args:
            period: Length of one seasonal cycle, in days.
            order: Number of harmonics; one sin and one cos column is added
                for each ``i`` in ``1..order``.
        """
        self.period = period
        self.order = order

    def apply_transformation(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add ``fourier_sin_i`` / ``fourier_cos_i`` columns for each harmonic.

        The date column is ``'date'`` when present, otherwise ``'Date'``.
        When neither exists the input frame is returned unchanged, matching
        how ``DateTransformation`` treats a missing date column.

        Args:
            df: Input frame; it is not modified.

        Returns:
            A copy of ``df`` with the Fourier columns appended, or ``df``
            itself when there is no date column. Rows whose date is missing
            (NaT) get NaN in every Fourier column.
        """
        logger.info(f"Applying Fourier terms (order={self.order})")
        date_col = 'date' if 'date' in df.columns else 'Date'
        if date_col not in df.columns:
            logger.warning("No 'date'/'Date' column found; skipping Fourier terms.")
            return df

        df_transformed = df.copy()
        stamps = pd.to_datetime(df_transformed[date_col]).values
        # Fractional days since the Unix epoch. Dividing a timedelta by one
        # day is independent of the array's datetime resolution, whereas
        # reinterpreting the values as int64 is only right for nanoseconds.
        days = (stamps - np.datetime64('1970-01-01')) / np.timedelta64(1, 'D')

        for i in range(1, self.order + 1):
            angle = 2 * np.pi * i * days / self.period
            df_transformed[f'fourier_sin_{i}'] = np.sin(angle)
            df_transformed[f'fourier_cos_{i}'] = np.cos(angle)
        return df_transformed
class EasterFeature(FeatureEngineeringStrategy):
    """Add the signed day offset from Easter Sunday and a +/-7 day flag."""

    @staticmethod
    def _easter_sunday(year: int) -> pd.Timestamp:
        """Return Western (Gregorian) Easter Sunday for ``year``.

        Uses the anonymous Gregorian ("Meeus/Jones/Butcher") algorithm; it
        reproduces the dates 2013-03-31, 2014-04-20, 2015-04-05 and
        2016-03-27 that this class previously hard-coded.
        """
        a = year % 19
        b, c = divmod(year, 100)
        d, e = divmod(b, 4)
        f = (b + 8) // 25
        g = (b - f + 1) // 3
        h = (19 * a + b - d - g + 15) % 30
        i, k = divmod(c, 4)
        l = (32 + 2 * e + 2 * i - h - k) % 7
        m = (a + 11 * h + 22 * l) // 451
        month, day_index = divmod(h + l - 7 * m + 114, 31)
        return pd.Timestamp(year=year, month=month, day=day_index + 1)

    def apply_transformation(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add ``days_to_easter`` and ``easter_effect`` columns.

        ``days_to_easter`` is ``date - Easter Sunday`` of the same calendar
        year, in days (negative before Easter, positive after). Rows whose
        date is missing keep the sentinel value 999. ``easter_effect`` is 1
        when the offset lies in ``[-7, 7]`` and 0 otherwise.

        The date column is ``'date'`` when present, otherwise ``'Date'``.
        When neither exists the input frame is returned unchanged, matching
        how ``DateTransformation`` treats a missing date column.

        Args:
            df: Input frame; it is not modified.

        Returns:
            A copy of ``df`` with the two Easter columns appended, or ``df``
            itself when there is no date column.
        """
        logger.info("Applying Easter feature.")
        date_col = 'date' if 'date' in df.columns else 'Date'
        if date_col not in df.columns:
            logger.warning("No 'date'/'Date' column found; skipping Easter feature.")
            return df

        df_transformed = df.copy()
        dates = pd.to_datetime(df_transformed[date_col])
        years = dates.dt.year

        # Sentinel for rows without a usable date; it is outside the +/-7 window.
        df_transformed['days_to_easter'] = 999
        for year in years.dropna().unique():
            mask = years == year
            easter = self._easter_sunday(int(year))
            df_transformed.loc[mask, 'days_to_easter'] = (dates[mask] - easter).dt.days

        df_transformed['easter_effect'] = (
            (df_transformed['days_to_easter'] >= -7) & (df_transformed['days_to_easter'] <= 7)
        ).astype(int)
        return df_transformed
class RossmannFeatureEngineering(FeatureEngineeringStrategy):
    """Clean up and derive the Rossmann store columns that are present."""

    # Every column read when deriving ``CompetitionOpenTime``; all of them
    # must exist, otherwise the derivation is skipped.
    _COMPETITION_AGE_INPUTS = (
        'Year',
        'Month',
        'CompetitionOpenSinceYear',
        'CompetitionOpenSinceMonth',
    )

    def apply_transformation(self, df: pd.DataFrame) -> pd.DataFrame:
        """Encode holidays, fill competition distance, derive competition age.

        Each step runs only when its input columns exist:

        * ``StateHoliday`` is stringified and mapped ``'0'->0, 'a'->1,
          'b'->2, 'c'->3``; any other value becomes 0.
        * Missing ``CompetitionDistance`` values are filled with 100000.
        * ``CompetitionOpenTime`` is the number of months between the
          competition-open year/month and the row's ``Year``/``Month``,
          with non-positive or missing results set to 0.

        Args:
            df: Input frame; it is not modified.

        Returns:
            A transformed copy of ``df``.
        """
        logger.info("Applying Rossmann retail features.")
        df_transformed = df.copy()

        if 'StateHoliday' in df_transformed.columns:
            df_transformed['StateHoliday'] = (
                df_transformed['StateHoliday']
                .astype(str)
                .map({'0': 0, 'a': 1, 'b': 2, 'c': 3})
                .fillna(0)
            )

        if 'CompetitionDistance' in df_transformed.columns:
            df_transformed['CompetitionDistance'] = df_transformed['CompetitionDistance'].fillna(100000)

        # Require all four inputs: checking only the two year columns would
        # raise KeyError when a month column is absent.
        if all(col in df_transformed.columns for col in self._COMPETITION_AGE_INPUTS):
            months_open = (
                12 * (df_transformed['Year'] - df_transformed['CompetitionOpenSinceYear'])
                + (df_transformed['Month'] - df_transformed['CompetitionOpenSinceMonth'])
            )
            # Keep strictly positive values; NaN compares False and so also
            # becomes 0, the same as the row-wise ``x if x > 0 else 0`` rule.
            df_transformed['CompetitionOpenTime'] = months_open.where(months_open > 0, 0)

        return df_transformed
class FeatureEngineer:
    """Context object that runs whichever strategy it currently holds."""

    def __init__(self, strategy: FeatureEngineeringStrategy):
        """Remember ``strategy`` as the active transformation."""
        self._strategy = strategy

    def set_strategy(self, strategy: FeatureEngineeringStrategy):
        """Swap the active transformation for ``strategy``."""
        self._strategy = strategy

    def apply_feature_engineering(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run the active strategy on ``df`` and hand back its result."""
        active = self._strategy
        transformed = active.apply_transformation(df)
        return transformed