Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| def _ensure_datetime_index(df): | |
| df = df.copy() | |
| if not isinstance(df.index, pd.DatetimeIndex): | |
| df = df.set_index('Reported Date') | |
| df.index = pd.to_datetime(df.index) | |
| return df | |
def create_forecasting_features(df):
    """Add calendar, lag, rolling and seasonal features for the short horizon.

    The frame is first passed through ``_ensure_datetime_index`` (which works
    on a copy), then the following columns are appended, in this order:

    * calendar: ``dayofweek``, ``quarter``, ``month``, ``year``,
      ``dayofyear``, ``weekofyear``
    * date-based lags of the price: ``lag14``, ``lag28``, ``lag56``,
      ``lag_3months``, ``lag_6months`` (NaN when the lagged date is absent)
    * row-based rolling mean/std over 7, 14 and 28 rows
    * exponential moving averages ``ema7`` and ``ema14``
    * group means of the price by month, ISO week and weekday
    * Fourier terms of ``dayofyear`` with periods 365 and 14
    * ``recent_min_14`` / ``recent_max_14`` / ``recent_range_14``: rolling
      14-row minimum, maximum and their difference
    * ``yearly_avg`` and the expanding ``cumulative_mean``

    Rolling, EMA and expanding features are computed in row order.
    NOTE(review): this presumably expects rows sorted chronologically;
    confirm against the caller.

    Args:
        df: frame containing a ``'Modal Price (Rs./Quintal)'`` column and
            either a ``DatetimeIndex`` or a ``'Reported Date'`` column.

    Returns:
        A new DataFrame with the feature columns added and the date index
        moved back into a regular column via ``reset_index()``.
    """
    price_col = 'Modal Price (Rs./Quintal)'

    df = _ensure_datetime_index(df)
    dates = df.index
    price = df[price_col]
    # date -> price lookup used by the calendar-offset lag features.
    price_by_date = price.to_dict()

    # Calendar features.
    df['dayofweek'] = dates.dayofweek
    df['quarter'] = dates.quarter
    df['month'] = dates.month
    df['year'] = dates.year
    df['dayofyear'] = dates.dayofyear
    df['weekofyear'] = dates.isocalendar().week

    # Lags are looked up by calendar date, not by row position, so gaps in
    # the series produce NaN instead of a value from the wrong day.
    lag_offsets = {
        'lag14': pd.Timedelta(days=14),
        'lag28': pd.Timedelta(days=28),
        'lag56': pd.Timedelta(days=56),
        'lag_3months': pd.DateOffset(months=3),
        'lag_6months': pd.DateOffset(months=6),
    }
    for column, offset in lag_offsets.items():
        df[column] = (dates - offset).map(price_by_date)

    # Row-based rolling statistics.
    for window in (7, 14, 28):
        rolling = price.rolling(window=window, min_periods=1)
        df[f'rolling_mean_{window}'] = rolling.mean()
        df[f'rolling_std_{window}'] = rolling.std()

    df['ema7'] = price.ewm(span=7, adjust=False).mean()
    df['ema14'] = price.ewm(span=14, adjust=False).mean()

    # Seasonal group means.
    df['monthly_avg'] = df.groupby('month')[price_col].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')[price_col].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')[price_col].transform('mean')

    # Fourier terms on day-of-year.
    phase = 2 * np.pi * dates.dayofyear
    df['fourier_sin_365'] = np.sin(phase / 365)
    df['fourier_cos_365'] = np.cos(phase / 365)
    df['fourier_sin_14'] = np.sin(phase / 14)
    df['fourier_cos_14'] = np.cos(phase / 14)

    # Per-row rolling extremes. Reducing the whole lag-mapped index with
    # .min()/.max() yields one scalar that is broadcast to every row, which
    # makes the min, max and range columns constant and therefore useless.
    recent = price.rolling(window=14, min_periods=1)
    df['recent_min_14'] = recent.min()
    df['recent_max_14'] = recent.max()
    df['recent_range_14'] = df['recent_max_14'] - df['recent_min_14']

    df['yearly_avg'] = df.groupby('year')[price_col].transform('mean')
    df['cumulative_mean'] = price.expanding().mean()

    return df.reset_index()
def create_forecasting_features_1m(df):
    """Add calendar, lag, rolling and seasonal features for the 1-month horizon.

    The frame is first passed through ``_ensure_datetime_index`` (which works
    on a copy), then the following columns are appended, in this order:

    * calendar: ``dayofweek``, ``quarter``, ``month``, ``year``,
      ``dayofyear``, ``weekofyear``
    * date-based lags of the price: ``lag_30``, ``lag_60``, ``lag_90``,
      ``lag_6months``, ``lag_12months`` (NaN when the lagged date is absent)
    * row-based rolling mean/std over 30, 60 and 90 rows
    * exponential moving averages ``ema_30`` and ``ema_60``
    * group means of the price by month, ISO week and weekday
    * Fourier terms of ``dayofyear`` with periods 365 and 30
    * ``recent_min_30`` / ``recent_max_30`` / ``recent_range_30``: rolling
      30-row minimum, maximum and their difference
    * ``yearly_avg`` and the expanding ``cumulative_mean``

    Rolling, EMA and expanding features are computed in row order.
    NOTE(review): this presumably expects rows sorted chronologically;
    confirm against the caller.

    Args:
        df: frame containing a ``'Modal Price (Rs./Quintal)'`` column and
            either a ``DatetimeIndex`` or a ``'Reported Date'`` column.

    Returns:
        A new DataFrame with the feature columns added and the date index
        moved back into a regular column via ``reset_index()``.
    """
    price_col = 'Modal Price (Rs./Quintal)'

    df = _ensure_datetime_index(df)
    dates = df.index
    price = df[price_col]
    # date -> price lookup used by the calendar-offset lag features.
    price_by_date = price.to_dict()

    # Calendar features.
    df['dayofweek'] = dates.dayofweek
    df['quarter'] = dates.quarter
    df['month'] = dates.month
    df['year'] = dates.year
    df['dayofyear'] = dates.dayofyear
    df['weekofyear'] = dates.isocalendar().week

    # Lags are looked up by calendar date, not by row position, so gaps in
    # the series produce NaN instead of a value from the wrong day.
    lag_offsets = {
        'lag_30': pd.Timedelta(days=30),
        'lag_60': pd.Timedelta(days=60),
        'lag_90': pd.Timedelta(days=90),
        'lag_6months': pd.DateOffset(months=6),
        'lag_12months': pd.DateOffset(months=12),
    }
    for column, offset in lag_offsets.items():
        df[column] = (dates - offset).map(price_by_date)

    # Row-based rolling statistics.
    for window in (30, 60, 90):
        rolling = price.rolling(window=window, min_periods=1)
        df[f'rolling_mean_{window}'] = rolling.mean()
        df[f'rolling_std_{window}'] = rolling.std()

    df['ema_30'] = price.ewm(span=30, adjust=False).mean()
    df['ema_60'] = price.ewm(span=60, adjust=False).mean()

    # Seasonal group means.
    df['monthly_avg'] = df.groupby('month')[price_col].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')[price_col].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')[price_col].transform('mean')

    # Fourier terms on day-of-year.
    phase = 2 * np.pi * dates.dayofyear
    df['fourier_sin_365'] = np.sin(phase / 365)
    df['fourier_cos_365'] = np.cos(phase / 365)
    df['fourier_sin_30'] = np.sin(phase / 30)
    df['fourier_cos_30'] = np.cos(phase / 30)

    # Per-row rolling extremes. Reducing the whole lag-mapped index with
    # .min()/.max() yields one scalar that is broadcast to every row, which
    # makes the min, max and range columns constant and therefore useless.
    recent = price.rolling(window=30, min_periods=1)
    df['recent_min_30'] = recent.min()
    df['recent_max_30'] = recent.max()
    df['recent_range_30'] = df['recent_max_30'] - df['recent_min_30']

    df['yearly_avg'] = df.groupby('year')[price_col].transform('mean')
    df['cumulative_mean'] = price.expanding().mean()

    return df.reset_index()
def create_forecasting_features_3m(df):
    """Add calendar, lag, rolling and seasonal features for the 3-month horizon.

    The frame is first passed through ``_ensure_datetime_index`` (which works
    on a copy), then the following columns are appended, in this order:

    * calendar: ``dayofweek``, ``quarter``, ``month``, ``year``,
      ``dayofyear``, ``weekofyear``
    * date-based lags of the price: ``lag_3months``, ``lag_6months``,
      ``lag_9months``, ``lag_12months`` (NaN when the lagged date is absent)
    * row-based rolling mean/std over 90, 180, 270 and 365 rows
    * exponential moving averages ``ema90`` and ``ema180``
    * group means of the price by month, ISO week and weekday
    * Fourier terms of ``dayofyear`` with periods 90 and 30
    * ``recent_min_90`` / ``recent_max_90`` / ``recent_range_90``: rolling
      90-row minimum, maximum and their difference
    * ``yearly_avg`` and the expanding ``cumulative_mean``

    Rolling, EMA and expanding features are computed in row order.
    NOTE(review): this presumably expects rows sorted chronologically;
    confirm against the caller.

    Args:
        df: frame containing a ``'Modal Price (Rs./Quintal)'`` column and
            either a ``DatetimeIndex`` or a ``'Reported Date'`` column.

    Returns:
        A new DataFrame with the feature columns added and the date index
        moved back into a regular column via ``reset_index()``.
    """
    price_col = 'Modal Price (Rs./Quintal)'

    df = _ensure_datetime_index(df)
    dates = df.index
    price = df[price_col]
    # date -> price lookup used by the calendar-offset lag features.
    price_by_date = price.to_dict()

    # Calendar features.
    df['dayofweek'] = dates.dayofweek
    df['quarter'] = dates.quarter
    df['month'] = dates.month
    df['year'] = dates.year
    df['dayofyear'] = dates.dayofyear
    df['weekofyear'] = dates.isocalendar().week

    # Lags are looked up by calendar date, not by row position, so gaps in
    # the series produce NaN instead of a value from the wrong day.
    for months in (3, 6, 9, 12):
        lagged_dates = dates - pd.DateOffset(months=months)
        df[f'lag_{months}months'] = lagged_dates.map(price_by_date)

    # Row-based rolling statistics.
    for window in (90, 180, 270, 365):
        rolling = price.rolling(window=window, min_periods=1)
        df[f'rolling_mean_{window}'] = rolling.mean()
        df[f'rolling_std_{window}'] = rolling.std()

    df['ema90'] = price.ewm(span=90, adjust=False).mean()
    df['ema180'] = price.ewm(span=180, adjust=False).mean()

    # Seasonal group means.
    df['monthly_avg'] = df.groupby('month')[price_col].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')[price_col].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')[price_col].transform('mean')

    # Fourier terms on day-of-year.
    phase = 2 * np.pi * dates.dayofyear
    df['fourier_sin_90'] = np.sin(phase / 90)
    df['fourier_cos_90'] = np.cos(phase / 90)
    df['fourier_sin_30'] = np.sin(phase / 30)
    df['fourier_cos_30'] = np.cos(phase / 30)

    # Per-row rolling extremes. Reducing the whole lag-mapped index with
    # .min()/.max() yields one scalar that is broadcast to every row, which
    # makes the min, max and range columns constant and therefore useless.
    recent = price.rolling(window=90, min_periods=1)
    df['recent_min_90'] = recent.min()
    df['recent_max_90'] = recent.max()
    df['recent_range_90'] = df['recent_max_90'] - df['recent_min_90']

    df['yearly_avg'] = df.groupby('year')[price_col].transform('mean')
    df['cumulative_mean'] = price.expanding().mean()

    return df.reset_index()