import pandas as pd
import numpy as np

# Column names the feature builders read from the input frame.
_DATE_COLUMN = 'Reported Date'
_TARGET_COLUMN = 'Modal Price (Rs./Quintal)'


def _ensure_datetime_index(df):
    """Return a copy of *df* whose index is a ``DatetimeIndex``.

    If the index is not already a ``DatetimeIndex``, the ``'Reported Date'``
    column is moved into the index and parsed with ``pd.to_datetime``.
    The caller's frame is never modified.

    Raises:
        KeyError: if the index is not datetime-like and the frame has no
            ``'Reported Date'`` column (raised by ``set_index``).
    """
    df = df.copy()
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index(_DATE_COLUMN)
        df.index = pd.to_datetime(df.index)
    return df


def _build_features(df, *, lags, rolling_windows, ema_spans,
                    fourier_periods, recent_days):
    """Shared feature pipeline behind the ``create_forecasting_features*`` API.

    Columns are appended in a fixed order (calendar, lags, rolling stats,
    EMAs, group averages, Fourier terms, "recent" stats, yearly average,
    cumulative mean) so that every public wrapper keeps its column layout.

    Rows are processed in the order they arrive; nothing here sorts the
    frame, so rolling / EWM / expanding statistics follow the existing row
    order.

    Args:
        df: Frame with the target column ``'Modal Price (Rs./Quintal)'`` and
            either a ``DatetimeIndex`` or a ``'Reported Date'`` column.
        lags: Iterable of ``(column_name, offset)`` pairs; ``offset`` is
            subtracted from the index (``pd.Timedelta`` or ``pd.DateOffset``).
        rolling_windows: Iterable of integer window sizes (counted in rows).
        ema_spans: Iterable of ``(column_name, span)`` pairs.
        fourier_periods: Iterable of periods used as the divisor of
            day-of-year in the sine/cosine terms.
        recent_days: Day offset used for the ``recent_*`` columns.

    Returns:
        A new DataFrame with the index reset to a regular column and all
        feature columns appended.
    """
    df = _ensure_datetime_index(df)
    idx = df.index
    # Date -> price lookup used for calendar-based lags. If a date occurs
    # more than once, ``to_dict`` keeps the last value for that date.
    target_map = df[_TARGET_COLUMN].to_dict()

    # Calendar components.
    df['dayofweek'] = idx.dayofweek
    df['quarter'] = idx.quarter
    df['month'] = idx.month
    df['year'] = idx.year
    df['dayofyear'] = idx.dayofyear
    df['weekofyear'] = idx.isocalendar().week

    # Calendar lags: price observed exactly ``offset`` before each row's
    # date, NaN when that date is absent from the data.
    for column, offset in lags:
        df[column] = (idx - offset).map(target_map)

    # Rolling statistics over the last ``window`` rows (current row included).
    for window in rolling_windows:
        rolling = df[_TARGET_COLUMN].rolling(window=window, min_periods=1)
        df[f'rolling_mean_{window}'] = rolling.mean()
        df[f'rolling_std_{window}'] = rolling.std()

    # Exponential moving averages.
    for column, span in ema_spans:
        df[column] = df[_TARGET_COLUMN].ewm(span=span, adjust=False).mean()

    # Per-group means computed over the whole frame.
    for column, key in (('monthly_avg', 'month'),
                        ('weekly_avg', 'weekofyear'),
                        ('dayofweek_avg', 'dayofweek')):
        df[column] = df.groupby(key)[_TARGET_COLUMN].transform('mean')

    # Seasonal sine/cosine terms of day-of-year.
    for period in fourier_periods:
        angle = 2 * np.pi * idx.dayofyear / period
        df[f'fourier_sin_{period}'] = np.sin(angle)
        df[f'fourier_cos_{period}'] = np.cos(angle)

    # NOTE(review): ``.min()`` / ``.max()`` reduce the *entire* lagged series
    # to one scalar, so these three columns hold the same value in every row.
    # That looks unintended (a per-row window statistic was probably meant),
    # but it is kept as-is so existing feature values do not shift; confirm
    # the intended definition before changing it.
    recent = (idx - pd.Timedelta(days=recent_days)).map(target_map)
    min_col = f'recent_min_{recent_days}'
    max_col = f'recent_max_{recent_days}'
    df[min_col] = recent.min()
    df[max_col] = recent.max()
    df[f'recent_range_{recent_days}'] = df[max_col] - df[min_col]

    df['yearly_avg'] = df.groupby('year')[_TARGET_COLUMN].transform('mean')
    df['cumulative_mean'] = df[_TARGET_COLUMN].expanding().mean()
    return df.reset_index()


def create_forecasting_features(df):
    """Build the short-horizon feature set.

    Uses lags of 14/28/56 days and 3/6 months, rolling windows of 7/14/28
    rows, EMAs with spans 7 and 14, Fourier periods 365 and 14, and a
    14-day offset for the ``recent_*`` columns.

    Args:
        df: Frame with ``'Modal Price (Rs./Quintal)'`` and either a
            ``DatetimeIndex`` or a ``'Reported Date'`` column. Not modified.

    Returns:
        A new DataFrame with the date index reset to a column and the
        feature columns appended.
    """
    return _build_features(
        df,
        lags=(
            ('lag14', pd.Timedelta(days=14)),
            ('lag28', pd.Timedelta(days=28)),
            ('lag56', pd.Timedelta(days=56)),
            ('lag_3months', pd.DateOffset(months=3)),
            ('lag_6months', pd.DateOffset(months=6)),
        ),
        rolling_windows=(7, 14, 28),
        ema_spans=(('ema7', 7), ('ema14', 14)),
        fourier_periods=(365, 14),
        recent_days=14,
    )


def create_forecasting_features_1m(df):
    """Build the one-month-horizon feature set.

    Uses lags of 30/60/90 days and 6/12 months, rolling windows of 30/60/90
    rows, EMAs with spans 30 and 60, Fourier periods 365 and 30, and a
    30-day offset for the ``recent_*`` columns.

    Args:
        df: Frame with ``'Modal Price (Rs./Quintal)'`` and either a
            ``DatetimeIndex`` or a ``'Reported Date'`` column. Not modified.

    Returns:
        A new DataFrame with the date index reset to a column and the
        feature columns appended.
    """
    return _build_features(
        df,
        lags=(
            ('lag_30', pd.Timedelta(days=30)),
            ('lag_60', pd.Timedelta(days=60)),
            ('lag_90', pd.Timedelta(days=90)),
            ('lag_6months', pd.DateOffset(months=6)),
            ('lag_12months', pd.DateOffset(months=12)),
        ),
        rolling_windows=(30, 60, 90),
        ema_spans=(('ema_30', 30), ('ema_60', 60)),
        fourier_periods=(365, 30),
        recent_days=30,
    )


def create_forecasting_features_3m(df):
    """Build the three-month-horizon feature set.

    Uses lags of 3/6/9/12 months, rolling windows of 90/180/270/365 rows,
    EMAs with spans 90 and 180, Fourier periods 90 and 30, and a 90-day
    offset for the ``recent_*`` columns.

    Args:
        df: Frame with ``'Modal Price (Rs./Quintal)'`` and either a
            ``DatetimeIndex`` or a ``'Reported Date'`` column. Not modified.

    Returns:
        A new DataFrame with the date index reset to a column and the
        feature columns appended.
    """
    return _build_features(
        df,
        lags=(
            ('lag_3months', pd.DateOffset(months=3)),
            ('lag_6months', pd.DateOffset(months=6)),
            ('lag_9months', pd.DateOffset(months=9)),
            ('lag_12months', pd.DateOffset(months=12)),
        ),
        rolling_windows=(90, 180, 270, 365),
        ema_spans=(('ema90', 90), ('ema180', 180)),
        fourier_periods=(90, 30),
        recent_days=90,
    )