File size: 6,718 Bytes
fa4fc8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import pandas as pd
import numpy as np


def _ensure_datetime_index(df):
    df = df.copy()
    if not isinstance(df.index, pd.DatetimeIndex):
        df = df.set_index('Reported Date')
        df.index = pd.to_datetime(df.index)
    return df


def create_forecasting_features(df):
    """Build calendar, lag, rolling and seasonal features (14/28/56-day lags).

    The input is copied and indexed by date via ``_ensure_datetime_index``;
    the caller's frame is not modified. Columns are added in this order:

    * calendar parts: ``dayofweek``, ``quarter``, ``month``, ``year``,
      ``dayofyear``, ``weekofyear``
    * date-based lags of the target: ``lag14``, ``lag28``, ``lag56``,
      ``lag_3months``, ``lag_6months`` (NaN when the lagged date has no row)
    * row-based rolling mean/std over 7, 14 and 28 rows
    * exponential moving averages ``ema7`` and ``ema14``
    * target means grouped by month, ISO week and day of week
    * sine/cosine terms of ``dayofyear`` with periods 365 and 14
    * ``recent_min_14``, ``recent_max_14``, ``recent_range_14``
    * ``yearly_avg`` and the expanding ``cumulative_mean``

    NOTE(review): rolling, EWM and expanding statistics follow row order and
    the frame is not sorted here, so rows are presumably expected to arrive
    in chronological order -- confirm against the caller.

    Args:
        df: DataFrame containing ``'Modal Price (Rs./Quintal)'`` and either a
            ``DatetimeIndex`` or a ``'Reported Date'`` column.

    Returns:
        A new DataFrame with the feature columns added and the date index
        moved back into a regular column by ``reset_index()``.
    """
    target = 'Modal Price (Rs./Quintal)'
    df = _ensure_datetime_index(df)
    price = df[target]
    # Date -> price lookup for the calendar-based lags; with duplicate dates
    # the last row wins, and dates without a row map to NaN.
    target_map = price.to_dict()

    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    lag_offsets = {
        'lag14': pd.Timedelta(days=14),
        'lag28': pd.Timedelta(days=28),
        'lag56': pd.Timedelta(days=56),
        'lag_3months': pd.DateOffset(months=3),
        'lag_6months': pd.DateOffset(months=6),
    }
    for column, offset in lag_offsets.items():
        df[column] = (df.index - offset).map(target_map)

    for window in (7, 14, 28):
        rolling = price.rolling(window=window, min_periods=1)
        df[f'rolling_mean_{window}'] = rolling.mean()
        df[f'rolling_std_{window}'] = rolling.std()

    df['ema7'] = price.ewm(span=7, adjust=False).mean()
    df['ema14'] = price.ewm(span=14, adjust=False).mean()
    df['monthly_avg'] = df.groupby('month')[target].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')[target].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')[target].transform('mean')

    day_of_year = df.index.dayofyear
    for period in (365, 14):
        angle = 2 * np.pi * day_of_year / period
        df[f'fourier_sin_{period}'] = np.sin(angle)
        df[f'fourier_cos_{period}'] = np.cos(angle)

    # Per-row trailing window. Reducing the mapped lag Index with
    # .min()/.max() yields one scalar that is broadcast to every row, which
    # makes all three "recent" columns constant.
    recent = price.rolling(window=14, min_periods=1)
    df['recent_min_14'] = recent.min()
    df['recent_max_14'] = recent.max()
    df['recent_range_14'] = df['recent_max_14'] - df['recent_min_14']

    df['yearly_avg'] = df.groupby('year')[target].transform('mean')
    df['cumulative_mean'] = price.expanding().mean()

    return df.reset_index()


def create_forecasting_features_1m(df):
    """Build calendar, lag, rolling and seasonal features (30/60/90-day lags).

    The input is copied and indexed by date via ``_ensure_datetime_index``;
    the caller's frame is not modified. Columns are added in this order:

    * calendar parts: ``dayofweek``, ``quarter``, ``month``, ``year``,
      ``dayofyear``, ``weekofyear``
    * date-based lags of the target: ``lag_30``, ``lag_60``, ``lag_90``,
      ``lag_6months``, ``lag_12months`` (NaN when the lagged date has no row)
    * row-based rolling mean/std over 30, 60 and 90 rows
    * exponential moving averages ``ema_30`` and ``ema_60``
    * target means grouped by month, ISO week and day of week
    * sine/cosine terms of ``dayofyear`` with periods 365 and 30
    * ``recent_min_30``, ``recent_max_30``, ``recent_range_30``
    * ``yearly_avg`` and the expanding ``cumulative_mean``

    NOTE(review): rolling, EWM and expanding statistics follow row order and
    the frame is not sorted here, so rows are presumably expected to arrive
    in chronological order -- confirm against the caller.

    Args:
        df: DataFrame containing ``'Modal Price (Rs./Quintal)'`` and either a
            ``DatetimeIndex`` or a ``'Reported Date'`` column.

    Returns:
        A new DataFrame with the feature columns added and the date index
        moved back into a regular column by ``reset_index()``.
    """
    target = 'Modal Price (Rs./Quintal)'
    df = _ensure_datetime_index(df)
    price = df[target]
    # Date -> price lookup for the calendar-based lags; with duplicate dates
    # the last row wins, and dates without a row map to NaN.
    target_map = price.to_dict()

    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    lag_offsets = {
        'lag_30': pd.Timedelta(days=30),
        'lag_60': pd.Timedelta(days=60),
        'lag_90': pd.Timedelta(days=90),
        'lag_6months': pd.DateOffset(months=6),
        'lag_12months': pd.DateOffset(months=12),
    }
    for column, offset in lag_offsets.items():
        df[column] = (df.index - offset).map(target_map)

    for window in (30, 60, 90):
        rolling = price.rolling(window=window, min_periods=1)
        df[f'rolling_mean_{window}'] = rolling.mean()
        df[f'rolling_std_{window}'] = rolling.std()

    df['ema_30'] = price.ewm(span=30, adjust=False).mean()
    df['ema_60'] = price.ewm(span=60, adjust=False).mean()

    df['monthly_avg'] = df.groupby('month')[target].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')[target].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')[target].transform('mean')

    day_of_year = df.index.dayofyear
    for period in (365, 30):
        angle = 2 * np.pi * day_of_year / period
        df[f'fourier_sin_{period}'] = np.sin(angle)
        df[f'fourier_cos_{period}'] = np.cos(angle)

    # Per-row trailing window. Reducing the mapped lag Index with
    # .min()/.max() yields one scalar that is broadcast to every row, which
    # makes all three "recent" columns constant.
    recent = price.rolling(window=30, min_periods=1)
    df['recent_min_30'] = recent.min()
    df['recent_max_30'] = recent.max()
    df['recent_range_30'] = df['recent_max_30'] - df['recent_min_30']

    df['yearly_avg'] = df.groupby('year')[target].transform('mean')
    df['cumulative_mean'] = price.expanding().mean()

    return df.reset_index()


def create_forecasting_features_3m(df):
    """Build calendar, lag, rolling and seasonal features (3-12 month lags).

    The input is copied and indexed by date via ``_ensure_datetime_index``;
    the caller's frame is not modified. Columns are added in this order:

    * calendar parts: ``dayofweek``, ``quarter``, ``month``, ``year``,
      ``dayofyear``, ``weekofyear``
    * date-based lags of the target: ``lag_3months``, ``lag_6months``,
      ``lag_9months``, ``lag_12months`` (NaN when the lagged date has no row)
    * row-based rolling mean/std over 90, 180, 270 and 365 rows
    * exponential moving averages ``ema90`` and ``ema180``
    * target means grouped by month, ISO week and day of week
    * sine/cosine terms of ``dayofyear`` with periods 90 and 30
    * ``recent_min_90``, ``recent_max_90``, ``recent_range_90``
    * ``yearly_avg`` and the expanding ``cumulative_mean``

    NOTE(review): rolling, EWM and expanding statistics follow row order and
    the frame is not sorted here, so rows are presumably expected to arrive
    in chronological order -- confirm against the caller.

    Args:
        df: DataFrame containing ``'Modal Price (Rs./Quintal)'`` and either a
            ``DatetimeIndex`` or a ``'Reported Date'`` column.

    Returns:
        A new DataFrame with the feature columns added and the date index
        moved back into a regular column by ``reset_index()``.
    """
    target = 'Modal Price (Rs./Quintal)'
    df = _ensure_datetime_index(df)
    price = df[target]
    # Date -> price lookup for the calendar-based lags; with duplicate dates
    # the last row wins, and dates without a row map to NaN.
    target_map = price.to_dict()

    df['dayofweek'] = df.index.dayofweek
    df['quarter'] = df.index.quarter
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['dayofyear'] = df.index.dayofyear
    df['weekofyear'] = df.index.isocalendar().week

    for months in (3, 6, 9, 12):
        shifted = df.index - pd.DateOffset(months=months)
        df[f'lag_{months}months'] = shifted.map(target_map)

    for window in (90, 180, 270, 365):
        rolling = price.rolling(window=window, min_periods=1)
        df[f'rolling_mean_{window}'] = rolling.mean()
        df[f'rolling_std_{window}'] = rolling.std()

    df['ema90'] = price.ewm(span=90, adjust=False).mean()
    df['ema180'] = price.ewm(span=180, adjust=False).mean()
    df['monthly_avg'] = df.groupby('month')[target].transform('mean')
    df['weekly_avg'] = df.groupby('weekofyear')[target].transform('mean')
    df['dayofweek_avg'] = df.groupby('dayofweek')[target].transform('mean')

    day_of_year = df.index.dayofyear
    for period in (90, 30):
        angle = 2 * np.pi * day_of_year / period
        df[f'fourier_sin_{period}'] = np.sin(angle)
        df[f'fourier_cos_{period}'] = np.cos(angle)

    # Per-row trailing window. Reducing the mapped lag Index with
    # .min()/.max() yields one scalar that is broadcast to every row, which
    # makes all three "recent" columns constant.
    recent = price.rolling(window=90, min_periods=1)
    df['recent_min_90'] = recent.min()
    df['recent_max_90'] = recent.max()
    df['recent_range_90'] = df['recent_max_90'] - df['recent_min_90']

    df['yearly_avg'] = df.groupby('year')[target].transform('mean')
    df['cumulative_mean'] = price.expanding().mean()

    return df.reset_index()