# ThejasRao's picture
# Update agripredict app
# fa4fc8b
import pandas as pd
import numpy as np
def _ensure_datetime_index(df):
    """Return a copy of *df* that is indexed by a ``DatetimeIndex``.

    The caller's frame is never modified. If the copy is already indexed by
    a ``DatetimeIndex`` it is returned as is; otherwise the
    ``'Reported Date'`` column becomes the index and is parsed with
    ``pd.to_datetime``.

    Raises:
        KeyError: if the index is not a ``DatetimeIndex`` and the frame has
            no ``'Reported Date'`` column (raised by ``set_index``).
    """
    frame = df.copy()
    if isinstance(frame.index, pd.DatetimeIndex):
        # Already date-indexed: nothing to convert.
        return frame

    frame = frame.set_index('Reported Date')
    frame.index = pd.to_datetime(frame.index)
    return frame
def create_forecasting_features(df):
    """Build the short-horizon (7/14/28-row window) feature set.

    The input is copied and date-indexed by ``_ensure_datetime_index``;
    the caller's frame is left untouched. Columns are added in this order:

    * calendar parts: ``dayofweek``, ``quarter``, ``month``, ``year``,
      ``dayofyear``, ``weekofyear``;
    * exact-date lags of the price: ``lag14``, ``lag28``, ``lag56``,
      ``lag_3months``, ``lag_6months`` (NaN when no row exists for the
      shifted date);
    * ``rolling_mean_{w}`` / ``rolling_std_{w}`` for w in 7, 14, 28;
    * ``ema7``, ``ema14``;
    * ``monthly_avg``, ``weekly_avg``, ``dayofweek_avg``;
    * ``fourier_sin_{p}`` / ``fourier_cos_{p}`` for p in 365, 14;
    * ``recent_min_14``, ``recent_max_14``, ``recent_range_14``;
    * ``yearly_avg``, ``cumulative_mean``.

    Returns:
        The feature frame with the date index moved back into a column by
        ``reset_index()``.
    """
    price_col = 'Modal Price (Rs./Quintal)'

    frame = _ensure_datetime_index(df)
    dates = frame.index
    prices = frame[price_col]
    # date -> price lookup used for every "price N days/months ago" feature.
    price_by_date = prices.to_dict()

    # Calendar components of each row's date.
    for part in ('dayofweek', 'quarter', 'month', 'year', 'dayofyear'):
        frame[part] = getattr(dates, part)
    frame['weekofyear'] = dates.isocalendar().week

    # Lags are exact-date lookups, so a missing date produces NaN.
    for days in (14, 28, 56):
        frame[f'lag{days}'] = (dates - pd.Timedelta(days=days)).map(price_by_date)
    for months in (3, 6):
        frame[f'lag_{months}months'] = (dates - pd.DateOffset(months=months)).map(price_by_date)

    # Integer windows count rows (not calendar days) and include the
    # current row; min_periods=1 keeps the first rows populated.
    for window in (7, 14, 28):
        trailing = prices.rolling(window=window, min_periods=1)
        frame[f'rolling_mean_{window}'] = trailing.mean()
        frame[f'rolling_std_{window}'] = trailing.std()

    for span in (7, 14):
        frame[f'ema{span}'] = prices.ewm(span=span, adjust=False).mean()

    # Group means are taken over the whole frame and broadcast per row.
    for key, column in (
        ('month', 'monthly_avg'),
        ('weekofyear', 'weekly_avg'),
        ('dayofweek', 'dayofweek_avg'),
    ):
        frame[column] = frame.groupby(key)[price_col].transform('mean')

    # Seasonal sin/cos encodings driven by the day of the year.
    day_of_year = dates.dayofyear
    for period in (365, 14):
        angle = 2 * np.pi * day_of_year / period
        frame[f'fourier_sin_{period}'] = np.sin(angle)
        frame[f'fourier_cos_{period}'] = np.cos(angle)

    # NOTE(review): .min()/.max() reduce the whole lagged series to one
    # scalar, so these three columns hold the same value on every row.
    # If a per-row trailing min/max was intended this needs a rolling
    # computation - confirm before changing, since it alters the features.
    lagged_prices = (dates - pd.Timedelta(days=14)).map(price_by_date)
    frame['recent_min_14'] = lagged_prices.min()
    frame['recent_max_14'] = lagged_prices.max()
    frame['recent_range_14'] = frame['recent_max_14'] - frame['recent_min_14']

    frame['yearly_avg'] = frame.groupby('year')[price_col].transform('mean')
    frame['cumulative_mean'] = prices.expanding().mean()

    return frame.reset_index()
def create_forecasting_features_1m(df):
    """Build the one-month-horizon (30/60/90-row window) feature set.

    The input is copied and date-indexed by ``_ensure_datetime_index``;
    the caller's frame is left untouched. Columns are added in this order:

    * calendar parts: ``dayofweek``, ``quarter``, ``month``, ``year``,
      ``dayofyear``, ``weekofyear``;
    * exact-date lags of the price: ``lag_30``, ``lag_60``, ``lag_90``,
      ``lag_6months``, ``lag_12months`` (NaN when no row exists for the
      shifted date);
    * ``rolling_mean_{w}`` / ``rolling_std_{w}`` for w in 30, 60, 90;
    * ``ema_30``, ``ema_60``;
    * ``monthly_avg``, ``weekly_avg``, ``dayofweek_avg``;
    * ``fourier_sin_{p}`` / ``fourier_cos_{p}`` for p in 365, 30;
    * ``recent_min_30``, ``recent_max_30``, ``recent_range_30``;
    * ``yearly_avg``, ``cumulative_mean``.

    Returns:
        The feature frame with the date index moved back into a column by
        ``reset_index()``.
    """
    price_col = 'Modal Price (Rs./Quintal)'

    frame = _ensure_datetime_index(df)
    dates = frame.index
    prices = frame[price_col]
    # date -> price lookup used for every "price N days/months ago" feature.
    price_by_date = prices.to_dict()

    # Calendar components of each row's date.
    for part in ('dayofweek', 'quarter', 'month', 'year', 'dayofyear'):
        frame[part] = getattr(dates, part)
    frame['weekofyear'] = dates.isocalendar().week

    # Lags are exact-date lookups, so a missing date produces NaN.
    for days in (30, 60, 90):
        frame[f'lag_{days}'] = (dates - pd.Timedelta(days=days)).map(price_by_date)
    for months in (6, 12):
        frame[f'lag_{months}months'] = (dates - pd.DateOffset(months=months)).map(price_by_date)

    # Integer windows count rows (not calendar days) and include the
    # current row; min_periods=1 keeps the first rows populated.
    for window in (30, 60, 90):
        trailing = prices.rolling(window=window, min_periods=1)
        frame[f'rolling_mean_{window}'] = trailing.mean()
        frame[f'rolling_std_{window}'] = trailing.std()

    for span in (30, 60):
        frame[f'ema_{span}'] = prices.ewm(span=span, adjust=False).mean()

    # Group means are taken over the whole frame and broadcast per row.
    for key, column in (
        ('month', 'monthly_avg'),
        ('weekofyear', 'weekly_avg'),
        ('dayofweek', 'dayofweek_avg'),
    ):
        frame[column] = frame.groupby(key)[price_col].transform('mean')

    # Seasonal sin/cos encodings driven by the day of the year.
    day_of_year = dates.dayofyear
    for period in (365, 30):
        angle = 2 * np.pi * day_of_year / period
        frame[f'fourier_sin_{period}'] = np.sin(angle)
        frame[f'fourier_cos_{period}'] = np.cos(angle)

    # NOTE(review): .min()/.max() reduce the whole lagged series to one
    # scalar, so these three columns hold the same value on every row.
    # If a per-row trailing min/max was intended this needs a rolling
    # computation - confirm before changing, since it alters the features.
    lagged_prices = (dates - pd.Timedelta(days=30)).map(price_by_date)
    frame['recent_min_30'] = lagged_prices.min()
    frame['recent_max_30'] = lagged_prices.max()
    frame['recent_range_30'] = frame['recent_max_30'] - frame['recent_min_30']

    frame['yearly_avg'] = frame.groupby('year')[price_col].transform('mean')
    frame['cumulative_mean'] = prices.expanding().mean()

    return frame.reset_index()
def create_forecasting_features_3m(df):
    """Build the three-month-horizon (90/180/270/365-row window) feature set.

    The input is copied and date-indexed by ``_ensure_datetime_index``;
    the caller's frame is left untouched. Columns are added in this order:

    * calendar parts: ``dayofweek``, ``quarter``, ``month``, ``year``,
      ``dayofyear``, ``weekofyear``;
    * exact-date lags of the price: ``lag_3months``, ``lag_6months``,
      ``lag_9months``, ``lag_12months`` (NaN when no row exists for the
      shifted date);
    * ``rolling_mean_{w}`` / ``rolling_std_{w}`` for w in 90, 180, 270, 365;
    * ``ema90``, ``ema180``;
    * ``monthly_avg``, ``weekly_avg``, ``dayofweek_avg``;
    * ``fourier_sin_{p}`` / ``fourier_cos_{p}`` for p in 90, 30;
    * ``recent_min_90``, ``recent_max_90``, ``recent_range_90``;
    * ``yearly_avg``, ``cumulative_mean``.

    Returns:
        The feature frame with the date index moved back into a column by
        ``reset_index()``.
    """
    price_col = 'Modal Price (Rs./Quintal)'

    frame = _ensure_datetime_index(df)
    dates = frame.index
    prices = frame[price_col]
    # date -> price lookup used for every "price N days/months ago" feature.
    price_by_date = prices.to_dict()

    # Calendar components of each row's date.
    for part in ('dayofweek', 'quarter', 'month', 'year', 'dayofyear'):
        frame[part] = getattr(dates, part)
    frame['weekofyear'] = dates.isocalendar().week

    # Lags are exact-date lookups, so a missing date produces NaN.
    for months in (3, 6, 9, 12):
        frame[f'lag_{months}months'] = (dates - pd.DateOffset(months=months)).map(price_by_date)

    # Integer windows count rows (not calendar days) and include the
    # current row; min_periods=1 keeps the first rows populated.
    for window in (90, 180, 270, 365):
        trailing = prices.rolling(window=window, min_periods=1)
        frame[f'rolling_mean_{window}'] = trailing.mean()
        frame[f'rolling_std_{window}'] = trailing.std()

    for span in (90, 180):
        frame[f'ema{span}'] = prices.ewm(span=span, adjust=False).mean()

    # Group means are taken over the whole frame and broadcast per row.
    for key, column in (
        ('month', 'monthly_avg'),
        ('weekofyear', 'weekly_avg'),
        ('dayofweek', 'dayofweek_avg'),
    ):
        frame[column] = frame.groupby(key)[price_col].transform('mean')

    # Seasonal sin/cos encodings driven by the day of the year.
    day_of_year = dates.dayofyear
    for period in (90, 30):
        angle = 2 * np.pi * day_of_year / period
        frame[f'fourier_sin_{period}'] = np.sin(angle)
        frame[f'fourier_cos_{period}'] = np.cos(angle)

    # NOTE(review): .min()/.max() reduce the whole lagged series to one
    # scalar, so these three columns hold the same value on every row.
    # If a per-row trailing min/max was intended this needs a rolling
    # computation - confirm before changing, since it alters the features.
    lagged_prices = (dates - pd.Timedelta(days=90)).map(price_by_date)
    frame['recent_min_90'] = lagged_prices.min()
    frame['recent_max_90'] = lagged_prices.max()
    frame['recent_range_90'] = frame['recent_max_90'] - frame['recent_min_90']

    frame['yearly_avg'] = frame.groupby('year')[price_col].transform('mean')
    frame['cumulative_mean'] = prices.expanding().mean()

    return frame.reset_index()