Spaces:
Runtime error
Runtime error
| import math | |
| import pandas as pd | |
| from sktime.forecasting.base import ForecastingHorizon | |
def split_x_y(
    data: pd.DataFrame,
    window_length: int,
    n_predict: int,
    freq: str,
):
    '''
    Split a frame into target, lagged exogenous features and a forecasting horizon.

    The ``y`` column is the target. If ``data`` has any other columns they are
    treated as exogenous features: ``window_length`` shifted copies of them are
    laid side by side (column ``c`` shifted by ``n`` rows is named
    ``c_-{n_predict + n}``), gaps created by shifting are backward-filled, and
    the result is split so that the last ``n_predict`` rows become the
    forecast-time features while the remaining rows are paired with ``y``
    shifted forward by ``n_predict`` rows.

    Args:
        data: Frame with a ``y`` column and, optionally, feature columns.
            Its index supplies the timestamps; the last one is the cutoff.
        window_length: Number of lagged copies of the features to build.
            Only used when feature columns are present.
        n_predict: Number of steps to forecast.
        freq: Frequency string passed through to ``ForecastingHorizon``.

    Returns:
        Tuple ``(fh, y, X_train, X_forecast)``:
        ``fh`` is the absolute horizon of ``n_predict`` steps after the last
        index value of ``data``; ``y`` is the target (with its first
        ``n_predict`` rows dropped when features are present); ``X_train`` is
        the lagged feature frame indexed like ``y``; ``X_forecast`` is the
        lagged feature frame for the horizon, indexed by ``fh``. Both feature
        frames are ``None`` when ``data`` only has the ``y`` column.

    Raises:
        ValueError: If ``n_predict`` is smaller than 1, or if features are
            present and ``window_length`` is smaller than 1.
    '''
    # Guard first: with n_predict == 0 the slices below ([-0:] / [:-0]) would
    # silently hand back the whole frame as X_forecast and an empty X_train.
    if n_predict < 1:
        raise ValueError(f'n_predict must be at least 1, got {n_predict}')

    datetime_index = data.index
    y = data['y']
    X_train, X_forecast = None, None

    has_X = len(data.columns) > 1
    if has_X:
        if window_length < 1:
            raise ValueError(
                f'window_length must be at least 1, got {window_length}')

        # Work on a positional (0..n-1) index so every shifted copy lines up.
        X = data.drop(columns='y').reset_index(drop=True)

        # Build all lags, then concatenate once instead of growing a frame
        # inside the loop (which re-copies every accumulated column each time).
        lagged = pd.concat(
            [
                X.shift(n).add_suffix(f'_-{n_predict + n}')
                for n in range(window_length)
            ],
            axis=1,
        )
        # Shifting leaves NaN in the leading rows; fill them from below.
        lagged = lagged.bfill()

        # The last n_predict rows have no matching y yet: they are the
        # features used to predict the horizon.
        X_forecast = lagged.iloc[-n_predict:]

        # Row i of the lagged features is paired with y at row i + n_predict,
        # so drop the first n_predict targets/timestamps to keep them aligned.
        y = y.iloc[n_predict:]
        datetime_index = datetime_index[n_predict:]
        X_train = lagged.iloc[:-n_predict].set_index(datetime_index)

    fh = ForecastingHorizon(
        list(range(1, n_predict + 1)), is_relative=True, freq=freq)
    # Cutoff is the last timestamp in the given data, i.e. forecasting starts
    # right after this point in time.
    cutoff = datetime_index[-1]
    fh = fh.to_absolute(cutoff=cutoff)

    if X_forecast is not None:
        X_forecast = X_forecast.set_index(fh.to_pandas())

    return (fh, y, X_train, X_forecast)
def k_folds(
    data: pd.DataFrame,
    period: int,
    window_length: int,
    n_predict: int,
    freq: str
):
    '''
    Build up to 10 evaluation folds by trimming whole seasons off the end of the data.

    The number of folds is ``floor((len(data) - n_predict - 2 * period) / period)``,
    capped at 10. Fold ``i`` is ``data`` without its last ``i * period`` rows,
    and folds are returned from the shortest to the longest. This guarantees
    the shortest fold still holds at least 2 seasons plus ``n_predict`` rows.
    Each fold is passed through ``split_x_y``.

    Args:
        data: Full data set; only its length and row slicing are used here.
        period: Seasonality period, in rows. Must be positive.
        window_length: Forwarded to ``split_x_y``.
        n_predict: Forwarded to ``split_x_y``; also reserved when sizing folds.
        freq: Forwarded to ``split_x_y``.

    Returns:
        List with one ``split_x_y`` result per fold.

    Raises:
        ValueError: If ``period`` is not positive, or if ``data`` is too short
            to produce a single fold (fewer than ``3 * period + n_predict`` rows).
    '''
    print('[k_folds] ----- START -----')

    if period <= 0:
        raise ValueError(f'period must be a positive integer, got {period}')

    k = math.floor((len(data) - n_predict - (2 * period)) / period)
    print('k', k)

    # Make sure k is not larger than 10
    k = min(k, 10)

    # k is negative (not just zero) when the data is shorter than
    # 2 * period + n_predict, so both cases must be rejected; otherwise an
    # empty list of folds would be returned silently.
    if k <= 0:
        raise ValueError(
            'Data should at least have length of 3 seasons + n_predict rows '
            '(2 seasons + n_predict for the smallest fold, 1 season held out), '
            f'currently length {len(data)}, '
            f'expected length {3 * period + n_predict}')

    # Shortest fold first: drop k seasons, then k - 1, ... down to 1.
    folds = [
        split_x_y(data[: -i * period], window_length, n_predict, freq)
        for i in range(k, 0, -1)
    ]

    print('[k_folds] ----- END -----')
    return folds