Spaces:
Runtime error
Runtime error
import logging
from math import sqrt
from typing import List, Union

import pandas as pd
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.tsatools import freq_to_period

from .models import AllModels
# Configure the root logger at DEBUG level as soon as this module is imported.
# NOTE(review): this is a module-level side effect; any application importing
# this module gets its root logger configured here (unless it already has
# handlers). Kept as-is to preserve existing behaviour.
logging.basicConfig(level=logging.DEBUG)
class Forecaster:
    '''
    Run one or more forecasting models over a single time series.

    Typical use is ``Forecaster().forecast(data, ...)``; the outcome of every
    model is appended to ``self.results`` as a dict with the keys
    ``model``, ``result``, ``evaluate`` and ``rmse``.
    '''

    # Limits applied by calculate_n_predict()
    _MAX_N_PREDICT = 20        # never forecast more than this many steps
    _MIN_N_PREDICT = 4         # never forecast fewer than this many steps
    _MAX_DATA_FRACTION = 0.2   # horizon is capped at this share of the data

    def __init__(self) -> None:
        logging.debug('Forecaster init')
        self.models = {}  # Init models dict, filled by init_models()
        # Default matches forecast()'s default so that prep_data() can be
        # called on a fresh instance without an AttributeError.
        self.enable_exog = True
        self._reset_state()

    def _reset_state(self) -> None:
        '''
        Clear every per-forecast attribute so each run starts from scratch.
        '''
        # Below properties will be init by prep_data()
        self.data = None      # full frame, indexed by datetime
        self.y = None         # target series
        self.exog = None      # exogenous columns, or None for univariate data
        self.freq = None      # inferred pandas frequency string
        self.period = None    # seasonal period derived from freq
        self.y_test = None    # held-out target, set by train_test_split()
        self.n_predict = None  # init by calculate_n_predict()
        self.kwargs = {}      # extra keyword arguments handed to the models
        self.results = []     # Contains all result value

    def fit(self, data):
        '''
        Store data on the forecaster.

        NOTE: forecast() resets self.data and works from its own ``data``
        argument, so the value stored here is not used by forecast().
        '''
        self.data = data

    def forecast(
        self,
        data: pd.DataFrame,
        models: Union[str, List[str]] = 'all',
        test: bool = False,
        enable_exog: bool = True
    ):
        '''
        Main function, will perform the entire forecast operation.
        Nothing is returned; results are appended to self.results.

        data : pd.DataFrame, required
            Data for training the model, must contain "datetime", "y" columns,
            any additional column will be considered as exogenous columns and
            be used for multivariate forecasting.
            data must be cleaned without any missing value.
            data's datetime column must be valid datetime strings, and the
            frequency must be inferable from it.
        models : str or List[str], default='all'
            Selected model(s) to use for forecasting. Default is "all",
            which will use all available models registered in models.AllModels
        test : bool, default=False
            Decide if the forecasting purpose is for testing or actual
            prediction. Testing and prediction will not happen at the same
            time. When enabled, the last n_predict observations of y are held
            out and an RMSE against them is stored under "rmse".
        enable_exog : bool, default=True
            If disabled, exog data will not be used in the model training, and
            the data will be considered as univariate data.
            If enabled, and the data does contain exog data, for multivariate
            forecasting purpose, the data must be shifted by n_predict steps.
            This will cause a few things:
            1. y column will be remapped to exog data that is n_predict unit
               of time ago
            2. n_predict length of the oldest y will be trimmed off
            3. n_predict length of exog values will be used for the forecasting
        '''
        logging.debug('Start forecasting ...')
        self.enable_exog = enable_exog
        self._reset_state()

        # Prepare data, including set the datetime index, split y and exog
        self.prep_data(data)
        # Calculate n_predict value based on self.period
        self.calculate_n_predict()
        # Init the basic kwargs for models to use
        self.init_kwargs()
        # Shift exog value by n_predict unit of time
        self.shift_exog()

        # Split test set for testing purpose
        if test:
            logging.debug('Testing ...')
            self.train_test_split()

        # ================================ #
        # Train models and make prediction #
        # ================================ #
        self.init_models(models)
        for model_name, model in self.models.items():
            prediction, evaluation = self._unpack_forecast(model.forecast())
            result = {
                'model': model_name,
                'result': prediction,
                'evaluate': evaluation,
                'rmse': None,
            }
            if test:
                mse = mean_squared_error(self.y_test, prediction)
                result['rmse'] = sqrt(mse)
            self.results.append(result)
        # - END of forecast - #

    @staticmethod
    def _unpack_forecast(fcst):
        '''
        Split a model's forecast() output into (prediction, evaluation).

        If the output exposes keys() and has a "forecast" key, that entry is
        the prediction; otherwise the output itself is. The evaluation is the
        "evaluate" entry when present, else None. Outputs without keys()
        (e.g. a plain list) are treated as the prediction itself.
        '''
        keys = fcst.keys() if hasattr(fcst, 'keys') else ()
        prediction = fcst['forecast'] if 'forecast' in keys else fcst
        evaluation = fcst['evaluate'] if 'evaluate' in keys else None
        return prediction, evaluation

    def init_models(self, models):
        '''
        Initialize models based on the provided parameter.
        Get self.models ready for forecasting
        '''
        logging.debug('Init models')
        all_models = AllModels(models)
        self.models = all_models.init_models(
            self.y,
            self.n_predict,
            self.exog,
            **self.kwargs)

    def prep_data(
        self,
        data: pd.DataFrame
    ) -> None:
        '''
        Index a copy of data by its "datetime" column, infer the frequency
        and seasonal period, and split it into self.y and self.exog.

        Raises ValueError when no regular frequency can be inferred from the
        datetime column.
        '''
        logging.debug('Prep data')
        # Work on a copy so the caller's frame is never modified
        self.data = data.copy().set_index('datetime')
        self.data.index = pd.to_datetime(self.data.index)

        logging.debug('Inferencing freq and period')
        self.freq = pd.infer_freq(self.data.index)
        if self.freq is None:
            # infer_freq() signals "no regular frequency" with None; fail
            # here with a clear message instead of inside freq_to_period().
            raise ValueError(
                'Unable to infer a frequency from the "datetime" column; '
                'the timestamps must be regularly spaced')
        self.period = freq_to_period(self.freq)

        self.y = self.data['y']
        if self.enable_exog and len(self.data.columns) > 1:
            self.exog = self.data.drop(columns='y')
        else:
            self.exog = None

    def calculate_n_predict(self):
        '''
        The n_predict will be the smaller number in 20, self.period value
        By default, try only predict 1 seasonal cycle

        The value is further capped at 20% of the data size and finally
        raised to a minimum of 4. The minimum is applied last, so for short
        series it takes precedence over the 20% cap.
        '''
        # Set a max prediction size to be 20% of given data size
        data_cap = int(len(self.data) * self._MAX_DATA_FRACTION)
        capped = min(self._MAX_N_PREDICT, self.period, data_cap)
        # Set a min prediction to be 4
        self.n_predict = max(self._MIN_N_PREDICT, capped)

    def init_kwargs(self):
        '''
        kwargs will be used for initializing models.
        kwargs contains all necessary information about the data
        '''
        self.kwargs['period'] = self.period

    def train_test_split(self):
        '''
        n_predict length of y value will be split out for testing
        although, each model will probably have its own cross validator
        '''
        logging.debug('Train test split')
        horizon = self.n_predict
        # .iloc makes the positional (not label-based) slicing explicit
        self.y_test = self.y.iloc[-horizon:]
        self.y = self.y.iloc[:-horizon]
        if self.exog is not None:
            self.exog = self.exog.iloc[:-horizon]

    def shift_exog(self):
        '''
        Move the exog index forward by n_predict steps and drop the first
        n_predict values of y. Does nothing when there is no exog data.
        '''
        if self.exog is None:
            return
        logging.debug('Shifted exog datetime index by n_predict period')
        self.exog.index = self.exog.index.shift(
            self.n_predict, freq=self.freq)
        logging.debug(
            'Trimmed y by n_predict, so it is aligned with shifted exog')
        self.y = self.y.iloc[self.n_predict:]