Spaces:

barbosarafael
/

multiple-time-series-forecast

Sleeping

App Files Files Community

barbosarafael committed on Jan 22, 2024

Commit

eccb413

verified ·

1 Parent(s): af2178f

Upload 3 files

Browse files

Files changed (3) hide show

app.py +85 -0
functions.py +201 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+from functions import *
+# ---- 1. Load the raw retail dataset once at import time; fun_predict reuses it on every call.
+path_data = 'https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-hierarchical-forecasting/main/retail-usa-clothing.csv'
+dados = read_data(path_data)
+#---- 2. Prediction function called by the Gradio interface:
+def fun_predict(days_to_forecast):
+  """Forecast every item-level series for the requested number of days.
+
+  Cleans the module-level ``dados`` frame, builds the region/state/item
+  hierarchy, fits a Holt-Winters model and a linear regression on lag and
+  date features, reconciles both sets of forecasts and returns them as one
+  table.
+
+  Args:
+    days_to_forecast: Forecast horizon in days. Converted with ``int()``, so
+      a fractional value is truncated towards zero.
+
+  Returns:
+    The value returned by ``clean_recommendations`` for the reconciled
+    forecasts.
+
+  Raises:
+    ValueError: If ``days_to_forecast`` is missing or smaller than 1.
+  """
+  # gr.Number delivers a float (e.g. 30.0) unless it is created with
+  # precision = 0, and None when the field is cleared; the forecasting
+  # libraries need a positive integer horizon.
+  if days_to_forecast is None:
+      raise ValueError('O número de dias para a projeção deve ser um inteiro positivo')
+  horizon = int(days_to_forecast)
+  if horizon < 1:
+      raise ValueError('O número de dias para a projeção deve ser um inteiro positivo')
+  #---- a. Clean the raw data:
+  print('Corrigindo os dados')
+  df = clean_data(df = dados)
+  #---- b. Put the data in the layout the hierarchical models expect:
+  print('Formatando os dados para os modelos:')
+  cols_hierarchical = ['region', 'state', 'item']
+  Y_df, S_df, tags = format_hierarchical_df(df = df, cols_hierarchical = cols_hierarchical)
+  #---- c. Run the time-series and machine-learning models:
+  print('Aplicando os modelos de TS e ML')
+  # Models:
+  hw = HoltWinters(season_length = 7, error_type = 'M') # Holt-Winters with a 7-day season and multiplicative ('M') error
+  lin_reg = LinearRegression() # Linear regression
+  # Rolling-mean features, one jitted wrapper per window size:
+  @njit
+  def rolling_mean_7(x):
+      return rolling_mean(x, window_size = 7)
+  @njit
+  def rolling_mean_14(x):
+      return rolling_mean(x, window_size = 14)
+  @njit
+  def rolling_mean_21(x):
+      return rolling_mean(x, window_size = 21)
+  @njit
+  def rolling_mean_28(x):
+      return rolling_mean(x, window_size = 28)
+  df_recommendations =  apply_models(Y_df = Y_df,
+                                    S_df = S_df,
+                                    tags = tags,
+                                    freq = 'D',
+                                    ts_models = [hw],
+                                    reconcilers_ts = [BottomUp()],
+                                    ml_models = [lin_reg],
+                                    lags_ml = [1, 7, 14, 21, 28, 30],
+                                    date_features_ml = ['dayofweek', 'month', 'year', 'quarter', 'day', 'week'],
+                                    lag_transforms_ml = {
+                                        1: [expanding_mean],
+                                        7: [rolling_mean_7],
+                                        14: [rolling_mean_14],
+                                        21: [rolling_mean_21],
+                                        28: [rolling_mean_28],
+                                    },
+                                    reconcilers_ml = [OptimalCombination(method = 'ols', nonnegative = True)],
+                                    horizon_forecast = horizon)
+  #---- d. Reshape the forecasts into the final table:
+  print('Corrigindo o dataframe')
+  df_result = clean_recommendations(df_rec = df_recommendations, cols_hierarchical = cols_hierarchical)
+  return df_result
+# Gradio UI: one numeric input (forecast horizon in days) and one table output.
+inputs = gr.Number(value = 30, label = 'Dias para a projeção')
+outputs = [gr.DataFrame(headers = list(dados.columns))]
+demo = gr.Interface(
+    fn = fun_predict,
+    inputs = inputs,
+    outputs = outputs,
+    title = 'Projeções de múltiplas séries temporais',
+)
+# Start the app, requesting a public share link.
+demo.launch(share = True)

functions.py ADDED Viewed

	@@ -0,0 +1,201 @@

+#------- Bibliotecas:
+# Manipulação de dados:
+import pandas as pd
+import numpy as np
+# Modelagem:
+from hierarchicalforecast.utils import aggregate
+from statsforecast import StatsForecast
+from statsforecast.models import Naive, AutoARIMA, HoltWinters, AutoETS
+from mlforecast import MLForecast
+from sklearn.linear_model import LinearRegression
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.ensemble import RandomForestRegressor
+from lightgbm import LGBMRegressor
+from xgboost import XGBRegressor
+from numba import njit
+from window_ops.expanding import expanding_mean
+from window_ops.rolling import rolling_mean
+# Reconciliação
+from hierarchicalforecast.methods import BottomUp, OptimalCombination
+from hierarchicalforecast.core import HierarchicalReconciliation
+# Gradio
+import gradio as gr
+#------- Funções:
+def read_data(path: str) -> pd.DataFrame:
+    """Load the CSV located at ``path`` into a DataFrame.
+
+    Args:
+        path: Location of the CSV, forwarded unchanged to ``pd.read_csv``.
+
+    Returns:
+        The DataFrame produced by ``pd.read_csv``.
+    """
+    return pd.read_csv(path)
+def clean_data(df: pd.DataFrame) -> pd.DataFrame:
+    """Return a copy of ``df`` in the ``ds`` / ``y`` layout used by the models.
+
+    The input frame is left untouched; every step below produces a new frame.
+
+    Args:
+        df: Raw data containing at least the ``country``, ``date`` and
+            ``quantity`` columns.
+
+    Returns:
+        A frame without ``country``, with ``date`` parsed to datetime and
+        renamed to ``ds``, and ``quantity`` renamed to ``y``.
+    """
+    new_names = {'date': 'ds', 'quantity': 'y'}
+    return (
+        df
+        # 1. The country column is not used downstream.
+        .drop(columns = 'country')
+        # 2. Parse the date column into datetimes.
+        .assign(date = lambda frame: pd.to_datetime(frame['date']))
+        # 3. date -> ds, quantity -> y.
+        .rename(columns = new_names)
+    )
+def format_hierarchical_df(df: pd.DataFrame, cols_hierarchical: list):
+    """Aggregate ``df`` along the hierarchy described by ``cols_hierarchical``.
+
+    Args:
+        df: Frame handed to ``aggregate``.
+        cols_hierarchical: Hierarchy columns, ordered from the top level to
+            the bottom level.
+
+    Returns:
+        The ``(Y_df, S_df, tags)`` triple produced by ``aggregate``.
+    """
+    # One spec entry per level, each a growing prefix of the hierarchy:
+    # [[col1], [col1, col2], ..., [col1, col2, ..., coln]]
+    level_specs = [
+        cols_hierarchical[:stop]
+        for stop, _ in enumerate(cols_hierarchical, start = 1)
+    ]
+    aggregated, summing, level_tags = aggregate(df = df, spec = level_specs)
+    return aggregated, summing, level_tags
+def apply_time_series_models(Y_df: pd.DataFrame,
+                             S_df: pd.DataFrame,
+                             tags: dict,
+                             freq: str,
+                             ts_models: list,
+                             reconcilers_ts: list,
+                             horizon_forecast: int = 30) -> pd.DataFrame:
+    """Forecast with the statistical models and reconcile the result.
+
+    Args:
+        Y_df: Series to forecast, passed to ``StatsForecast.forecast``.
+        S_df: Frame passed to the reconciliation step as ``S``.
+        tags: Mapping passed to the reconciliation step as ``tags``.
+        freq: Frequency string handed to ``StatsForecast``.
+        ts_models: Model instances handed to ``StatsForecast``.
+        reconcilers_ts: Reconciler instances handed to
+            ``HierarchicalReconciliation``.
+        horizon_forecast: Number of periods to forecast.
+
+    Returns:
+        The reconciled forecasts with the index moved back into columns.
+    """
+    model_ts = StatsForecast(ts_models,
+                             freq = freq,
+                             n_jobs = -1)
+    # forecast() fits and predicts in a single pass, so a separate fit() call
+    # beforehand would only train every model a second time.
+    Y_hat_df_ts = model_ts.forecast(df = Y_df, h = horizon_forecast)
+    hrec_ts = HierarchicalReconciliation(reconcilers = reconcilers_ts)
+    Y_rec_df_ts = hrec_ts.reconcile(Y_hat_df = Y_hat_df_ts,
+                                    S = S_df,
+                                    tags = tags)
+    return Y_rec_df_ts.reset_index()
+def apply_machine_learning_models(Y_df: pd.DataFrame,
+                                  S_df: pd.DataFrame,
+                                  tags: dict,
+                                  freq: str,
+                                  ml_models: list,
+                                  lags_ml: list,
+                                  date_features_ml: list,
+                                  lag_transforms_ml: dict,
+                                  reconcilers_ml: list,
+                                  horizon_forecast: int = 30,
+                                  num_threads: int = 6) -> pd.DataFrame:
+    """Forecast with the machine-learning models and reconcile the result.
+
+    Args:
+        Y_df: Series to forecast; its index is reset before fitting.
+        S_df: Frame passed to the reconciliation step as ``S``.
+        tags: Mapping passed to the reconciliation step as ``tags``.
+        freq: Frequency string handed to ``MLForecast``.
+        ml_models: Regressor instances handed to ``MLForecast``.
+        lags_ml: Lags of the target used as features.
+        date_features_ml: Date attributes used as features.
+        lag_transforms_ml: Mapping of lag to the transforms applied on it.
+        reconcilers_ml: Reconciler instances handed to
+            ``HierarchicalReconciliation``.
+        horizon_forecast: Number of periods to forecast.
+        num_threads: Thread count handed to ``MLForecast``; defaults to the
+            value that used to be hard-coded.
+
+    Returns:
+        The reconciled forecasts, without any column whose name contains
+        ``'index'``, and with the index moved back into columns.
+    """
+    model_ml = MLForecast(models = ml_models,
+                              freq = freq,
+                              num_threads = num_threads,
+                              lags = lags_ml,
+                              date_features = date_features_ml,
+                              lag_transforms = lag_transforms_ml
+                             )
+    model_ml.fit(Y_df.reset_index(), id_col = 'unique_id', time_col = 'ds', target_col = 'y')
+    Y_hat_df_ml = model_ml.predict(h = horizon_forecast)
+    hrec_ml = HierarchicalReconciliation(reconcilers = reconcilers_ml)
+    Y_rec_df_ml = hrec_ml.reconcile(Y_hat_df = Y_hat_df_ml,
+                            S = S_df,
+                            tags = tags)
+    # NOTE(review): presumably removes leftover index columns carried through
+    # the reconciliation - confirm against the library output.
+    Y_rec_df_ml = Y_rec_df_ml[[col for col in Y_rec_df_ml.columns if 'index' not in col]]
+    return Y_rec_df_ml.reset_index()
+def apply_models(Y_df: pd.DataFrame,
+                 S_df: pd.DataFrame,
+                 tags: dict,
+                 freq: str,
+                 ts_models: None,
+                 reconcilers_ts: None,
+                 ml_models: None,
+                 lags_ml: None,
+                 date_features_ml: None,
+                 lag_transforms_ml: None,
+                 reconcilers_ml: None,
+                 horizon_forecast: None):
+    """Run the requested model families and join their forecasts.
+
+    The time-series models run when ``ts_models`` is truthy and the
+    machine-learning models run when ``ml_models`` is truthy. A family that is
+    skipped contributes an empty frame holding only the join keys.
+
+    Returns:
+        The outer join of both forecast frames on ``ds`` and ``unique_id``.
+    """
+    join_keys = ['ds', 'unique_id']
+    # Placeholders for skipped families, so the merge below always works.
+    ts_recommendations = pd.DataFrame(columns = ['ds', 'unique_id'])
+    ml_recommendations = pd.DataFrame(columns = ['ds', 'unique_id'])
+    if ts_models:
+        print('Executando os modelos de séries temporais...')
+        ts_recommendations = apply_time_series_models(
+            Y_df = Y_df,
+            S_df = S_df,
+            tags = tags,
+            freq = freq,
+            ts_models = ts_models,
+            reconcilers_ts = reconcilers_ts,
+            horizon_forecast = horizon_forecast,
+        )
+    if ml_models:
+        print('Executando os modelos de Machine Learning')
+        ml_recommendations = apply_machine_learning_models(
+            Y_df = Y_df,
+            S_df = S_df,
+            tags = tags,
+            freq = freq,
+            ml_models = ml_models,
+            lags_ml = lags_ml,
+            date_features_ml = date_features_ml,
+            lag_transforms_ml = lag_transforms_ml,
+            reconcilers_ml = reconcilers_ml,
+            horizon_forecast = horizon_forecast,
+        )
+    return pd.merge(ts_recommendations, ml_recommendations, how = 'outer', on = join_keys)
+def clean_recommendations(df_rec: pd.DataFrame, cols_hierarchical: list) -> pd.DataFrame:
+    """Keep the bottom-level forecasts and split their id into hierarchy columns.
+
+    Args:
+        df_rec: Forecasts with a ``unique_id`` column (hierarchy levels joined
+            by ``'/'``), a ``ds`` column and one column per model.
+        cols_hierarchical: Hierarchy column names, ordered from the top level
+            to the bottom level.
+
+    Returns:
+        One row per bottom-level forecast with the columns
+        ``cols_hierarchical + ['date'] + model columns``, where the model
+        columns are those whose name contains ``'/'``. The frame is empty,
+        with the same columns, when ``df_rec`` has no bottom-level rows.
+        ``df_rec`` itself is not modified.
+    """
+    depth = len(cols_hierarchical)
+    model_col = [col for col in df_rec.columns if '/' in col]
+    # An id with k separators sits at level k + 1; only the deepest level is kept.
+    is_bottom_level = df_rec['unique_id'].str.count('/') + 1 == depth
+    df_rec1 = df_rec.loc[is_bottom_level, ['unique_id', 'ds'] + model_col].copy()
+    # With no rows the split yields no columns at all; reindex guarantees one
+    # column per hierarchy level so the assignment below cannot fail.
+    id_parts = df_rec1['unique_id']\
+        .str.split('/', n = depth, expand = True)\
+        .reindex(columns = range(depth))
+    df_rec1[cols_hierarchical] = id_parts
+    df_rec1 = df_rec1\
+        .rename(columns = {'ds': 'date'})\
+        .reset_index(drop = True)[cols_hierarchical + ['date'] + model_col]
+    return df_rec1

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+pandas==2.1.4
+numpy==1.23.5
+hierarchicalforecast==0.4.1
+statsforecast==1.7.0
+mlforecast==0.11.4
+scikit-learn==1.3.2
+lightgbm==4.2.0
+xgboost==2.0.3
+gradio==4.15.0