barbosarafael committed on
Commit
eccb413
·
verified ·
1 Parent(s): af2178f

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +85 -0
  2. functions.py +201 -0
  3. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functions import *
2
+
3
#---- 1. Load the data:

# CSV fetched over HTTP when the module is imported.
# NOTE(review): presumably the retail clothing sales sample from the
# aws-samples hierarchical-forecasting repository - confirm the schema.
path_data = 'https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-hierarchical-forecasting/main/retail-usa-clothing.csv'

# Raw frame shared by every prediction request and by the UI headers.
dados = read_data(path_data)
8
+
9
+ #---- 2. Criando a função de predict:
10
+
11
# Rolling-mean lag transforms handed to MLForecast. They live at module level
# so that numba compiles each of them once, instead of rebuilding (and
# recompiling) fresh jitted functions on every prediction request.
@njit
def rolling_mean_7(x):
    # Rolling mean over a 7-observation window.
    return rolling_mean(x, window_size=7)


@njit
def rolling_mean_14(x):
    # Rolling mean over a 14-observation window.
    return rolling_mean(x, window_size=14)


@njit
def rolling_mean_21(x):
    # Rolling mean over a 21-observation window.
    return rolling_mean(x, window_size=21)


@njit
def rolling_mean_28(x):
    # Rolling mean over a 28-observation window.
    return rolling_mean(x, window_size=28)


def fun_predict(days_to_forecast):
    """Forecast every bottom-level series for the requested number of days.

    Cleans the module-level ``dados`` frame, builds the hierarchical
    structure over region/state/item, runs the time-series and machine
    learning models with their reconcilers, and returns the cleaned result.

    Args:
        days_to_forecast: Forecast horizon in days. Numeric values are
            truncated to an integer because the models need a whole number
            of steps.

    Returns:
        The frame produced by ``clean_recommendations``: one row per
        bottom-level series and date, with one column per reconciled model.

    Raises:
        ValueError: If the horizon is missing or smaller than one day.
    """
    if days_to_forecast is None:
        raise ValueError('days_to_forecast must be provided')

    horizon = int(days_to_forecast)
    if horizon < 1:
        raise ValueError('days_to_forecast must be at least 1')

    #---- a. Clean the raw data:
    print('Corrigindo os dados')

    df = clean_data(df=dados)

    #---- b. Shape the data the way the models expect:
    print('Formatando os dados para os modelos:')

    cols_hierarchical = ['region', 'state', 'item']

    Y_df, S_df, tags = format_hierarchical_df(df=df, cols_hierarchical=cols_hierarchical)

    #---- c. Run the time-series and machine-learning models:
    print('Aplicando os modelos de TS e ML')

    # Holt-Winters with a season length of 7 observations; error_type 'M'
    # selects the multiplicative error form (not the additive one).
    hw = HoltWinters(season_length=7, error_type='M')
    lin_reg = LinearRegression()  # Plain linear regression.

    df_recommendations = apply_models(
        Y_df=Y_df,
        S_df=S_df,
        tags=tags,
        freq='D',
        ts_models=[hw],
        reconcilers_ts=[BottomUp()],
        ml_models=[lin_reg],
        lags_ml=[1, 7, 14, 21, 28, 30],
        date_features_ml=['dayofweek', 'month', 'year', 'quarter', 'day', 'week'],
        lag_transforms_ml={
            1: [expanding_mean],
            7: [rolling_mean_7],
            14: [rolling_mean_14],
            21: [rolling_mean_21],
            28: [rolling_mean_28],
        },
        reconcilers_ml=[OptimalCombination(method='ols', nonnegative=True)],
        horizon_forecast=horizon,
    )

    print('Corrigindo o dataframe')
    df_result = clean_recommendations(df_rec=df_recommendations, cols_hierarchical=cols_hierarchical)

    return df_result
74
+
75
+
76
# precision=0 makes Gradio round the entered value and pass an int to
# fun_predict, since a forecast horizon must be a whole number of days.
inputs = gr.Number(label='Dias para a projeção', value=30, precision=0)

# NOTE(review): these headers are the columns of the raw input frame
# (`dados`), not the columns of the frame returned by fun_predict - confirm
# this placeholder is intended.
outputs = [gr.DataFrame(headers=dados.columns.tolist())]

demo = gr.Interface(
    fn=fun_predict,
    inputs=inputs,
    outputs=outputs,
    title='Projeções de múltiplas séries temporais',
)

demo.launch(share=True)
functions.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #------- Bibliotecas:
2
+
3
+ # Manipulação de dados:
4
+
5
+ import pandas as pd
6
+ import numpy as np
7
+
8
+ # Modelagem:
9
+
10
+ from hierarchicalforecast.utils import aggregate
11
+ from statsforecast import StatsForecast
12
+ from statsforecast.models import Naive, AutoARIMA, HoltWinters, AutoETS
13
+ from mlforecast import MLForecast
14
+ from sklearn.linear_model import LinearRegression
15
+ from sklearn.tree import DecisionTreeRegressor
16
+ from sklearn.ensemble import RandomForestRegressor
17
+ from lightgbm import LGBMRegressor
18
+ from xgboost import XGBRegressor
19
+ from numba import njit
20
+ from window_ops.expanding import expanding_mean
21
+ from window_ops.rolling import rolling_mean
22
+
23
+ # Reconciliação
24
+
25
+ from hierarchicalforecast.methods import BottomUp, OptimalCombination
26
+ from hierarchicalforecast.core import HierarchicalReconciliation
27
+
28
+ # Gradio
29
+
30
+ import gradio as gr
31
+
32
+
33
+ #------- Funções:
34
+
35
def read_data(path: str) -> pd.DataFrame:
    """Read the CSV located at ``path`` and return it as a DataFrame.

    Args:
        path: Location handed straight to ``pandas.read_csv``.

    Returns:
        The parsed frame, with pandas' default parsing options.
    """
    return pd.read_csv(path)
40
+
41
+
42
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Prepare the raw sales frame for the forecasting libraries.

    Drops the ``country`` column, parses ``date`` as datetime and renames
    ``date`` -> ``ds`` and ``quantity`` -> ``y``. The input frame is left
    untouched; a new frame is returned.

    Args:
        df: Raw frame containing at least ``country`` and ``date`` columns.

    Returns:
        A new frame without ``country``, with ``ds`` as datetime and the
        target column named ``y``.
    """
    renamed_columns = {'date': 'ds', 'quantity': 'y'}

    return (
        df
        .drop(columns='country')
        .assign(date=lambda frame: pd.to_datetime(frame['date']))
        .rename(columns=renamed_columns)
    )
62
+
63
def format_hierarchical_df(df: pd.DataFrame, cols_hierarchical: list):
    """Aggregate ``df`` into the structures used for hierarchical forecasting.

    Args:
        df: Cleaned frame passed to ``aggregate``.
        cols_hierarchical: Hierarchy columns, ordered from the top level to
            the bottom level.

    Returns:
        The ``(Y_df, S_df, tags)`` triple returned by ``aggregate``.
    """
    # Growing prefixes of the hierarchy:
    # [[col1], [col1, col2], ..., [col1, col2, ..., coln]]
    level_spec = [
        cols_hierarchical[:depth]
        for depth, _ in enumerate(cols_hierarchical, start=1)
    ]

    # Let the hierarchical library build the aggregated series, the
    # summing structure and the level tags.
    aggregated, summing, level_tags = aggregate(df=df, spec=level_spec)

    return aggregated, summing, level_tags
74
+
75
+
76
def apply_time_series_models(Y_df: pd.DataFrame,
                             S_df: pd.DataFrame,
                             tags: dict,
                             freq: str,
                             ts_models: list,
                             reconcilers_ts: list,
                             horizon_forecast: int = 30) -> pd.DataFrame:
    """Forecast with statistical models and reconcile across the hierarchy.

    Args:
        Y_df: Aggregated series passed to ``StatsForecast``.
        S_df: Summing structure passed to the reconciliation step.
        tags: Hierarchy level tags passed to the reconciliation step.
        freq: Frequency string of the series.
        ts_models: Model instances handed to ``StatsForecast``.
        reconcilers_ts: Reconciler instances handed to
            ``HierarchicalReconciliation``.
        horizon_forecast: Number of future steps to forecast.

    Returns:
        The reconciled forecasts with the index moved back into columns.
    """
    model_ts = StatsForecast(ts_models,
                             freq=freq,
                             n_jobs=-1)

    # `forecast` fits the models itself, so a separate `fit` call beforehand
    # would only train every model twice; pass the data straight to it.
    Y_hat_df_ts = model_ts.forecast(df=Y_df, h=horizon_forecast)

    hrec_ts = HierarchicalReconciliation(reconcilers=reconcilers_ts)

    Y_rec_df_ts = hrec_ts.reconcile(Y_hat_df=Y_hat_df_ts,
                                    S=S_df,
                                    tags=tags)

    return Y_rec_df_ts.reset_index()
98
+
99
def apply_machine_learning_models(Y_df: pd.DataFrame,
                                  S_df: pd.DataFrame,
                                  tags: dict,
                                  freq: str,
                                  ml_models: list,
                                  lags_ml: list,
                                  date_features_ml: list,
                                  lag_transforms_ml: dict,
                                  reconcilers_ml: list,
                                  horizon_forecast: int = 30) -> pd.DataFrame:
    """Forecast with machine-learning models and reconcile the hierarchy.

    Args:
        Y_df: Aggregated series; its index is reset before fitting.
        S_df: Summing structure passed to the reconciliation step.
        tags: Hierarchy level tags passed to the reconciliation step.
        freq: Frequency string of the series.
        ml_models: Regressor instances handed to ``MLForecast``.
        lags_ml: Lags of the target used as features.
        date_features_ml: Date attributes used as features.
        lag_transforms_ml: Mapping of lag -> list of transform functions.
        reconcilers_ml: Reconciler instances handed to
            ``HierarchicalReconciliation``.
        horizon_forecast: Number of future steps to forecast.

    Returns:
        The reconciled forecasts, without any column whose name contains
        ``'index'``, with the index moved back into columns.
    """
    forecaster = MLForecast(
        models=ml_models,
        freq=freq,
        num_threads=6,
        lags=lags_ml,
        date_features=date_features_ml,
        lag_transforms=lag_transforms_ml,
    )

    training_frame = Y_df.reset_index()
    forecaster.fit(training_frame, id_col='unique_id', time_col='ds', target_col='y')

    base_forecasts = forecaster.predict(h=horizon_forecast)

    reconciler = HierarchicalReconciliation(reconcilers=reconcilers_ml)
    reconciled = reconciler.reconcile(Y_hat_df=base_forecasts, S=S_df, tags=tags)

    # Discard leftover columns whose name contains 'index'.
    kept_columns = [name for name in reconciled.columns if 'index' not in name]

    return reconciled[kept_columns].reset_index()
131
+
132
+
133
def apply_models(Y_df: pd.DataFrame,
                 S_df: pd.DataFrame,
                 tags: dict,
                 freq: str,
                 ts_models: list,
                 reconcilers_ts: list,
                 ml_models: list,
                 lags_ml: list,
                 date_features_ml: list,
                 lag_transforms_ml: dict,
                 reconcilers_ml: list,
                 horizon_forecast: int) -> pd.DataFrame:
    """Run the time-series and/or machine-learning pipelines and join them.

    A model family is skipped when its model list is empty or ``None``; an
    empty frame holding only the join keys stands in for its output.

    Args:
        Y_df: Aggregated series.
        S_df: Summing structure for reconciliation.
        tags: Hierarchy level tags for reconciliation.
        freq: Frequency string of the series.
        ts_models: Statistical models (falsy to skip that family).
        reconcilers_ts: Reconcilers for the statistical forecasts.
        ml_models: Machine-learning models (falsy to skip that family).
        lags_ml: Lags of the target used as ML features.
        date_features_ml: Date attributes used as ML features.
        lag_transforms_ml: Mapping of lag -> list of transform functions.
        reconcilers_ml: Reconcilers for the ML forecasts.
        horizon_forecast: Number of future steps to forecast.

    Returns:
        Outer join of both outputs on ``ds`` and ``unique_id``.
    """
    join_keys = ['ds', 'unique_id']

    if not ts_models:
        ts_part = pd.DataFrame(columns=join_keys)
    else:
        print('Executando os modelos de séries temporais...')
        ts_part = apply_time_series_models(
            Y_df=Y_df,
            S_df=S_df,
            tags=tags,
            freq=freq,
            ts_models=ts_models,
            reconcilers_ts=reconcilers_ts,
            horizon_forecast=horizon_forecast,
        )

    if not ml_models:
        ml_part = pd.DataFrame(columns=join_keys)
    else:
        print('Executando os modelos de Machine Learning')
        ml_part = apply_machine_learning_models(
            Y_df=Y_df,
            S_df=S_df,
            tags=tags,
            freq=freq,
            ml_models=ml_models,
            lags_ml=lags_ml,
            date_features_ml=date_features_ml,
            lag_transforms_ml=lag_transforms_ml,
            reconcilers_ml=reconcilers_ml,
            horizon_forecast=horizon_forecast,
        )

    return ts_part.merge(ml_part, how='outer', on=join_keys)
182
+
183
+
184
def clean_recommendations(df_rec: pd.DataFrame, cols_hierarchical: list) -> pd.DataFrame:
    """Keep the bottom-level reconciled forecasts and split their identifier.

    Only columns whose name contains ``'/'`` are kept as model outputs. Rows
    are kept when their ``unique_id`` has as many ``'/'``-separated parts as
    there are hierarchy columns; each part becomes one hierarchy column.

    Args:
        df_rec: Frame with ``unique_id``, ``ds`` and the forecast columns.
        cols_hierarchical: Hierarchy column names, top level first.

    Returns:
        A new frame with columns ``cols_hierarchical + ['date'] + model
        columns`` and a fresh 0..n-1 index. It is empty (with those same
        columns) when no row sits at the deepest level. ``df_rec`` is not
        modified.
    """
    model_col = [col for col in df_rec.columns if '/' in col]
    n_levels = len(cols_hierarchical)

    # Depth of an identifier = number of separators + 1 (an id without any
    # separator is therefore level 1).
    depth = df_rec['unique_id'].str.count('/') + 1
    bottom = df_rec.loc[depth == n_levels, ['unique_id', 'ds'] + model_col]

    # Every remaining id has exactly n_levels parts. Building the frame from
    # plain lists also works when nothing is left, and avoids assigning new
    # columns onto a filtered slice of the input.
    hierarchy = pd.DataFrame(
        [uid.split('/') for uid in bottom['unique_id']],
        columns=cols_hierarchical,
        index=bottom.index,
    )

    forecasts = bottom[['ds'] + model_col].rename(columns={'ds': 'date'})

    result = pd.concat([hierarchy, forecasts], axis=1).reset_index(drop=True)

    return result[cols_hierarchical + ['date'] + model_col]
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ pandas==2.1.4
2
+ numpy==1.23.5
3
+ hierarchicalforecast==0.4.1
4
+ statsforecast==1.7.0
5
+ mlforecast==0.11.4
6
+ scikit-learn==1.3.2
7
+ lightgbm==4.2.0
8
+ xgboost==2.0.3
9
+ gradio==4.15.0