Shkanov committed
Commit edd4787 · verified · 1 Parent(s): d85aba7

Upload 9 files

src/Portfolio_optimization.py ADDED
@@ -0,0 +1,72 @@
+ import streamlit as st
+
+ from experiment_runner_for_portfolio import DataLoader, Portfolio
+ import pickle
+ from sidebar_portfolio import sidebar
+
+
+ st.set_page_config(
+     page_title="Portfolio optimization",
+     page_icon="📊")
+
+ st.title("Portfolio Optimization")
+ sidebar_dict = sidebar()
+ run = st.sidebar.button('Run portfolio optimization')
+ dataloader = DataLoader()
+ portfolio = Portfolio()
+ if run:
+     st.header('Price Prediction Results')
+     dataloader.experiment_data(top_n=sidebar_dict['top_n'], num_scale_steps=sidebar_dict['num_scale_steps'],
+                                scaling_strategy=sidebar_dict['scaling_strategy'], time_step_backward=sidebar_dict['time_step_backward'])
+     col1_tickers, col2_tickers = st.columns(2)
+     with col1_tickers:
+         st.subheader('Valid Tickers:')
+         st.write(dataloader.valid_tickers)
+     with col2_tickers:
+         st.subheader('Invalid Tickers:')
+         st.write(dataloader.invalid_tickers)
+
+     col1_date, col2_date = st.columns(2)
+     with col1_date:
+         st.write('Test Min Date:')
+         st.write(dataloader.global_min_date)
+     with col2_date:
+         st.write('Training Max Date:')
+         st.write(dataloader.global_max_date)
+
+     st.subheader('Model Metrics:')
+     for ticker in dataloader.valid_tickers:
+         st.write(f'{ticker}:')
+         st.write('Best model on test data MAPE: ', dataloader.tickers_dict[ticker]['metrics_df'].T.sort_values(by='Test data MAPE', ascending=True).index[0])
+         st.write(dataloader.tickers_dict[ticker]['metrics_df'])
+
+     st.header('Portfolio Optimization Results')
+     portfolio.optimize_portfolio(cov_matrix=dataloader.cov_matrix, validation_data=dataloader.validation_data, validation_actual=dataloader.validation_actual,
+                                  test_data=dataloader.test_data, test_actual=dataloader.test_actual, target_return=sidebar_dict['target_return'], allow_short=sidebar_dict['allow_short'])
+     col1_weights, col2_weights = st.columns(2)
+
+     with col1_weights:
+         st.subheader('Selected tickers:')
+         st.write(dataloader.selected_features)
+     with col2_weights:
+         st.subheader('Portfolio weights:')
+         st.write(portfolio.weights)
+
+     col1_results, col2_results = st.columns(2)
+     with col1_results:
+         st.write(f"Validation Return Accuracy: {portfolio.val_return_accuracy:.4f}")
+         st.write(f"Validation Volatility Accuracy: {portfolio.val_volatility_accuracy:.4f}")
+         st.write(f"Validation Sharpe Ratio Deviation: {portfolio.val_sharpe_deviation:.4f}")
+         st.write(f"Validation Pred Return Sum: {portfolio.val_sum_pred_returns:.4f}")
+         st.write(f"Validation Actual Return Sum: {portfolio.val_sum_realized_returns:.4f}")
+
+     with col2_results:
+         st.write(f"Test Return Accuracy: {portfolio.test_return_accuracy:.4f}")
+         st.write(f"Test Volatility Accuracy: {portfolio.test_volatility_accuracy:.4f}")
+         st.write(f"Test Sharpe Ratio Deviation: {portfolio.test_sharpe_deviation:.4f}")
+         st.write(f"Test Pred Return Sum: {portfolio.test_sum_pred_returns:.4f}")
+         st.write(f"Test Actual Return Sum: {portfolio.test_sum_realized_returns:.4f}")
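Note: the page above relies on `sidebar_portfolio.sidebar()` returning a dict with the six keys read here (`top_n`, `num_scale_steps`, `scaling_strategy`, `time_step_backward`, `target_return`, `allow_short`). That module is part of this upload but not shown in the diff; a minimal sketch of the expected contract (widget types, labels and ranges are assumptions, not the actual module) could look like:

import streamlit as st

def sidebar() -> dict:
    # Hypothetical widgets; only the returned keys are fixed by Portfolio_optimization.py
    return {
        'top_n': st.sidebar.slider('Top N tickers', 2, 20, 3),
        'num_scale_steps': st.sidebar.slider('Scaling step size', 1, 100, 1),
        'scaling_strategy': st.sidebar.selectbox('Scaling method', ['median', 'average', 'undersampling']),
        'time_step_backward': st.sidebar.slider('Predictor lookback steps', 5, 60, 15),
        'target_return': st.sidebar.number_input('Target return', value=0.0),
        'allow_short': st.sidebar.checkbox('Allow short positions'),
    }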
src/README.md ADDED
@@ -0,0 +1 @@
+ # ForecastAGLT
src/experiment_runner_for_best_models.py ADDED
@@ -0,0 +1,568 @@
+
+
+ def experiment(ticker, num_scale_steps, scaling_strategy, time_step_backward):
+     import pandas as pd
+     import numpy as np
+     import math
+
+     # For evaluation we will use these libraries
+     from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
+     from sklearn.preprocessing import MinMaxScaler
+
+     # For model building we will use these libraries
+     from tensorflow.keras.models import Sequential
+     from tensorflow.keras.layers import Dense
+     from tensorflow.keras.layers import LSTM
+     from tensorflow.keras import initializers
+     from tensorflow.keras.callbacks import EarlyStopping
+
+     # For plotting we will use this library
+     import matplotlib.pyplot as plt
+
+     import yfinance as yf
+
+     from gmdh import CriterionType, Criterion, Multi, Combi, Mia, Ria, PolynomialType
+     from chronos import ChronosPipeline
+     import torch
+     import pmdarima as pm
+     from pages.utils.utils import create_dataset, make_prediction
+
+     # @st.cache_data
+     def get_pipeline():
+         pipeline = ChronosPipeline.from_pretrained(
+             "amazon/chronos-t5-tiny",
+             device_map="cpu",  # use "cpu" for CPU inference and "mps" for Apple Silicon
+             torch_dtype=torch.bfloat16)
+         return pipeline
+
+     pipeline = get_pipeline()
+
+     pd.options.display.float_format = '{:20,.4f}'.format
+     seed = 42
+     interval = '1d'  # fixed to daily data in this runner
+
+     int_to_periods = {'1m': '5d', '2m': '1mo', '5m': '1mo', '15m': '1mo', '30m': '1mo', '60m': '1mo', '90m': '1mo',
+                       '1h': '1y', '1d': '10y', '5d': '10y', '1wk': '10y', '1mo': '10y', '3mo': '10y'}
+
+     period_cut = {'1d': '2022-02-19', '1wk': '2020-06-19', '1mo': '2014-06-19'}
+
+     try:
+         maindf = yf.download(tickers=f"{ticker}-USD",  # list of tickers
+                              period='max',             # time period
+                              interval=interval,        # trading interval
+                              prepost=False,            # download pre/post market hours data?
+                              repair=True)              # repair obvious price errors, e.g. 100x
+         if len(maindf) == 0:
+             raise FileNotFoundError
+     except Exception:
+         # Fall back to a local CSV snapshot if the download fails or is empty
+         maindf = pd.read_csv(f'{ticker}.csv')
+     maindf = maindf.reset_index()
+     maindf['Date'] = pd.to_datetime(maindf['Date'], format='%Y-%m-%d')
+
+     print('Total number of days present in the dataset: ', maindf.shape[0])
+     print('Total number of fields present in the dataset: ', maindf.shape[1])
+     print(maindf.head())
+
+     y_overall = maindf.copy()
+     scale_step_type = 'D'  # scaling step type; 'W', 'M' and 'Y' are also possible
+     y_overall = y_overall[['Date', 'Close']]
+     if num_scale_steps > 1:
+         scaling_step_combined = str(num_scale_steps) + scale_step_type
+         # Determine today's date
+         today = pd.Timestamp.now().normalize()
+         if scaling_strategy == 'average':
+             # Add a column holding the end of each scaling interval
+             y_overall['Interval_End'] = today - (
+                 (today - y_overall['Date']) // pd.Timedelta(scaling_step_combined)) * pd.Timedelta(
+                 scaling_step_combined)
+             # Group by interval and take the mean
+             y_overall = y_overall.groupby('Interval_End')['Close'].mean().reset_index()
+             # Sort the result
+             y_overall = y_overall.sort_values('Interval_End')
+             y_overall = y_overall.rename({'Interval_End': 'Date'}, axis=1)
+         elif scaling_strategy == 'median':
+             # Add a column holding the end of each scaling interval
+             y_overall['Interval_End'] = today - (
+                 (today - y_overall['Date']) // pd.Timedelta(scaling_step_combined)) * pd.Timedelta(
+                 scaling_step_combined)
+             # Group by interval and take the median
+             y_overall = y_overall.groupby('Interval_End')['Close'].median().reset_index()
+             # Sort the result
+             y_overall = y_overall.sort_values('Interval_End')
+             y_overall = y_overall.rename({'Interval_End': 'Date'}, axis=1)
+         else:
+             # Undersampling: keep the last observation of each interval, anchored at the end of the series
+             y_overall = y_overall.resample(on='Date', rule=scaling_step_combined, origin='end').last()
+             y_overall = y_overall.reset_index()
+
+
+     fig, ax = plt.subplots()
+     ax.plot(y_overall['Close'], label='Stock Close Price')
+     ax.legend()
+     ax.set_title(f'Close price dynamics for {ticker}')
+
+     time_step_forward = 1  # number of forward steps for the target
+
+     pred_days = 1
+     recursive_pred = False
+     if time_step_forward == 1:
+         pred_days = 15         # number of steps for the recursive forecast
+         recursive_pred = True  # run the recursive forecast
+
+     GMDH = True         # add the GMDH mode
+     transformer = True  # add the Transformer mode
+     if GMDH:
+         GMDHs = {'Combi': Combi(), 'Multi': Multi(), 'Mia': Mia(), 'Ria': Ria()}
+         criterions = {'Regularity criterion (asymmetric form)': CriterionType.REGULARITY,
+                       'Regularity criterion (symmetric form)': CriterionType.SYM_REGULARITY,
+                       'Stability criterion (asymmetric form)': CriterionType.STABILITY,
+                       'Stability criterion (symmetric form)': CriterionType.SYM_STABILITY,
+                       'Minimum coefficient bias criterion': CriterionType.UNBIASED_COEFFS,
+                       'Minimum output bias criterion (asymmetric form)': CriterionType.UNBIASED_OUTPUTS,
+                       'Minimum output bias criterion (symmetric form)': CriterionType.SYM_UNBIASED_OUTPUTS,
+                       'Absolute noise immunity criterion (asymmetric form)': CriterionType.ABSOLUTE_NOISE_IMMUNITY,
+                       'Absolute noise immunity criterion (symmetric form)': CriterionType.SYM_ABSOLUTE_NOISE_IMMUNITY}
+         polynoms = {'LINEAR': PolynomialType.LINEAR,
+                     'LINEAR_COV': PolynomialType.LINEAR_COV,
+                     'QUADRATIC': PolynomialType.QUADRATIC}
+         # Hyperparameters of the two GMDH models (fixed here; set via sidebar widgets in the Streamlit page)
+         GMDH_algo1 = 'Multi'
+         criterion1 = 'Regularity criterion (asymmetric form)'
+         p_average1 = 1
+         limit1 = 0.
+         k_best1 = 1
+         polynom1 = 'LINEAR'
+         GMDH_algo2 = 'Ria'
+         criterion2 = 'Regularity criterion (asymmetric form)'
+         p_average2 = 1
+         limit2 = 0.
+         k_best2 = 3
+         polynom2 = 'QUADRATIC'
+
+     y_overall.columns = y_overall.columns.droplevel(1)  # drop the ticker level of yfinance's MultiIndex columns
+
+     # First take all the close prices
+     closedf = y_overall[['Date', 'Close']].dropna()
+     print("Shape of close dataframe:", closedf.shape)
+     closedf = closedf[-1000:]  # keep the last 1000 observations
+     close_stock = closedf.copy()
+     print("Total data for prediction: ", closedf.shape[0])
+
+     # Delete the date column and normalize using a MinMax scaler
+     scaler = MinMaxScaler(feature_range=(0, 1))
+     print(closedf.shape)
+
+     # Keep 70% of the data for training and 30% for testing
+     training_size = int(len(closedf) * 0.70)
+     test_size = len(closedf) - training_size
+     assert test_size > 2 * (time_step_backward + time_step_forward), "test_size must exceed 2 x (time_step_backward + time_step_forward)"
+     train_data, test_data = closedf[0:training_size], closedf[training_size:len(closedf)]
+     train_start_date, train_end_date = train_data['Date'].iloc[0], train_data['Date'].iloc[-1]
+
+     del closedf['Date'], train_data['Date'], test_data['Date']
+     train_data = scaler.fit_transform(train_data)
+     test_data = scaler.transform(test_data)
+     print("train_data: ", train_data.shape)
+     print("test_data: ", test_data.shape)
+
+     X_train, y_train = create_dataset(train_data, time_step_backward, time_step_forward)
+     X_test, y_test = create_dataset(test_data, time_step_backward, time_step_forward)
+
+     print("X_train: ", X_train.shape)
+     print("y_train: ", y_train.shape)
+     print("X_test: ", X_test.shape)
+     print("y_test", y_test.shape)
+
+     # Reshape the input to [samples, time steps, features], as required for LSTM
+     X_train_gmdh = X_train.copy()
+     X_test_gmdh = X_test.copy()
+     X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
+     X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
+
+     print("X_train: ", X_train.shape)
+     print("X_test: ", X_test.shape)
+
+     model = Sequential()
+     model.add(LSTM(10, input_shape=(None, 1), activation="relu",
+                    kernel_initializer=initializers.GlorotNormal(seed=seed),
+                    bias_initializer=initializers.GlorotNormal(seed=seed)))
+     model.add(Dense(1,
+                     kernel_initializer=initializers.GlorotNormal(seed=seed),
+                     bias_initializer=initializers.GlorotNormal(seed=seed)))
+     model.compile(loss="mean_squared_error", optimizer="adam")
+     callback = EarlyStopping(monitor='loss', patience=30, restore_best_weights=True)
+     history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, verbose=False,
+                         callbacks=[callback])
+
+     arima_model = pm.auto_arima(train_data,
+                                 m=12,                  # frequency of the series
+                                 seasonal=True,         # True if the series is seasonal
+                                 d=None,                # let the model determine 'd'
+                                 test='adf',            # use the ADF test to find the optimal 'd'
+                                 start_p=0, start_q=0,  # minimum p and q
+                                 max_p=time_step_backward, max_q=time_step_backward,  # maximum p and q
+                                 D=None,                # let the model determine 'D'
+                                 trace=True,
+                                 error_action='ignore',
+                                 suppress_warnings=True,
+                                 stepwise=True)
+     print(arima_model.summary())
+
+     if GMDH:
+         model_gmdh1 = GMDHs[GMDH_algo1]
+         if GMDH_algo1 == 'Combi':
+             model_gmdh1.fit(X_train_gmdh, y_train, p_average=p_average1, limit=limit1, test_size=0.3,
+                             criterion=Criterion(criterion_type=criterions[criterion1]))
+         if GMDH_algo1 == 'Multi':
+             model_gmdh1.fit(X_train_gmdh, y_train, p_average=p_average1, limit=limit1, test_size=0.3,
+                             criterion=Criterion(criterion_type=criterions[criterion1]),
+                             k_best=k_best1)
+         if GMDH_algo1 in ['Ria', 'Mia']:
+             model_gmdh1.fit(X_train_gmdh, y_train, p_average=p_average1, limit=limit1, test_size=0.3,
+                             criterion=Criterion(criterion_type=criterions[criterion1]),
+                             k_best=k_best1, polynomial_type=polynoms[polynom1])
+         print(f"GMDH model 1: {model_gmdh1.get_best_polynomial()}")
+
+         model_gmdh2 = GMDHs[GMDH_algo2]
+         if GMDH_algo2 == 'Combi':
+             model_gmdh2.fit(X_train_gmdh, y_train, p_average=p_average2, limit=limit2, test_size=0.3,
+                             criterion=Criterion(criterion_type=criterions[criterion2]))
+         if GMDH_algo2 == 'Multi':
+             model_gmdh2.fit(X_train_gmdh, y_train, p_average=p_average2, limit=limit2, test_size=0.3,
+                             criterion=Criterion(criterion_type=criterions[criterion2]),
+                             k_best=k_best2)
+         if GMDH_algo2 in ['Ria', 'Mia']:
+             model_gmdh2.fit(X_train_gmdh, y_train, p_average=p_average2, limit=limit2, test_size=0.3,
+                             criterion=Criterion(criterion_type=criterions[criterion2]),
+                             k_best=k_best2, polynomial_type=polynoms[polynom2])
+         print(f"GMDH model 2: {model_gmdh2.get_best_polynomial()}")
+     """
+     if transformer:
+         X_train_context = torch.tensor(X_train_gmdh)
+         X_test_context = torch.tensor(X_test_gmdh)
+         X_train_forecast = pipeline.predict(
+             X_train_context,
+             time_step_forward,
+             num_samples=3,
+             temperature=1.0,
+             top_k=50,
+             top_p=1.0)
+         X_test_forecast = pipeline.predict(
+             X_test_context,
+             time_step_forward,
+             num_samples=3,
+             temperature=1.0,
+             top_k=50,
+             top_p=1.0)
+     """
+
+     loss = history.history['loss']
+     val_loss = history.history['val_loss']
+
+     epochs = range(len(loss))
+
+     fig, ax = plt.subplots()
+     ax.plot(epochs, loss, 'r', label='Training loss')
+     ax.plot(epochs, val_loss, 'b', label='Validation loss')
+     ax.legend()
+     ax.set_title('Training and validation loss')
+
+     original_ytrain = scaler.inverse_transform(y_train.reshape(-1, 1))
+     original_ytest = scaler.inverse_transform(y_test.reshape(-1, 1))
+
+     train_predict, test_predict = make_prediction(X_train, X_test, method='LSTM', model=model,
+                                                   scaler=scaler, time_step_forward=time_step_forward)
+     train_predict_arima, test_predict_arima = make_prediction(X_train, X_test, method='SARIMA', model=arima_model,
+                                                               scaler=scaler, time_step_forward=time_step_forward)
+     if GMDH:
+         train_predict_gmdh1, test_predict_gmdh1 = make_prediction(X_train_gmdh, X_test_gmdh, method='GMDH',
+                                                                   model=model_gmdh1,
+                                                                   scaler=scaler, time_step_forward=time_step_forward)
+         train_predict_gmdh2, test_predict_gmdh2 = make_prediction(X_train_gmdh, X_test_gmdh, method='GMDH',
+                                                                   model=model_gmdh2,
+                                                                   scaler=scaler, time_step_forward=time_step_forward)
+     if transformer:
+         X_train_forecast_median, X_test_forecast_median = make_prediction(X_train_gmdh, X_test_gmdh,
+                                                                           method='Transformer', model=pipeline,
+                                                                           scaler=scaler,
+                                                                           time_step_forward=time_step_forward)
+
+     # Evaluation metrics: RMSE, MSE, MAE, MAPE and R2 on train and test data
+     metrics_tmp = {}
+     metrics1 = {}
+     metrics1['LSTM'] = []
+     metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict))
+     metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict)
+     metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict)
+     metrics_tmp["Train data MAPE"] = mean_absolute_percentage_error(original_ytrain, train_predict)
+     print("-------------------------------------------------------------------------------------")
+     metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict))
+     metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict)
+     metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict)
+     metrics_tmp["Test data MAPE"] = mean_absolute_percentage_error(original_ytest, test_predict)
+     metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict)
+     metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict)
+     for metric in metrics_tmp:
+         print(metric, ': ', metrics_tmp[metric])
+         metrics1['LSTM'].append(metrics_tmp[metric])
+
+     metrics1['SARIMA'] = []
+     metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict_arima))
+     metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict_arima)
+     metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict_arima)
+     metrics_tmp["Train data MAPE"] = mean_absolute_percentage_error(original_ytrain, train_predict_arima)
+     print("-------------------------------------------------------------------------------------")
+     metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict_arima))
+     metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict_arima)
+     metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict_arima)
+     metrics_tmp["Test data MAPE"] = mean_absolute_percentage_error(original_ytest, test_predict_arima)
+     metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict_arima)
+     metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict_arima)
+     for metric in metrics_tmp:
+         print(metric, ': ', metrics_tmp[metric])
+         metrics1['SARIMA'].append(metrics_tmp[metric])
+     if GMDH:
+         metrics1['GMDH_1'] = []
+         metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict_gmdh1))
+         metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict_gmdh1)
+         metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict_gmdh1)
+         metrics_tmp["Train data MAPE"] = mean_absolute_percentage_error(original_ytrain, train_predict_gmdh1)
+         print("-------------------------------------------------------------------------------------")
+         metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict_gmdh1))
+         metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict_gmdh1)
+         metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict_gmdh1)
+         metrics_tmp["Test data MAPE"] = mean_absolute_percentage_error(original_ytest, test_predict_gmdh1)
+         metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict_gmdh1)
+         metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict_gmdh1)
+         for metric in metrics_tmp:
+             print(metric, ': ', metrics_tmp[metric])
+             metrics1['GMDH_1'].append(metrics_tmp[metric])
+
+         metrics1['GMDH_2'] = []
+         metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict_gmdh2))
+         metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict_gmdh2)
+         metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict_gmdh2)
+         metrics_tmp["Train data MAPE"] = mean_absolute_percentage_error(original_ytrain, train_predict_gmdh2)
+         print("-------------------------------------------------------------------------------------")
+         metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict_gmdh2))
+         metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict_gmdh2)
+         metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict_gmdh2)
+         metrics_tmp["Test data MAPE"] = mean_absolute_percentage_error(original_ytest, test_predict_gmdh2)
+         metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict_gmdh2)
+         metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict_gmdh2)
+         for metric in metrics_tmp:
+             print(metric, ': ', metrics_tmp[metric])
+             metrics1['GMDH_2'].append(metrics_tmp[metric])
+
+     if transformer:
+         metrics1['Transformer'] = []
+         metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, X_train_forecast_median))
+         metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, X_train_forecast_median)
+         metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, X_train_forecast_median)
+         metrics_tmp["Train data MAPE"] = mean_absolute_percentage_error(original_ytrain, X_train_forecast_median)
+         print("-------------------------------------------------------------------------------------")
+         metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, X_test_forecast_median))
+         metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, X_test_forecast_median)
+         metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, X_test_forecast_median)
+         metrics_tmp["Test data MAPE"] = mean_absolute_percentage_error(original_ytest, X_test_forecast_median)
+         metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, X_train_forecast_median)
+         metrics_tmp["Test data R2 score"] = r2_score(original_ytest, X_test_forecast_median)
+         for metric in metrics_tmp:
+             print(metric, ': ', metrics_tmp[metric])
+             metrics1['Transformer'].append(metrics_tmp[metric])
+
+     metrics_df = pd.DataFrame.from_dict(metrics1, orient='columns')
+     metrics_df.index = metrics_tmp.keys()
+     metrics_df = metrics_df.round(3)
+     print(metrics_df)
+
+     # Shift train predictions for plotting
+     lag = time_step_backward + (time_step_forward - 1)
+     trainPredictPlot_arima = np.empty_like(closedf)
+     trainPredictPlot_arima[:, :] = np.nan
+     trainPredictPlot_arima[lag:len(train_predict_arima) + lag, :] = train_predict_arima
+     print(trainPredictPlot_arima[lag:len(train_predict_arima) + lag, :].shape, train_predict_arima.shape)
+     print("Train predicted data: ", trainPredictPlot_arima.shape)
+
+     # Shift test predictions for plotting
+     testPredictPlot_arima = np.empty_like(closedf)
+     testPredictPlot_arima[:, :] = np.nan
+     testPredictPlot_arima[len(train_predict_arima) + (lag * 2):len(closedf), :] = test_predict_arima
+     print(testPredictPlot_arima[len(train_predict_arima) + (lag * 2):len(closedf), :].shape, test_predict_arima.shape)
+     print("Test predicted data: ", testPredictPlot_arima.shape)
+
+     trainPredictPlot = np.empty_like(closedf)
+     trainPredictPlot[:, :] = np.nan
+     trainPredictPlot[lag:len(train_predict) + lag, :] = train_predict
+     print(trainPredictPlot[lag:len(train_predict) + lag, :].shape, train_predict.shape)
+     print("Train predicted data: ", trainPredictPlot.shape)
+
+     # Shift test predictions for plotting
+     testPredictPlot = np.empty_like(closedf)
+     testPredictPlot[:, :] = np.nan
+     testPredictPlot[len(train_predict) + (lag * 2):len(closedf), :] = test_predict
+     print(testPredictPlot[len(train_predict) + (lag * 2):len(closedf), :].shape, test_predict.shape)
+     print("Test predicted data: ", testPredictPlot.shape)
+
+     if GMDH:
+         trainPredictPlot_gmdh1 = np.empty_like(closedf)
+         trainPredictPlot_gmdh1[:, :] = np.nan
+         trainPredictPlot_gmdh1[lag:len(train_predict_gmdh1) + lag, :] = train_predict_gmdh1
+         print(trainPredictPlot_gmdh1[lag:len(train_predict_gmdh1) + lag, :].shape, train_predict_gmdh1.shape)
+
+         testPredictPlot_gmdh1 = np.empty_like(closedf)
+         testPredictPlot_gmdh1[:, :] = np.nan
+         testPredictPlot_gmdh1[len(train_predict_gmdh1) + (lag * 2):len(closedf), :] = test_predict_gmdh1
+         print(testPredictPlot_gmdh1[len(train_predict_gmdh1) + (lag * 2):len(closedf), :].shape, test_predict_gmdh1.shape)
+
+         trainPredictPlot_gmdh2 = np.empty_like(closedf)
+         trainPredictPlot_gmdh2[:, :] = np.nan
+         trainPredictPlot_gmdh2[lag:len(train_predict_gmdh2) + lag, :] = train_predict_gmdh2
+         print(trainPredictPlot_gmdh2[lag:len(train_predict_gmdh2) + lag, :].shape, train_predict_gmdh2.shape)
+
+         testPredictPlot_gmdh2 = np.empty_like(closedf)
+         testPredictPlot_gmdh2[:, :] = np.nan
+         testPredictPlot_gmdh2[len(train_predict_gmdh2) + (lag * 2):len(closedf), :] = test_predict_gmdh2
+         print(testPredictPlot_gmdh2[len(train_predict_gmdh2) + (lag * 2):len(closedf), :].shape, test_predict_gmdh2.shape)
+
+     if transformer:
+         trainPredictPlot_transformer = np.empty_like(closedf)
+         trainPredictPlot_transformer[:, :] = np.nan
+         trainPredictPlot_transformer[lag:len(X_train_forecast_median) + lag, :] = X_train_forecast_median
+         print(trainPredictPlot_transformer[lag:len(X_train_forecast_median) + lag, :].shape,
+               X_train_forecast_median.shape)
+
+         testPredictPlot_transformer = np.empty_like(closedf)
+         testPredictPlot_transformer[:, :] = np.nan
+         testPredictPlot_transformer[len(X_train_forecast_median) + (lag * 2):len(closedf), :] = X_test_forecast_median
+         print(testPredictPlot_transformer[len(X_train_forecast_median) + (lag * 2):len(closedf), :].shape,
+               X_test_forecast_median.shape)
+
+     if GMDH:
+         if transformer:
+             plotdf = pd.DataFrame({'date': close_stock['Date'],
+                                    'original_close': close_stock['Close'],
+                                    'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
+                                    'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
+                                    'train_predicted_close_gmdh_1': trainPredictPlot_gmdh1.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close_gmdh_1': testPredictPlot_gmdh1.reshape(1, -1)[0].tolist(),
+                                    'train_predicted_close_gmdh_2': trainPredictPlot_gmdh2.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close_gmdh_2': testPredictPlot_gmdh2.reshape(1, -1)[0].tolist(),
+                                    'train_predicted_close_transformer': trainPredictPlot_transformer.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close_transformer': testPredictPlot_transformer.reshape(1, -1)[0].tolist()})
+         else:
+             plotdf = pd.DataFrame({'date': close_stock['Date'],
+                                    'original_close': close_stock['Close'],
+                                    'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
+                                    'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
+                                    'train_predicted_close_gmdh_1': trainPredictPlot_gmdh1.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close_gmdh_1': testPredictPlot_gmdh1.reshape(1, -1)[0].tolist(),
+                                    'train_predicted_close_gmdh_2': trainPredictPlot_gmdh2.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close_gmdh_2': testPredictPlot_gmdh2.reshape(1, -1)[0].tolist()})
+     else:
+         if transformer:
+             plotdf = pd.DataFrame({'date': close_stock['Date'],
+                                    'original_close': close_stock['Close'],
+                                    'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
+                                    'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
+                                    'train_predicted_close_transformer': trainPredictPlot_transformer.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close_transformer': testPredictPlot_transformer.reshape(1, -1)[0].tolist()})
+         else:
+             plotdf = pd.DataFrame({'date': close_stock['Date'],
+                                    'original_close': close_stock['Close'],
+                                    'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
+                                    'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
+                                    'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist()})
+     fig, ax = plt.subplots()
+     ax.plot(plotdf['date'], plotdf['original_close'], label='Original close price')
+     ax.plot(plotdf['date'], plotdf['train_predicted_close_arima'],
+             label='Predicted close price on train, SARIMA')
+     ax.plot(plotdf['date'], plotdf['test_predicted_close_arima'], label='Predicted close price on test, SARIMA')
+     ax.plot(plotdf['date'], plotdf['train_predicted_close'], label='Predicted close price on train, LSTM')
+     ax.plot(plotdf['date'], plotdf['test_predicted_close'], label='Predicted close price on test, LSTM')
+     if GMDH:
+         ax.plot(plotdf['date'], plotdf['train_predicted_close_gmdh_1'],
+                 label='Predicted close price on train, GMDH_1')
+         ax.plot(plotdf['date'], plotdf['test_predicted_close_gmdh_1'], label='Predicted close price on test, GMDH_1')
+
+         ax.plot(plotdf['date'], plotdf['train_predicted_close_gmdh_2'],
+                 label='Predicted close price on train, GMDH_2')
+         ax.plot(plotdf['date'], plotdf['test_predicted_close_gmdh_2'], label='Predicted close price on test, GMDH_2')
+     if transformer:
+         ax.plot(plotdf['date'], plotdf['train_predicted_close_transformer'],
+                 label='Predicted close price on train, Transformer')
+         ax.plot(plotdf['date'], plotdf['test_predicted_close_transformer'],
+                 label='Predicted close price on test, Transformer')
+     ax.legend()
+     ax.set_title("Comparison of original and modeled prices")
+
+     models_dict = {'LSTM': model, 'SARIMA': arima_model, 'GMDH_1': model_gmdh1, 'GMDH_2': model_gmdh2, 'Transformer': pipeline}
+
+     return plotdf, metrics_df, models_dict
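For reference, `experiment()` delegates the windowing of the scaled series to `pages/utils/utils.create_dataset`, which is not shown in this diff. A plausible sketch, inferred only from how it is called and from the plotting offset `lag = time_step_backward + (time_step_forward - 1)` (the actual helper may differ), is:

import numpy as np

def create_dataset(dataset, time_step_backward=15, time_step_forward=1):
    # Slide a window of `time_step_backward` scaled closes as predictors (X)
    # and take the value `time_step_forward` steps past the window as target (y).
    X, y = [], []
    for i in range(len(dataset) - time_step_backward - time_step_forward + 1):
        X.append(dataset[i:i + time_step_backward, 0])
        y.append(dataset[i + time_step_backward + time_step_forward - 1, 0])
    return np.array(X), np.array(y)

This yields len(dataset) - lag samples, which matches the trainPredictPlot/testPredictPlot index arithmetic above.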
src/experiment_runner_for_portfolio.py ADDED
@@ -0,0 +1,283 @@
+ import requests
+ from experiment_runner_for_best_models import experiment
+ from datetime import datetime
+ from tqdm import tqdm
+ import numpy as np
+ import scipy.optimize as sco
+
+
+ class DataLoader():
+     def __init__(self, correlation_threshold: float = 0.9):
+         self.correlation_threshold = correlation_threshold
+
+     # Get the top N cryptocurrency tickers by market cap from CoinGecko
+     def get_top_crypto_tickers(self, n):
+         url = 'https://api.coingecko.com/api/v3/coins/markets'
+         params = {
+             'vs_currency': 'usd',
+             'order': 'market_cap_desc',
+             'per_page': n,
+             'page': 1,
+             'sparkline': 'false'
+         }
+         response = requests.get(url, params=params)
+         data = response.json()
+         tickers = [coin['symbol'].upper() for coin in data]
+         return tickers
+
+     # Validate that a ticker is compatible with yfinance
+     def validate_ticker(self, ticker):
+         import yfinance as yf
+         try:
+             ticker += '-USD'
+             info = yf.Ticker(ticker).info
+             return bool(info)  # True if info is not empty
+         except Exception:
+             return False
+
+     def experiment_data(self, top_n: int = 3, num_scale_steps: int = 1, scaling_strategy: str = 'average', time_step_backward: int = 15):
+         # Retrieve the top N tickers
+         self.tickers = self.get_top_crypto_tickers(top_n)
+         # Validate tickers for compatibility with yfinance
+         self.valid_tickers = [ticker for ticker in self.tickers if self.validate_ticker(ticker)]
+         print("Compatible tickers for yfinance:", len(self.valid_tickers))
+         self.invalid_tickers = []
+
+         # Run experiments for each valid ticker
+         self.tickers_dict = {}
+         for ticker in self.valid_tickers:
+             try:
+                 self.tickers_dict[ticker] = {}
+                 plot_df, metrics_df, models_dict = experiment(ticker=ticker, num_scale_steps=num_scale_steps,
+                                                               scaling_strategy=scaling_strategy, time_step_backward=time_step_backward)
+                 self.tickers_dict[ticker]['plot_df'] = plot_df
+                 self.tickers_dict[ticker]['metrics_df'] = metrics_df
+                 self.tickers_dict[ticker]['models_dict'] = models_dict
+             except AssertionError as e:  # or another specific error type
+                 print('EXCEPTION ', str(e), ticker)
+                 self.invalid_tickers.append(ticker)
+                 continue
+
+         for invalid_ticker in self.invalid_tickers:
+             self.valid_tickers.remove(invalid_ticker)
+
+         # Mapping for prediction columns
+         test_predictions_model_mapper = {
+             'SARIMA': 'test_predicted_close_arima',
+             'LSTM': 'test_predicted_close',
+             'GMDH_1': 'test_predicted_close_gmdh_1',
+             'GMDH_2': 'test_predicted_close_gmdh_2',
+             'Transformer': 'test_predicted_close_transformer'
+         }
+         train_predictions_model_mapper = {
+             'SARIMA': 'train_predicted_close_arima',
+             'LSTM': 'train_predicted_close',
+             'GMDH_1': 'train_predicted_close_gmdh_1',
+             'GMDH_2': 'train_predicted_close_gmdh_2',
+             'Transformer': 'train_predicted_close_transformer'
+         }
+
+         # Determine the global training and testing periods
+         self.global_min_date = datetime(2000, 1, 1, 0, 0)
+         self.global_max_date = datetime.now()
+         for ticker in self.valid_tickers:
+             train_last_valid_index = self.tickers_dict[ticker]['plot_df']['train_predicted_close_arima'].last_valid_index()
+             train_last_date = self.tickers_dict[ticker]['plot_df'].loc[train_last_valid_index, 'date']
+             if train_last_date < self.global_max_date:
+                 self.global_max_date = train_last_date
+
+             test_first_valid_index = self.tickers_dict[ticker]['plot_df']['test_predicted_close_arima'].first_valid_index()
+             test_first_date = self.tickers_dict[ticker]['plot_df'].loc[test_first_valid_index, 'date']
+             if test_first_date > self.global_min_date:
+                 self.global_min_date = test_first_date
+
+             print(train_last_date, train_last_valid_index, test_first_date, test_first_valid_index)
+
+         print(self.global_min_date, self.global_max_date)
+
+         # Collect predictions for the global periods
+         self.train_predictions_df_list = []
+         self.test_predictions_df_list = []
+         self.actual_prices_train = []
+         self.actual_prices_test = []
+         for ticker in tqdm(self.valid_tickers):
+             best_model = self.tickers_dict[ticker]['metrics_df'].T.sort_values(by='Test data MAPE', ascending=True).index[0]
+             train_predictions = self.tickers_dict[ticker]['plot_df'][['date', train_predictions_model_mapper[best_model]]]
+             train_predictions = train_predictions[train_predictions['date'] <= self.global_max_date]
+             train_predictions.rename(columns={train_predictions_model_mapper[best_model]: ticker}, inplace=True)
+             self.train_predictions_df_list.append(train_predictions)
+
+             actual_train = self.tickers_dict[ticker]['plot_df'][['date', 'original_close']]
+             actual_train = actual_train[actual_train['date'] <= self.global_max_date]
+             actual_train.rename(columns={'original_close': ticker}, inplace=True)
+             self.actual_prices_train.append(actual_train)
+
+             test_predictions = self.tickers_dict[ticker]['plot_df'][['date', test_predictions_model_mapper[best_model]]]
+             test_predictions = test_predictions[test_predictions['date'] >= self.global_min_date]
+             test_predictions.rename(columns={test_predictions_model_mapper[best_model]: ticker}, inplace=True)
+             self.test_predictions_df_list.append(test_predictions)
+
+             actual_test = self.tickers_dict[ticker]['plot_df'][['date', 'original_close']]
+             actual_test = actual_test[actual_test['date'] >= self.global_min_date]
+             actual_test.rename(columns={'original_close': ticker}, inplace=True)
+             self.actual_prices_test.append(actual_test)
+
+         # Greedy feature selection: skip tickers that are too correlated with already selected ones
+         self.selected_features = [self.valid_tickers[0]]
+         for idx, feature in enumerate(self.valid_tickers):
+             if idx == 0:
+                 continue
+             print(idx, feature)
+             tmp = self.train_predictions_df_list[0].merge(self.train_predictions_df_list[idx], on='date', how='inner')
+             # Compute the correlation of the new feature with the already selected ones
+             correlations = [abs(tmp[feature].corr(tmp[sel_feature])) for sel_feature in self.selected_features]
+             print(correlations)
+             max_correlation = max(correlations)
+
+             # Add the feature if its maximum correlation does not exceed the threshold
+             if max_correlation < self.correlation_threshold:
+                 self.selected_features.append(feature)
+                 self.train_predictions_df_list[0] = self.train_predictions_df_list[0].merge(self.train_predictions_df_list[idx], on='date', how='inner')
+                 self.actual_prices_train[0] = self.actual_prices_train[0].merge(self.actual_prices_train[idx], on='date', how='inner')
+                 self.test_predictions_df_list[0] = self.test_predictions_df_list[0].merge(self.test_predictions_df_list[idx], on='date', how='inner')
+                 self.actual_prices_test[0] = self.actual_prices_test[0].merge(self.actual_prices_test[idx], on='date', how='inner')
+         print(self.selected_features)
+
+         selected_features_and_date = ['date'] + self.selected_features
+         print(selected_features_and_date)
+
+         # Calculate the covariance matrix for the training period
+         train_data = self.train_predictions_df_list[0].drop(columns=['date']).astype(float)
+         self.cov_matrix = train_data[self.selected_features].cov()
+         print("Covariance matrix for the training period:")
+         print(self.cov_matrix)
+
+         # Split the global test period into validation and test sets
+         self.validation_size = int(len(self.test_predictions_df_list[0][selected_features_and_date]) * 0.5)
+         self.validation_data = self.test_predictions_df_list[0][selected_features_and_date].iloc[:self.validation_size]
+         self.validation_actual = self.actual_prices_test[0][selected_features_and_date].iloc[:self.validation_size]
+         self.test_data = self.test_predictions_df_list[0][selected_features_and_date].iloc[self.validation_size:]
+         self.test_actual = self.actual_prices_test[0][selected_features_and_date].iloc[self.validation_size:]
+
+         # Check positive definiteness
+         if np.any(np.linalg.eigvals(self.cov_matrix) <= 0):
+             raise ValueError("The covariance matrix is not positive definite.")
+
+         return self.cov_matrix, self.validation_data, self.validation_actual, self.test_data, self.test_actual, self.train_predictions_df_list, self.actual_prices_train, self.test_predictions_df_list, self.actual_prices_test, self.tickers_dict
+
+
+ class Portfolio():
+
+     def calculate_portfolio_metrics(self, weights, returns, cov_matrix):
+         portfolio_return = np.dot(weights, returns)
+         portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
+         return portfolio_return, portfolio_volatility
+
+     def optimize(self, returns, cov_matrix, target_return=None, allow_short=False):
+         # Minimize portfolio volatility subject to the weights summing to one
+         # (and, optionally, the portfolio hitting a target return)
+         num_assets = len(returns)
+         constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
+         if allow_short:
+             bounds = tuple((-1, 1) for _ in range(num_assets))  # allow short positions
+         else:
+             bounds = tuple((0, 1) for _ in range(num_assets))   # long-only portfolio
+         initial_weights = num_assets * [1. / num_assets]
+
+         if target_return is not None:
+             constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
+                            {'type': 'eq', 'fun': lambda x: np.dot(x, returns) - target_return})
+
+         result = sco.minimize(
+             lambda w: self.calculate_portfolio_metrics(w, returns, cov_matrix)[1],
+             initial_weights,
+             method='SLSQP',
+             bounds=bounds,
+             constraints=constraints
+         )
+         return result.x
+
+     def process_period(self, data, actual_data, cov_matrix, target_return=None, allow_short=False):
+         # Forecast and optimize the portfolio for each step T -> T+1 in the period
+         realized_returns = []
+         predicted_returns = []
+         realized_volatilities = []
+         predicted_volatilities = []
+         for i in range(len(data) - 1):
+             current_data = data.iloc[i:i + 2]                # current day and the prediction for the next day
+             actual_current_data = actual_data.iloc[i:i + 2]  # actual prices for T and T+1
+             # Predicted return: actual price at T vs. predicted price at T+1
+             predicted_return = (current_data.drop(columns=['date']).iloc[1] -
+                                 actual_current_data.drop(columns=['date']).iloc[0]) / actual_current_data.drop(columns=['date']).iloc[0]
+             # Optimize the portfolio based on the predicted returns
+             self.weights = self.optimize(predicted_return, cov_matrix, target_return=target_return,
+                                          allow_short=allow_short)
+             pred_return, pred_volatility = self.calculate_portfolio_metrics(weights=self.weights, returns=predicted_return,
+                                                                             cov_matrix=cov_matrix)
+             # Realized return: actual prices at T and T+1
+             realized_return = (actual_current_data.drop(columns=['date']).iloc[1] -
+                                actual_current_data.drop(columns=['date']).iloc[0]) / actual_current_data.drop(columns=['date']).iloc[0]
+
+             real_return, real_volatility = self.calculate_portfolio_metrics(weights=self.weights, returns=realized_return,
+                                                                             cov_matrix=cov_matrix)
+             realized_returns.append(real_return)
+             predicted_returns.append(pred_return)
+             realized_volatilities.append(real_volatility)
+             predicted_volatilities.append(pred_volatility)
+         return predicted_returns, realized_returns, predicted_volatilities, realized_volatilities
+
+     # Accuracy metric: mean absolute deviation of predictions relative to the mean realized value
+     def calculate_accuracy(self, predicted, realized):
+         return np.mean(np.abs(np.array(predicted) - np.array(realized))) / np.mean(realized)
+
+     # Deviation between the predicted and realized Sharpe ratios
+     def calculate_sharpe_ratio_deviation(self, predicted_returns, realized_returns, predicted_vol, realized_vol):
+         predicted_sharpe = np.mean(predicted_returns) / np.mean(predicted_vol)
+         realized_sharpe = np.mean(realized_returns) / np.mean(realized_vol)
+         return abs(predicted_sharpe - realized_sharpe)
+
+     def optimize_portfolio(self, cov_matrix, validation_data, validation_actual, test_data, test_actual, target_return: float | None = None, allow_short: bool = False):
+         # Calculate validation and test metrics
+         self.val_pred_returns, self.val_realized_returns, self.val_pred_vol, self.val_realized_vol = self.process_period(data=validation_data,
+                                                                                                                          actual_data=validation_actual,
+                                                                                                                          cov_matrix=cov_matrix,
+                                                                                                                          target_return=target_return,
+                                                                                                                          allow_short=allow_short)
+         self.test_pred_returns, self.test_realized_returns, self.test_pred_vol, self.test_realized_vol = self.process_period(data=test_data,
+                                                                                                                              actual_data=test_actual,
+                                                                                                                              cov_matrix=cov_matrix,
+                                                                                                                              target_return=target_return,
+                                                                                                                              allow_short=allow_short)
+
+         self.val_return_accuracy = self.calculate_accuracy(self.val_pred_returns, self.val_realized_returns)
+         self.val_volatility_accuracy = self.calculate_accuracy(self.val_pred_vol, self.val_realized_vol)
+         self.val_sharpe_deviation = self.calculate_sharpe_ratio_deviation(self.val_pred_returns, self.val_realized_returns, self.val_pred_vol, self.val_realized_vol)
+         self.val_sum_pred_returns = np.sum(self.val_pred_returns)
+         self.val_sum_realized_returns = np.sum(self.val_realized_returns)
+
+         self.test_return_accuracy = self.calculate_accuracy(self.test_pred_returns, self.test_realized_returns)
+         self.test_volatility_accuracy = self.calculate_accuracy(self.test_pred_vol, self.test_realized_vol)
+         self.test_sharpe_deviation = self.calculate_sharpe_ratio_deviation(self.test_pred_returns, self.test_realized_returns, self.test_pred_vol, self.test_realized_vol)
+         self.test_sum_pred_returns = np.sum(self.test_pred_returns)
+         self.test_sum_realized_returns = np.sum(self.test_realized_returns)
+
+         print(f"Validation Return Accuracy: {self.val_return_accuracy}")
+         print(f"Validation Volatility Accuracy: {self.val_volatility_accuracy}")
+         print(f"Validation Sharpe Ratio Deviation: {self.val_sharpe_deviation}")
+         print(f"Validation Pred Return Sum: {self.val_sum_pred_returns}")
+         print(f"Validation Actual Return Sum: {self.val_sum_realized_returns}")
+
+         print(f"Test Return Accuracy: {self.test_return_accuracy}")
+         print(f"Test Volatility Accuracy: {self.test_volatility_accuracy}")
+         print(f"Test Sharpe Ratio Deviation: {self.test_sharpe_deviation}")
+         print(f"Test Pred Return Sum: {self.test_sum_pred_returns}")
+         print(f"Test Actual Return Sum: {self.test_sum_realized_returns}")
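The two classes compose exactly as src/Portfolio_optimization.py drives them; a minimal non-Streamlit usage sketch (the parameter values below are illustrative, not app defaults) might be:

from experiment_runner_for_portfolio import DataLoader, Portfolio

# Fetch tickers, run per-ticker experiments and build the covariance matrix
dataloader = DataLoader(correlation_threshold=0.9)
dataloader.experiment_data(top_n=3, num_scale_steps=1,
                           scaling_strategy='average', time_step_backward=15)

# Optimize the portfolio over the validation and test periods
portfolio = Portfolio()
portfolio.optimize_portfolio(cov_matrix=dataloader.cov_matrix,
                             validation_data=dataloader.validation_data,
                             validation_actual=dataloader.validation_actual,
                             test_data=dataloader.test_data,
                             test_actual=dataloader.test_actual,
                             target_return=None,  # minimize volatility only; no return constraint
                             allow_short=False)
print(dataloader.selected_features)
print(portfolio.weights)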
src/pages/.DS_Store ADDED
Binary file (8.2 kB).
 
src/pages/1_Model_optimization.py ADDED
@@ -0,0 +1,688 @@
+ import pandas as pd
+ import numpy as np
+ import math
+
+ # For evaluation we will use these libraries
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+ from sklearn.preprocessing import MinMaxScaler
+
+ # For model building we will use these libraries
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import Dense
+ from tensorflow.keras.layers import LSTM
+ from tensorflow.keras import initializers
+ from tensorflow.keras.callbacks import EarlyStopping
+
+ # For plotting we will use this library
+ import matplotlib.pyplot as plt
+
+ import yfinance as yf
+
+ import streamlit as st
+
+ from gmdh import CriterionType, Criterion, Multi, Combi, Mia, Ria, PolynomialType
+ from chronos import ChronosPipeline
+ import torch
+ import pmdarima as pm
+ from pages.utils.utils import create_dataset, make_prediction, make_prediction_recursive
+
+ from io import StringIO
+
+ st.set_page_config(
+     page_title="Model optimization",
+     page_icon="📈")
+
+ @st.cache_data
+ def get_pipeline():
+     pipeline = ChronosPipeline.from_pretrained(
+         "amazon/chronos-t5-tiny",
+         device_map="cpu",  # use "cpu" for CPU inference and "mps" for Apple Silicon
+         torch_dtype=torch.bfloat16)
+     return pipeline
+
+ pipeline = get_pipeline()
+ seed = 42
+ st.title("Daily price prediction")
+ tickers = ['BTC', 'ETH', 'BNB',  # 'USDC',
+            'XRP', 'STETH', 'ADA', 'DOGE',  # 'FGC',
+            'WTRX', 'LTC', 'SOL', 'TRX', 'DOT', 'MATIC', 'BCH', 'WBTC', 'TON11419',
+            'DAI', 'SHIB', 'AVAX', 'BUSD', 'LEO', 'LINK']
+ intervals = ['1d']  # also possible: '5d', '1wk', '1mo', '3mo' and intraday intervals
+ ticker = st.selectbox("Ticker", options=tickers)
+ interval = st.selectbox("Interval of raw data", options=intervals)
+
+ int_to_periods = {'1m': '5d', '2m': '1mo', '5m': '1mo', '15m': '1mo', '30m': '1mo', '60m': '1mo', '90m': '1mo',
+                   '1h': '1y', '1d': '10y', '5d': '10y', '1wk': '10y', '1mo': '10y', '3mo': '10y'}
+
+ period_cut = {'1d': '2022-02-19', '5d': '2020-06-19', '1wk': '2020-06-19', '1mo': '2014-06-19', '3mo': '2014-06-19'}
+
+ uploaded_file = st.file_uploader("Choose a file")
+
+ try:
+     maindf = yf.download(tickers=f"{ticker}-USD",          # list of tickers
+                          period=int_to_periods[interval],  # time period
+                          interval=interval,                # trading interval
+                          prepost=False,                    # download pre/post market hours data?
+                          repair=True)                      # repair obvious price errors, e.g. 100x
+     if len(maindf) == 0:
+         raise FileNotFoundError
+ except Exception:
+     maindf = pd.read_csv(f'{ticker}.csv')
+
+ if uploaded_file is not None:
+     # Read the file as bytes:
+     bytes_data = uploaded_file.getvalue()
+     # Convert to a string-based IO:
+     stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
+     # Read the file as a string:
+     string_data = stringio.read()
+
+     # Can be used wherever a "file-like" object is accepted:
+     maindf = pd.read_csv(uploaded_file)
+     st.write(maindf.head())
86
+
87
+
88
+
89
+ maindf=maindf.reset_index()
90
+ maindf['Date'] = pd.to_datetime(maindf['Date'], format='%Y-%m-%d')
91
+
92
+ #maindf = pd.read_csv('BTC-USD.csv')
93
+ print('Total number of days present in the dataset: ',maindf.shape[0])
94
+ print('Total number of fields present in the dataset: ',maindf.shape[1])
95
+ print(maindf.head())
96
+
97
+ y_overall = maindf.copy()#.loc[(maindf['Date'] >= '2014-09-17')]
98
+ #& (maindf['Date'] <= '2022-02-19')]
99
+
100
+ global_expander = st.sidebar.expander('Modeling mode parameters')
101
+ scaling_expander = st.sidebar.expander('Scaling mode')
102
+ scaling_strategy_list = ['median', 'average', 'undersampling']
103
+ scale_step_type_list = ['D','W','M','Y']
104
+ scale_step_type = scaling_expander.selectbox('Scaling step', scale_step_type_list)
105
+ num_scale_steps = scaling_expander.slider('Scaling step size', 1, 100, 1)
106
+
107
+ y_overall = y_overall[['Date','Close']]
108
+ if num_scale_steps > 1:
109
+ scaling_strategy = scaling_expander.selectbox('Scaling method', scaling_strategy_list)
110
+ scaling_step_combined = str(num_scale_steps) + scale_step_type
111
+ # Determine today's date
112
+ today = pd.Timestamp.now().normalize()
113
+ if scaling_strategy == 'average':
114
+ # y_overall = y_overall.groupby(pd.Grouper(key = 'Date', freq = scaling_step_combined)).mean()
115
+ # Add a column with the end of each interval
116
+ y_overall['Interval_End'] = today - (
117
+ (today - y_overall['Date']) // pd.Timedelta(scaling_step_combined)) * pd.Timedelta(
118
+ scaling_step_combined)
119
+ # Group by interval and take the mean
120
+ y_overall = y_overall.groupby('Interval_End')['Close'].mean().reset_index()
121
+ # Sort the result
122
+ y_overall = y_overall.sort_values('Interval_End') # .reset_index(drop=True)
123
+ y_overall = y_overall.rename({'Interval_End': 'Date'}, axis=1)
124
+ elif scaling_strategy == 'median':
125
+ # y_overall = y_overall.groupby(pd.Grouper(key = 'Date', freq = scaling_step_combined)).median()
126
+ # y_overall = y_overall.groupby(pd.Grouper(key = 'Date', freq = scaling_step_combined)).mean()
127
+ # Add a column with the end of each interval
128
+ y_overall['Interval_End'] = today - (
129
+ (today - y_overall['Date']) // pd.Timedelta(scaling_step_combined)) * pd.Timedelta(
130
+ scaling_step_combined)
131
+ # Group by interval and take the median
132
+ y_overall = y_overall.groupby('Interval_End')['Close'].median().reset_index()
133
+ # Sort the result
134
+ y_overall = y_overall.sort_values('Interval_End') # .reset_index(drop=True)
135
+ y_overall = y_overall.rename({'Interval_End': 'Date'}, axis=1)
136
+ else:
137
+ # y_overall = y_overall.resample(on = 'Date', rule = scaling_step_combined).last()
138
+ # Set 'Date' as the index if it is not already set
139
+ # y_overall = y_overall.set_index('Date')
140
+ # y_overall.columns = y_overall.columns.droplevel(1)
141
+ y_overall = y_overall.resample(on='Date', rule=scaling_step_combined, origin='end').last()
142
+ y_overall = y_overall.reset_index()
143
+
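The interval anchoring above counts fixed-width buckets backwards from today, so the most recent bucket always ends on the current date. A minimal sketch of the arithmetic, with made-up dates and a day-based step (variable names mirror the code above):

import pandas as pd
today = pd.Timestamp('2024-01-10')
date = pd.Timestamp('2024-01-03')
step = pd.Timedelta('3D')  # e.g. num_scale_steps=3 with scale_step_type='D'
interval_end = today - ((today - date) // step) * step
print(interval_end)  # 2024-01-04: Jan 3 falls in the 3-day bucket (Jan 1, Jan 4]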
144
+
145
+ #names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])
146
+ fig, ax = plt.subplots()
147
+ #ax.plot(y_overall.Date, y_overall['Close'], label = 'Stock Close Price')
148
+ ax.plot(y_overall['Close'], label = 'Stock Close Price')
149
+
150
+ ax.legend()
151
+ ax.set_title(f'Close price dynamics for {ticker}')
152
+
153
+ #st.image(fig)
154
+ st.pyplot(fig)
155
+ #fig.show()
156
+
157
+
158
+
159
+
160
+ train = st.sidebar.button('Train')
161
+ time_step_backward = st.sidebar.slider('Number of steps back for the predictors', 5, 60, 15)
162
+ time_step_forward = st.sidebar.slider('Number of steps forward for the target', 1, 60, 1)
163
+
164
+
165
+ pred_days = 1
166
+ recursive_pred = False
167
+ if time_step_forward == 1:
168
+ expander = st.sidebar.expander('Recursive forecast mode')
169
+ pred_days = expander.slider('Number of steps for the recursive forecast', 1, 30, 15)
170
+ recursive_pred = expander.checkbox('Run recursive forecast')
171
+
172
+
173
+
174
+ GMDH = st.sidebar.checkbox('Add GMDH mode')
175
+ transformer = st.sidebar.checkbox('Add Transformer mode')
176
+ if GMDH:
177
+ expander1 = st.sidebar.expander('GMDH hyperparameters')
178
+ GMDHs = {'Combi': Combi(), 'Multi': Multi(), 'Mia': Mia(), 'Ria': Ria()}
179
+ criterions = {'Regularity criterion (asymmetric form)': CriterionType.REGULARITY,
180
+ 'Regularity criterion (symmetric form)': CriterionType.SYM_REGULARITY,
181
+ 'Stability criterion (asymmetric form)': CriterionType.STABILITY,
182
+ 'Stability criterion (symmetric form)': CriterionType.SYM_STABILITY,
183
+ 'Minimum coefficient bias criterion': CriterionType.UNBIASED_COEFFS,
184
+ 'Minimum output bias criterion (asymmetric form)': CriterionType.UNBIASED_OUTPUTS,
185
+ 'Minimum output bias criterion (symmetric form)': CriterionType.SYM_UNBIASED_OUTPUTS,
186
+ 'Absolute noise immunity criterion (asymmetric form)': CriterionType.ABSOLUTE_NOISE_IMMUNITY,
187
+ 'Absolute noise immunity criterion (symmetric form)': CriterionType.SYM_ABSOLUTE_NOISE_IMMUNITY}
188
+ polynoms = {'LINEAR': PolynomialType.LINEAR,
189
+ 'LINEAR_COV': PolynomialType.LINEAR_COV,
190
+ 'QUADRATIC': PolynomialType.QUADRATIC}
191
+ GMDH_algo = expander1.selectbox("GMDH algorithm", options = GMDHs.keys())
192
+ criterion = expander1.selectbox("External criterion", options = criterions.keys())
193
+ p_average = expander1.slider('p_average', 1, 10, 1)
194
+ limit = expander1.number_input('limit', value = 0.)
195
+ k_best = expander1.slider('k_best', 1, 10, 3 if GMDH_algo == 'Mia' else 1)
196
+ polynom = expander1.selectbox("Base polynomial type", options = polynoms.keys())
197
+
198
+
199
+ if isinstance(y_overall.columns, pd.MultiIndex): y_overall.columns = y_overall.columns.droplevel(1)  # flatten yfinance's two-level columns; plain columns (e.g. the CSV fallback) are left as-is
200
+ #y_overall = y_overall.reset_index()
201
+
202
+
203
+ if train:
204
+ my_bar = st.progress(0, text='Model training progress. Truncating the dataset now')
205
+ # First take only the close prices
206
+ closedf = y_overall[['Date', 'Close']]#maindf[['Date', 'Close']]
207
+ print("Shape of close dataframe:", closedf.shape)
208
+ closedf = closedf[-1000:]#closedf[closedf['Date'] > period_cut[interval]]
209
+ close_stock = closedf.copy()
210
+ print("Total data for prediction: ", closedf.shape[0])
211
+ my_bar.progress(10 + 1, text='Truncated the dataset -> Scaling it')
212
+ # drop the date column; the MinMax scaler is fit on the training split below to avoid leakage
213
+
214
+
215
+ del closedf['Date']
216
+ scaler = MinMaxScaler(feature_range=(0, 1))
217
+ #closedf = scaler.fit_transform(np.array(closedf).reshape(-1, 1))
218
+ print(closedf.shape)
219
+
220
+ my_bar.progress(20 + 1, text='Scaled the dataset -> Splitting it into subsamples')
221
+ # we keep 70% of the data for training and 30% for testing
222
+
223
+ training_size = int(len(closedf) * 0.70)
224
+ test_size = len(closedf) - training_size
225
+ assert test_size > time_step_backward + time_step_forward, "Test_size is shorter than time_step_backward + time_step_forward"
226
+ train_data, test_data = closedf[0:training_size], closedf[training_size:len(closedf)]
227
+ train_data = scaler.fit_transform(train_data)
228
+ test_data = scaler.transform(test_data)
229
+ print("train_data: ", train_data.shape)
230
+ print("test_data: ", test_data.shape)
231
+
232
+ my_bar.progress(30 + 1, text='Split it into subsamples -> Cutting them into observations')
233
+
234
+ X_train, y_train = create_dataset(train_data, time_step_backward, time_step_forward)
235
+ X_test, y_test = create_dataset(test_data, time_step_backward, time_step_forward)
236
+
237
+ print("X_train: ", X_train.shape)
238
+ print("y_train: ", y_train.shape)
239
+ print("X_test: ", X_test.shape)
240
+ print("y_test", y_test.shape)
241
+
242
+ # reshape input to be [samples, time steps, features] which is required for LSTM
243
+ X_train_gmdh = X_train.copy()
244
+ X_test_gmdh = X_test.copy()
245
+ X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
246
+ X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
247
+
248
+ print("X_train: ", X_train.shape)
249
+ print("X_test: ", X_test.shape)
250
+
251
+ my_bar.progress(40 + 1, text='Cut it into observations -> Training the model')
252
+ model = Sequential()
253
+ model.add(LSTM(10, input_shape=(None, 1), activation="relu",
254
+ kernel_initializer = initializers.GlorotNormal(seed = seed), bias_initializer = initializers.GlorotNormal(seed = seed)))
255
+ model.add(Dense(1,
256
+ kernel_initializer = initializers.GlorotNormal(seed = seed), bias_initializer = initializers.GlorotNormal(seed = seed)))
257
+ model.compile(loss="mean_squared_error", optimizer="adam")
258
+ callback = EarlyStopping(monitor='loss', patience=30, restore_best_weights = True)
259
+ history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, verbose=10,
260
+ callbacks = [callback])
261
+
262
+ arima_model = pm.auto_arima(train_data,
263
+ m=12, # frequency of series
264
+ seasonal=True, # TRUE if seasonal series
265
+ d=None, # let model determine 'd'
266
+ test='adf', # use adftest to find optimal 'd'
267
+ start_p=0, start_q=0, # minimum p and q
268
+ max_p=time_step_backward, max_q=time_step_backward, # maximum p and q
269
+ D=None, # let model determine 'D'
270
+ trace=True,
271
+ error_action='ignore',
272
+ suppress_warnings=True,
273
+ stepwise=True)
274
+ st.text(arima_model.summary())
275
+
276
+ if GMDH:
277
+ model_gmdh = GMDHs[GMDH_algo]
278
+ if GMDH_algo == 'Combi':
279
+ model_gmdh.fit(X_train_gmdh, y_train, p_average = p_average, limit = limit, test_size=0.3,
280
+ criterion = Criterion(criterion_type = criterions[criterion]))
281
+ if GMDH_algo == 'Multi':
282
+ model_gmdh.fit(X_train_gmdh, y_train, p_average=p_average, limit=limit, test_size=0.3,
283
+ criterion=Criterion(criterion_type=criterions[criterion]),
284
+ k_best = k_best)
285
+ if GMDH_algo in ['Ria', 'Mia']:
286
+ model_gmdh.fit(X_train_gmdh, y_train, p_average=p_average, limit=limit, test_size=0.3,
287
+ criterion=Criterion(criterion_type=criterions[criterion]),
288
+ k_best = k_best, polynomial_type = polynoms[polynom])
289
+ st.write(f"GMDH model: {model_gmdh.get_best_polynomial()}")
290
+
291
+
292
+ my_bar.progress(70 + 1, text='Trained model -> Calculating loss')
293
+
294
+
295
+ loss = history.history['loss']
296
+ val_loss = history.history['val_loss']
297
+
298
+ epochs = range(len(loss))
299
+
300
+ fig, ax = plt.subplots()
301
+ ax.plot(epochs, loss, 'r', label='Training loss')
302
+ ax.plot(epochs, val_loss, 'b', label='Validation loss')
303
+ ax.legend()
304
+ ax.set_title('Training and validation loss')
305
+ #ax.set_ylim[0, 0.2]
306
+ st.pyplot(fig)
307
+
308
+ my_bar.progress(80 + 1, text='Calculated loss -> Scoring the dataset')
309
+
310
+
311
+ original_ytrain = scaler.inverse_transform(y_train.reshape(-1, 1))
312
+ original_ytest = scaler.inverse_transform(y_test.reshape(-1, 1))
313
+
314
+ train_predict, test_predict = make_prediction(X_train, X_test, method='LSTM', model=model,
315
+ scaler=scaler, time_step_forward=time_step_forward)
316
+ train_predict_arima, test_predict_arima = make_prediction(X_train, X_test, method='SARIMA', model=arima_model,
317
+ scaler=scaler, time_step_forward=time_step_forward)
318
+ if GMDH:
319
+ train_predict_gmdh, test_predict_gmdh = make_prediction(X_train_gmdh, X_test_gmdh, method='GMDH', model=model_gmdh,
320
+ scaler=scaler, time_step_forward=time_step_forward)
321
+ if transformer:
322
+ X_train_forecast_median, X_test_forecast_median = make_prediction(X_train_gmdh, X_test_gmdh, method='Transformer', model=pipeline,
323
+ scaler=scaler, time_step_forward=time_step_forward)
324
+
325
+ my_bar.progress(85 + 1, text='Scored the dataset -> Calculating performance metrics')
326
+
327
+ # Evaluation metrics: RMSE, MSE, MAE and R2
328
+ metrics_tmp = {}
329
+ metrics1 = {}
330
+ metrics1['LSTM'] = []
331
+ #metrics1['Transformer'] = []
332
+ metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict))
333
+ metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict)
334
+ metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict)
335
+ print("-------------------------------------------------------------------------------------")
336
+ metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict))
337
+ metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict)
338
+ metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict)
339
+ #metrics_tmp["Train data explained variance regression score"] = explained_variance_score(original_ytrain, train_predict)
340
+ #metrics_tmp["Test data explained variance regression score"] = explained_variance_score(original_ytest, test_predict)
341
+ metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict)
342
+ metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict)
343
+ for metric in metrics_tmp:
344
+ print(metric, ': ', metrics_tmp[metric])
345
+ metrics1['LSTM'].append(metrics_tmp[metric])
346
+
347
+
348
+ metrics1['SARIMA'] = []
349
+ # metrics1['Transformer'] = []
350
+ metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict_arima))
351
+ metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict_arima)
352
+ metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict_arima)
353
+ print("-------------------------------------------------------------------------------------")
354
+ metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict_arima))
355
+ metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict_arima)
356
+ metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict_arima)
357
+ # metrics_tmp["Train data explained variance regression score"] = explained_variance_score(original_ytrain, train_predict)
358
+ # metrics_tmp["Test data explained variance regression score"] = explained_variance_score(original_ytest, test_predict)
359
+ metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict_arima)
360
+ metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict_arima)
361
+ for metric in metrics_tmp:
362
+ print(metric, ': ', metrics_tmp[metric])
363
+ metrics1['SARIMA'].append(metrics_tmp[metric])
364
+ if GMDH:
365
+ metrics1['GMDH'] = []
366
+ metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict_gmdh))
367
+ metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict_gmdh)
368
+ metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict_gmdh)
369
+ print("-------------------------------------------------------------------------------------")
370
+ metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict_gmdh))
371
+ metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict_gmdh)
372
+ metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict_gmdh)
373
+ #metrics_tmp["Train data explained variance regression score"] = explained_variance_score(original_ytrain, train_predict)
374
+ #metrics_tmp["Test data explained variance regression score"] = explained_variance_score(original_ytest, test_predict)
375
+ metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict_gmdh)
376
+ metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict_gmdh)
377
+ for metric in metrics_tmp:
378
+ print(metric, ': ', metrics_tmp[metric])
379
+ metrics1['GMDH'].append(metrics_tmp[metric])
380
+
381
+ if transformer:
382
+ metrics1['Transformer'] = []
383
+ metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, X_train_forecast_median))
384
+ metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, X_train_forecast_median)
385
+ metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, X_train_forecast_median)
386
+ print("-------------------------------------------------------------------------------------")
387
+ metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, X_test_forecast_median))
388
+ metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, X_test_forecast_median)
389
+ metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, X_test_forecast_median)
390
+ # metrics_tmp["Train data explained variance regression score"] = explained_variance_score(original_ytrain, train_predict)
391
+ # metrics_tmp["Test data explained variance regression score"] = explained_variance_score(original_ytest, test_predict)
392
+ metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, X_train_forecast_median)
393
+ metrics_tmp["Test data R2 score"] = r2_score(original_ytest, X_test_forecast_median)
394
+ for metric in metrics_tmp:
395
+ print(metric, ': ', metrics_tmp[metric])
396
+ metrics1['Transformer'].append(metrics_tmp[metric])
397
+
398
+ metrics_df = pd.DataFrame.from_dict(metrics1, orient = 'columns')#(metrics, columns = ['LSTM', 'GMDH'])
399
+ metrics_df.index = metrics_tmp.keys()
400
+ st.write(metrics_df)
401
+ #print("Train data MGD: ", mean_gamma_deviance(original_ytrain, train_predict))
402
+ #print("Test data MGD: ", mean_gamma_deviance(original_ytest, test_predict))
403
+ #print("----------------------------------------------------------------------")
404
+ #print("Train data MPD: ", mean_poisson_deviance(original_ytrain, train_predict))
405
+ #print("Test data MPD: ", mean_poisson_deviance(original_ytest, test_predict))
406
+
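The four metric blocks above repeat the same computation for each model; a helper like the following sketch (score_split is a hypothetical name, not defined in this commit) would compute them once per split, up to key order:

import math
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def score_split(y_true, y_pred, split):
    # the same four metrics the page reports, keyed by split name
    mse = mean_squared_error(y_true, y_pred)
    return {f"{split} data RMSE": math.sqrt(mse),
            f"{split} data MSE": mse,
            f"{split} data MAE": mean_absolute_error(y_true, y_pred),
            f"{split} data R2 score": r2_score(y_true, y_pred)}

# e.g. metrics1['LSTM'] = list({**score_split(original_ytrain, train_predict, 'Train'),
#                               **score_split(original_ytest, test_predict, 'Test')}.values())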
407
+
408
+ my_bar.progress(90 + 1, text='Calculated performance metrics -> Plotting predictions')
409
+
410
+ # shift train predictions for plotting
411
+
412
+ lag = time_step_backward + (time_step_forward - 1)
413
+ trainPredictPlot_arima = np.empty_like(closedf)
414
+ trainPredictPlot_arima[:, :] = np.nan
415
+ trainPredictPlot_arima[lag:len(train_predict_arima) + lag, :] = train_predict_arima
416
+ print(trainPredictPlot_arima[lag:len(train_predict_arima) + lag, :].shape, train_predict_arima.shape)
417
+ print("Train predicted data: ", trainPredictPlot_arima.shape)
418
+
419
+ # shift test predictions for plotting
420
+ testPredictPlot_arima = np.empty_like(closedf)
421
+ testPredictPlot_arima[:, :] = np.nan
422
+ testPredictPlot_arima[len(train_predict_arima) + (lag * 2):len(closedf), :] = test_predict_arima
423
+ print(testPredictPlot_arima[len(train_predict_arima) + (lag * 2):len(closedf), :].shape, test_predict_arima.shape)
424
+ print("Test predicted data: ", testPredictPlot_arima.shape)
425
+
426
+
427
+
428
+ trainPredictPlot = np.empty_like(closedf)
429
+ trainPredictPlot[:, :] = np.nan
430
+ trainPredictPlot[lag:len(train_predict) + lag, :] = train_predict
431
+ print(trainPredictPlot[lag:len(train_predict) + lag, :].shape, train_predict.shape)
432
+ print("Train predicted data: ", trainPredictPlot.shape)
433
+
434
+ # shift test predictions for plotting
435
+ testPredictPlot = np.empty_like(closedf)
436
+ testPredictPlot[:, :] = np.nan
437
+ testPredictPlot[len(train_predict) + (lag * 2):len(closedf), :] = test_predict
438
+ print(testPredictPlot[len(train_predict) + (lag * 2):len(closedf), :].shape, test_predict.shape)
439
+ print("Test predicted data: ", testPredictPlot.shape)
440
+
441
+ if GMDH:
442
+ trainPredictPlot_gmdh = np.empty_like(closedf)
443
+ trainPredictPlot_gmdh[:, :] = np.nan
444
+ trainPredictPlot_gmdh[lag:len(train_predict_gmdh) + lag, :] = train_predict_gmdh
445
+ print(trainPredictPlot_gmdh[lag:len(train_predict_gmdh) + lag, :].shape, train_predict_gmdh.shape)
446
+
447
+ testPredictPlot_gmdh = np.empty_like(closedf)
448
+ testPredictPlot_gmdh[:, :] = np.nan
449
+ testPredictPlot_gmdh[len(train_predict_gmdh) + (lag * 2):len(closedf), :] = test_predict_gmdh
450
+ print(testPredictPlot_gmdh[len(train_predict_gmdh) + (lag * 2):len(closedf), :].shape, test_predict_gmdh.shape)
451
+
452
+ if transformer:
453
+ trainPredictPlot_transformer = np.empty_like(closedf)
454
+ trainPredictPlot_transformer[:, :] = np.nan
455
+ trainPredictPlot_transformer[lag:len(X_train_forecast_median) + lag, :] = X_train_forecast_median
456
+ print(trainPredictPlot_transformer[lag:len(X_train_forecast_median) + lag, :].shape,
457
+ X_train_forecast_median.shape)
458
+
459
+ testPredictPlot_transformer = np.empty_like(closedf)
460
+ testPredictPlot_transformer[:, :] = np.nan
461
+ testPredictPlot_transformer[len(X_train_forecast_median) + (lag * 2):len(closedf), :] = X_test_forecast_median
462
+ print(testPredictPlot_transformer[len(X_train_forecast_median) + (lag * 2):len(closedf), :].shape,
463
+ X_test_forecast_median.shape)
464
+
465
+ if GMDH:
466
+ if transformer:
467
+ plotdf = pd.DataFrame({'date': close_stock['Date'],
468
+ 'original_close': close_stock['Close'],
469
+ 'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
470
+ 'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
471
+ 'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
472
+ 'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
473
+ 'train_predicted_close_gmdh': trainPredictPlot_gmdh.reshape(1, -1)[0].tolist(),
474
+ 'test_predicted_close_gmdh': testPredictPlot_gmdh.reshape(1, -1)[0].tolist(),
475
+ 'train_predicted_close_transformer': trainPredictPlot_transformer.reshape(1, -1)[0].tolist(),
476
+ 'test_predicted_close_transformer': testPredictPlot_transformer.reshape(1, -1)[0].tolist()})
477
+ elif not transformer:
478
+ plotdf = pd.DataFrame({'date': close_stock['Date'],
479
+ 'original_close': close_stock['Close'],
480
+ 'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
481
+ 'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
482
+ 'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
483
+ 'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
484
+ 'train_predicted_close_gmdh': trainPredictPlot_gmdh.reshape(1, -1)[0].tolist(),
485
+ 'test_predicted_close_gmdh': testPredictPlot_gmdh.reshape(1, -1)[0].tolist()})
486
+ elif not GMDH:
487
+ if transformer:
488
+ plotdf = pd.DataFrame({'date': close_stock['Date'],
489
+ 'original_close': close_stock['Close'],
490
+ 'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
491
+ 'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
492
+ 'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
493
+ 'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
494
+ 'train_predicted_close_transformer': trainPredictPlot_transformer.reshape(1, -1)[0].tolist(),
495
+ 'test_predicted_close_transformer': testPredictPlot_transformer.reshape(1, -1)[0].tolist()})
496
+ else:
497
+ plotdf = pd.DataFrame({'date': close_stock['Date'],
498
+ 'original_close': close_stock['Close'],
499
+ 'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
500
+ 'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
501
+ 'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
502
+ 'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist()})
503
+ fig, ax = plt.subplots()
504
+ ax.plot(plotdf['date'], plotdf['original_close'], label='Original close price')
505
+ ax.plot(plotdf['date'], plotdf['train_predicted_close_arima'], label='Predicted close price on train, SARIMA')
506
+ ax.plot(plotdf['date'], plotdf['test_predicted_close_arima'], label='Predicted close price on test, SARIMA')
507
+ ax.plot(plotdf['date'], plotdf['train_predicted_close'], label='Predicted close price on train, LSTM')
508
+ ax.plot(plotdf['date'], plotdf['test_predicted_close'], label='Predicted close price on test, LSTM')
509
+ if GMDH:
510
+ ax.plot(plotdf['date'], plotdf['train_predicted_close_gmdh'], label='Predicted close price on train, GMDH')
511
+ ax.plot(plotdf['date'], plotdf['test_predicted_close_gmdh'], label='Predicted close price on test, GMDH')
512
+ if transformer:
513
+ ax.plot(plotdf['date'], plotdf['train_predicted_close_transformer'], label='Predicted close price on train, Transformer')
514
+ ax.plot(plotdf['date'], plotdf['test_predicted_close_transformer'], label='Predicted close price on test, Transformer')
515
+ ax.legend()
516
+ ax.set_title("Original vs. modeled close prices")
517
+ st.pyplot(fig)
518
+
519
+
520
+ my_bar.progress(100, text='Done')
521
+
522
+
523
+ if recursive_pred:
524
+ lst_output_arima = make_prediction_recursive(test_data=test_data, method='SARIMA', model=arima_model,
525
+ scaler=scaler, pred_days=pred_days,
526
+ time_step_backward=time_step_backward)
527
+ lst_output_lstm = make_prediction_recursive(test_data=test_data, method='LSTM', model=model,
528
+ scaler=scaler, pred_days=pred_days,
529
+ time_step_backward=time_step_backward)
530
+ if GMDH:
531
+ lst_output_gmdh = make_prediction_recursive(test_data=test_data, method='GMDH', model=model_gmdh,
532
+ scaler=scaler, pred_days=pred_days,
533
+ time_step_backward=time_step_backward)
534
+ if transformer:
535
+ lst_output_transformer = make_prediction_recursive(test_data=test_data, method='Transformer', model=pipeline,
536
+ scaler=scaler, pred_days=pred_days,
537
+ time_step_backward=time_step_backward)
538
+
539
+ """
540
+ x_input = test_data[len(test_data) - time_step_backward:].reshape(1, -1)
541
+ temp_input = list(x_input)
542
+ temp_input = temp_input[0].tolist()
543
+
544
+
545
+ lst_output = []
546
+ n_steps = time_step_backward
547
+ i = 0
548
+ while (i < pred_days):
549
+
550
+ if (len(temp_input) > time_step_backward):
551
+
552
+ x_input = np.array(temp_input[1:])
553
+ # print("{} day input {}".format(i,x_input))
554
+ x_input = x_input.reshape(1, -1)
555
+ x_input = x_input.reshape((1, n_steps, 1))
556
+
557
+ yhat = model.predict(x_input, verbose=0)
558
+ # print("{} day output {}".format(i,yhat))
559
+ temp_input.extend(yhat[0].tolist())
560
+ temp_input = temp_input[1:]
561
+ # print(temp_input)
562
+
563
+ lst_output.extend(yhat.tolist())
564
+ i = i + 1
565
+
566
+ else:
567
+
568
+ x_input = x_input.reshape((1, n_steps, 1))
569
+ yhat = model.predict(x_input, verbose=0)
570
+ temp_input.extend(yhat[0].tolist())
571
+
572
+ lst_output.extend(yhat.tolist())
573
+ i = i + 1
574
+
575
+ print("Output of predicted next steps: ", len(lst_output))
576
+ """
577
+ last_days = np.arange(1, time_step_backward + 1)
578
+ day_pred = np.arange(time_step_backward + 1, time_step_backward + pred_days + 1)
579
+ print(last_days)
580
+ print(day_pred)
581
+
582
+ temp_mat = np.empty((len(last_days) + pred_days, 1))
583
+ temp_mat[:] = np.nan
584
+ """
585
+ last_original_days_value = temp_mat.copy()
586
+ next_predicted_days_value = temp_mat.copy()
587
+ last_original_days_value[0:time_step_backward] = closedf[len(closedf) - time_step_backward:].values
588
+ next_predicted_days_value[time_step_backward:] = scaler.inverse_transform(np.array(lst_output))
589
+ """
590
+ last_original_days_value = temp_mat.copy()
591
+ next_predicted_days_value_arima = temp_mat.copy()
592
+ next_predicted_days_value_lstm = temp_mat.copy()
593
+ if GMDH:
594
+ next_predicted_days_value_gmdh = temp_mat.copy()
595
+ if transformer:
596
+ next_predicted_days_value_transformer = temp_mat.copy()
597
+
598
+ last_original_days_value[0:time_step_backward] = \
599
+ closedf[len(closedf) - time_step_backward:].values
600
+ next_predicted_days_value_arima[time_step_backward:] = lst_output_arima
601
+ next_predicted_days_value_lstm[time_step_backward:] = lst_output_lstm
602
+ if GMDH:
603
+ next_predicted_days_value_gmdh[time_step_backward:] = lst_output_gmdh
604
+ if transformer:
605
+ next_predicted_days_value_transformer[time_step_backward:] = lst_output_transformer
606
+
607
+ """
608
+ new_pred_plot = pd.DataFrame({
609
+ 'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
610
+ 'next_predicted_days_value': next_predicted_days_value.reshape(1, -1).tolist()[0]
611
+ })
612
+
613
+
614
+
615
+ fig, ax = plt.subplots()
616
+ ax.plot(new_pred_plot.index, new_pred_plot['last_original_days_value'], label=f"Last {time_step_backward} close price steps")
617
+ ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value'], label=f"Predicted next {pred_days} close price steps")
618
+ ax.legend()
619
+ ax.set_title(f"Last {time_step_backward} steps vs. next {pred_days} steps")
620
+ st.pyplot(fig)
621
+ """
622
+ if GMDH:
623
+ if transformer:
624
+ new_pred_plot = pd.DataFrame({
625
+ 'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
626
+ 'next_predicted_days_value_arima': next_predicted_days_value_arima.reshape(1, -1).tolist()[0],
627
+ 'next_predicted_days_value_lstm': next_predicted_days_value_lstm.reshape(1, -1).tolist()[0],
628
+ 'next_predicted_days_value_gmdh': next_predicted_days_value_gmdh.reshape(1, -1).tolist()[0],
629
+ 'next_predicted_days_value_transformer':
630
+ next_predicted_days_value_transformer.reshape(1, -1).tolist()[0]
631
+ })
632
+ elif not transformer:
633
+ new_pred_plot = pd.DataFrame({
634
+ 'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
635
+ 'next_predicted_days_value_arima': next_predicted_days_value_arima.reshape(1, -1).tolist()[0],
636
+ 'next_predicted_days_value_lstm': next_predicted_days_value_lstm.reshape(1, -1).tolist()[0],
637
+ 'next_predicted_days_value_gmdh': next_predicted_days_value_gmdh.reshape(1, -1).tolist()[0]
638
+ })
639
+ elif not GMDH:
640
+ if transformer:
641
+ new_pred_plot = pd.DataFrame({
642
+ 'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
643
+ 'next_predicted_days_value_arima': next_predicted_days_value_arima.reshape(1, -1).tolist()[0],
644
+ 'next_predicted_days_value_lstm': next_predicted_days_value_lstm.reshape(1, -1).tolist()[0],
645
+ 'next_predicted_days_value_transformer':
646
+ next_predicted_days_value_transformer.reshape(1, -1).tolist()[0]
647
+ })
648
+ else:
649
+ new_pred_plot = pd.DataFrame({
650
+ 'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
651
+ 'next_predicted_days_value_arima': next_predicted_days_value_arima.reshape(1, -1).tolist()[0],
652
+ 'next_predicted_days_value_lstm': next_predicted_days_value_lstm.reshape(1, -1).tolist()[0]
653
+ })
654
+ fig, ax = plt.subplots()
655
+ ax.plot(new_pred_plot.index, new_pred_plot['last_original_days_value'],
656
+ label=f"Последние {time_step_backward} шагов цены закратия")
657
+ ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_arima'],
658
+ label=f"Предсказанные следующие {pred_days} шагов цены закрытия SARIMA")
659
+ ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_lstm'],
660
+ label=f"Предсказанные следующие {pred_days} шагов цены закрытия LSTM")
661
+ if GMDH:
662
+ ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_gmdh'],
663
+ label=f"Предсказанные следующие {pred_days} шагов цены закрытия GMDH")
664
+ if transformer:
665
+ ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_transformer'],
666
+ label=f"Предсказанные следующие {pred_days} шагов цены закрытия Transformer")
667
+ ax.legend()
668
+ ax.set_title(f"Сравнения последних {time_step_backward} шагов и следующих {pred_days} шагов")
669
+ ax.set_ylim(0, closedf['Close'].max() * 1.5)
670
+ st.pyplot(fig)
671
+ #ax.plot()
672
+
673
+
674
+ @st.cache_data
675
+ def convert_df(df):
676
+ # IMPORTANT: Cache the conversion to prevent computation on every rerun
677
+ return df.to_csv().encode("utf-8")
678
+ # convert_df is cached and reused for the metrics table below; a second identical helper is not needed
679
+
680
+
681
+
682
+
683
+ plotdf_csv = convert_df(plotdf)
684
+ metrics_df_csv = convert_df(metrics_df)
685
+ st.download_button('Download data', plotdf_csv, file_name='predictions.csv', mime="text/csv")
686
+ st.download_button('Download metrics', metrics_df_csv, file_name='metrics.csv', mime="text/csv")
687
+
688
+
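The lag offset used when shifting predictions for plotting follows from the windowing in create_dataset: each split loses its first time_step_backward + (time_step_forward - 1) rows, and the predictions fill the rest of the split exactly. A quick sanity check with assumed sizes (not taken from a real run):

# assumed shapes: N rows in a split, b steps back, f steps forward
N, b, f = 1000, 15, 1
lag = b + (f - 1)
n_predictions = N - b - (f - 1)  # what create_dataset produces for that split
assert lag + n_predictions == N  # predictions cover rows [lag, N) of the split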
src/pages/utils/utils.py ADDED
@@ -0,0 +1,143 @@
1
+ import numpy as np
2
+ from typing import Literal, Tuple
3
+ import torch
4
+
5
+
6
+ def create_dataset(dataset, time_step_backward = 1, time_step_forward = 1):
7
+ dataX, dataY = [], []
8
+ for i in range(len(dataset) - time_step_backward - (time_step_forward - 1)):
9
+ a = dataset[i:(i + time_step_backward), 0]  # window of time_step_backward consecutive values
10
+ dataX.append(a)
11
+ dataY.append(dataset[i + time_step_backward + (time_step_forward - 1), 0])
12
+ return np.array(dataX), np.array(dataY)
13
+
14
+ def make_prediction(X_train: np.ndarray, X_test: np.ndarray,
15
+ method: Literal['LSTM', 'GMDH', 'Transformer', 'SARIMA'],
16
+ model, scaler, time_step_forward: int) -> Tuple[np.ndarray, np.ndarray]:
17
+ if method == 'LSTM':
18
+ train_predict = model.predict(X_train)
19
+ test_predict = model.predict(X_test)
20
+ train_predict = scaler.inverse_transform(train_predict)
21
+ test_predict = scaler.inverse_transform(test_predict)
22
+ return train_predict, test_predict
23
+ elif method == 'SARIMA':
24
+ train_predict_arima = []
25
+ test_predict_arima = []
26
+ for sample in X_train:
27
+ train_predict_arima.append(
28
+ model.fit_predict(sample, n_periods=time_step_forward, return_conf_int=False)[-1])
29
+ train_predict_arima = np.array(train_predict_arima)
30
+ for sample in X_test:
31
+ test_predict_arima.append(
32
+ model.fit_predict(sample, n_periods=time_step_forward, return_conf_int=False)[-1])
33
+ test_predict_arima = np.array(test_predict_arima)
34
+ train_predict_arima = scaler.inverse_transform(train_predict_arima.reshape(-1, 1))
35
+ test_predict_arima = scaler.inverse_transform(test_predict_arima.reshape(-1, 1))
36
+ return train_predict_arima, test_predict_arima
37
+ elif method == 'GMDH':
38
+ train_predict_gmdh = model.predict(X_train)
39
+ test_predict_gmdh = model.predict(X_test)
40
+ train_predict_gmdh = scaler.inverse_transform(train_predict_gmdh.reshape(-1, 1))
41
+ test_predict_gmdh = scaler.inverse_transform(test_predict_gmdh.reshape(-1, 1))
42
+ return train_predict_gmdh, test_predict_gmdh
43
+ elif method == 'Transformer':
44
+ X_train_context = torch.tensor(X_train)
45
+ X_test_context = torch.tensor(X_test)
46
+ X_train_forecast = model.predict(
47
+ X_train_context,
48
+ time_step_forward,
49
+ num_samples=3,
50
+ temperature=1.0,
51
+ top_k=50,
52
+ top_p=1.0)
53
+ X_test_forecast = model.predict(
54
+ X_test_context,
55
+ time_step_forward,
56
+ num_samples=3,
57
+ temperature=1.0,
58
+ top_k=50,
59
+ top_p=1.0)
60
+ X_train_forecast_median = np.quantile(X_train_forecast.numpy(), 0.5, axis=1)[:, -1]
61
+ X_test_forecast_median = np.quantile(X_test_forecast.numpy(), 0.5, axis=1)[:, -1]
62
+ X_train_forecast_median = scaler.inverse_transform(X_train_forecast_median.reshape(-1, 1))
63
+ X_test_forecast_median = scaler.inverse_transform(X_test_forecast_median.reshape(-1, 1))
64
+ return X_train_forecast_median, X_test_forecast_median
65
+
66
+
67
+
68
+
69
+
70
+ def make_prediction_recursive(test_data: np.ndarray,
71
+ method: Literal['LSTM', 'GMDH', 'Transformer', 'SARIMA'],
72
+ model, scaler, pred_days: int, time_step_backward: int) -> np.ndarray:
73
+ if method == 'LSTM':
74
+ x_input_lstm = test_data[len(test_data) - time_step_backward:].reshape(1, -1)
75
+ temp_input_lstm = list(x_input_lstm)
76
+ temp_input_lstm = temp_input_lstm[0].tolist()
77
+ lst_output_lstm = []
78
+ n_steps = time_step_backward
79
+ i = 0
80
+ while (i < pred_days):
81
+ if (len(temp_input_lstm) > time_step_backward):
82
+
83
+ x_input_lstm = np.array(temp_input_lstm[1:])
84
+ x_input_lstm = x_input_lstm.reshape(1, -1)
85
+ x_input_lstm = x_input_lstm.reshape((1, n_steps, 1))
86
+
87
+ yhat_lstm = model.predict(x_input_lstm, verbose=0)
88
+ temp_input_lstm.extend(yhat_lstm[0].tolist())
89
+ temp_input_lstm = temp_input_lstm[1:]
90
+ lst_output_lstm.extend(yhat_lstm.tolist())
91
+ i = i + 1
92
+ else:
93
+ x_input_lstm = x_input_lstm.reshape((1, n_steps, 1))
94
+ yhat_lstm = model.predict(x_input_lstm, verbose=0)
95
+ temp_input_lstm.extend(yhat_lstm[0].tolist())
96
+ lst_output_lstm.extend(yhat_lstm.tolist())
97
+ i = i + 1
98
+
99
+ lst_output_lstm = scaler.inverse_transform(lst_output_lstm)
100
+ return lst_output_lstm
101
+ elif method == 'SARIMA':
102
+ x_input_arima = test_data[len(test_data) - time_step_backward:]
103
+ n_steps = time_step_backward
104
+ lst_output_arima = model.fit_predict(x_input_arima, n_periods=pred_days, return_conf_int=False) # [-1]
105
+ lst_output_arima = scaler.inverse_transform(lst_output_arima.reshape(-1, 1))
106
+ return lst_output_arima
107
+ elif method == 'GMDH':
108
+ x_input_gmdh = test_data[len(test_data) - time_step_backward:].reshape(1, -1)
109
+ temp_input_gmdh = list(x_input_gmdh)
110
+ temp_input_gmdh = temp_input_gmdh[0].tolist()
111
+ lst_output_gmdh = []
112
+ n_steps = time_step_backward
113
+ i = 0
114
+ while (i < pred_days):
115
+ if (len(temp_input_gmdh) > time_step_backward):
116
+ x_input_gmdh = np.array(temp_input_gmdh[1:])
117
+ x_input_gmdh = x_input_gmdh.reshape(1, -1)
118
+ yhat_gmdh = model.predict(x_input_gmdh)
119
+ temp_input_gmdh.extend(yhat_gmdh.tolist())
120
+ temp_input_gmdh = temp_input_gmdh[1:]
121
+ lst_output_gmdh.extend(yhat_gmdh.tolist())
122
+ i = i + 1
123
+ else:
124
+ x_input_gmdh = x_input_gmdh.reshape((1, n_steps, 1))
125
+ yhat_gmdh = model.predict(x_input_gmdh[0].reshape(1, -1))
126
+ temp_input_gmdh.extend(yhat_gmdh.tolist())
127
+ lst_output_gmdh.extend(yhat_gmdh.tolist())
128
+ i = i + 1
129
+ lst_output_gmdh = scaler.inverse_transform(np.array(lst_output_gmdh).reshape(-1, 1))
130
+ return lst_output_gmdh
131
+ elif method == 'Transformer':
132
+ x_input_transformer = test_data[len(test_data) - time_step_backward:].reshape(1, -1)
133
+ x_input_transformer = torch.tensor(x_input_transformer)
134
+ lst_output_forecast = model.predict(
135
+ x_input_transformer,
136
+ pred_days,
137
+ num_samples=3,
138
+ temperature=1.0,
139
+ top_k=50,
140
+ top_p=1.0)
141
+ X_train_forecast_median = np.quantile(lst_output_forecast.numpy(), 0.5, axis=1) # [:, -1]
142
+ lst_output_transformer = scaler.inverse_transform(X_train_forecast_median.reshape(-1, 1))
143
+ return lst_output_transformer
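For reference, the windowing contract of create_dataset on a tiny synthetic series (values invented for illustration):

import numpy as np
series = np.arange(10, dtype=float).reshape(-1, 1)  # a single 'Close' column
X, y = create_dataset(series, time_step_backward=3, time_step_forward=2)
print(X.shape, y.shape)  # (6, 3) (6,)
print(X[0], y[0])        # [0. 1. 2.] 4.0 -> the target sits 2 steps after the window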
src/requirements.txt ADDED
@@ -0,0 +1,6 @@
1
+ git+https://github.com/amazon-science/chronos-forecasting.git
2
+ pmdarima
3
+ streamlit
4
+ yfinance
5
+ gmdh
6
+
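Note that requirements.txt covers only part of what the pages import: pandas, numpy, matplotlib, scikit-learn, tensorflow and torch are all used directly but do not appear here (some may arrive as transitive dependencies of chronos-forecasting or pmdarima). A fuller file would presumably also contain lines such as:

pandas
numpy
matplotlib
scikit-learn
tensorflow
torch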
src/sidebar_portfolio.py ADDED
@@ -0,0 +1,27 @@
1
+ import streamlit as st
2
+
3
+
4
+ def sidebar():
5
+ #scale_step_type_list = ['Maximize return at a given risk level','Minimize risk at a given return level']
6
+ #scale_step_type = st.sidebar.selectbox('Optimization', scale_step_type_list)
7
+ scaling_strategy_list = ['average', 'median', 'undersampling']
8
+ top_n = st.sidebar.number_input('Number of candidate assets', value=5)
9
+ num_scale_steps = st.sidebar.slider('Investment horizon, days', 1, 100, 1)
10
+ scaling_strategy = st.sidebar.selectbox('Scaling strategy', scaling_strategy_list)
11
+ target_return_expander = st.sidebar.expander('Set a target return')
12
+ target_return = target_return_expander.slider('Return level, %', 1, 100, None)
13
+ if target_return:
14
+ target_return *= 0.01  # convert the percentage to a fraction
15
+ time_step_backward = st.sidebar.slider('Number of predictors, days', 1, 100, 15)
16
+ allow_short = st.sidebar.checkbox('Allow short positions')
17
+
18
+
19
+ # scaling_strategy = 'average'  # leftover hardcoded override, commented out so the widget value above is used
20
+ # time_step_backward = 15  # leftover hardcoded override, commented out so the widget value above is used
21
+ return {'top_n': top_n,
22
+ 'num_scale_steps': num_scale_steps,
23
+ 'scaling_strategy': scaling_strategy,
24
+ 'target_return': target_return,
25
+ 'time_step_backward': time_step_backward,
26
+ 'allow_short': allow_short}
27
+
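A hedged sketch of what a caller receives from sidebar() (key names taken from the return statement above; the call only works inside a running Streamlit app):

params = sidebar()
expected = {'top_n', 'num_scale_steps', 'scaling_strategy',
            'target_return', 'time_step_backward', 'allow_short'}
assert set(params) == expected
# params['target_return'] is either falsy or a fraction: the % slider value times 0.01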