Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- src/Portfolio_optimization.py +72 -0
- src/README.md +1 -0
- src/experiment_runner_for_best_models.py +568 -0
- src/experiment_runner_for_portfolio.py +283 -0
- src/pages/.DS_Store +0 -0
- src/pages/1_Model_optimization.py +688 -0
- src/pages/utils/utils.py +143 -0
- src/requirements.txt +6 -0
- src/sidebar_portfolio.py +27 -0
src/Portfolio_optimization.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Streamlit entry page: run price-prediction experiments and portfolio optimization.

Reconstructed from the diff view with conventional indentation. The page wires
the sidebar controls into DataLoader / Portfolio (project-local modules) and
renders the results. All widget labels and dict keys are kept byte-identical.
"""
import streamlit as st

from experiment_runner_for_portfolio import DataLoader, Portfolio
import pickle  # NOTE(review): imported but unused in this page — confirm before removing
from sidebar_portfolio import sidebar


# Page chrome must be configured before any other Streamlit call.
st.set_page_config(
    page_title="Portfolio optimization",
    page_icon="📊")

st.title("Portfolio Optimization")
sidebar_dict = sidebar()  # dict of user-selected hyperparameters from the sidebar
run = st.sidebar.button('Run portfolio optimization')
dataloader = DataLoader()
portfolio = Portfolio()

if run:
    st.header('Price Prediction Results')
    # Train/evaluate per-ticker models; populates dataloader's ticker dicts,
    # covariance matrix and validation/test frames used below.
    dataloader.experiment_data(top_n = sidebar_dict['top_n'], num_scale_steps = sidebar_dict['num_scale_steps'],
                               scaling_strategy = sidebar_dict['scaling_strategy'], time_step_backward = sidebar_dict['time_step_backward'])
    #plot_df, metrics_df, models_dict = experiment(ticker = sidebar_dict['ticker'], num_scale_steps= sidebar_dict['num_scale_steps'],
    #                                              scaling_strategy= sidebar_dict['scaling_strategy'], time_step_backward= sidebar_dict['time_step_backward'])

    # Side-by-side listing of tickers that did / did not yield usable data.
    col1_tickers, col2_tickers = st.columns(2)
    with col1_tickers:
        st.subheader('Valid Tickers:')
        st.write(dataloader.valid_tickers)
    with col2_tickers:
        st.subheader('Invalid Tickers:')
        st.write(dataloader.invalid_tickers)

    # Date range actually shared by all valid tickers.
    col1_date, col2_date = st.columns(2)
    with col1_date:
        st.write('Test Min Date:')
        st.write(dataloader.global_min_date)
    with col2_date:
        st.write('Training Max Date:')
        st.write(dataloader.global_max_date)

    st.subheader('Model Metrics:')
    for ticker in dataloader.valid_tickers:
        st.write(f'{ticker}:')
        # metrics_df columns are model names; pick the one with the lowest test MAPE.
        st.write('Best model on test data MAPE: ', dataloader.tickers_dict[ticker]['metrics_df'].T.sort_values(by='Test data MAPE', ascending=True).index[0])
        st.write(dataloader.tickers_dict[ticker]['metrics_df'])

    st.header('Portfolio Optimization Results')
    # Solve for portfolio weights under the user-chosen target return /
    # shorting constraint, then score on validation and test windows.
    portfolio.optimize_portfolio(cov_matrix=dataloader.cov_matrix, validation_data=dataloader.validation_data, validation_actual=dataloader.validation_actual,
                                 test_data=dataloader.test_data, test_actual=dataloader.test_actual, target_return=sidebar_dict['target_return'], allow_short=sidebar_dict['allow_short'])

    col1_weights, col2_weights = st.columns(2)
    with col1_weights:
        st.subheader('Selected tickers:')
        st.write(dataloader.selected_features)
    with col2_weights:
        st.subheader('Portfolio weights:')
        st.write(portfolio.weights)

    # Validation metrics on the left, held-out test metrics on the right.
    col1_results, col2_results = st.columns(2)
    with col1_results:
        st.write(f"Validation Return Accuracy: {portfolio.val_return_accuracy:.4f}")
        st.write(f"Validation Volatility Accuracy: {portfolio.val_volatility_accuracy:.4f}")
        st.write(f"Validation Sharpe Ratio Deviation: {portfolio.val_sharpe_deviation:.4f}")
        st.write(f"Validation Pred Return Sum: {portfolio.val_sum_pred_returns:.4f}")
        st.write(f"Validation Actual Return Sum: {portfolio.val_sum_realized_returns:.4f}")

    with col2_results:
        st.write(f"Test Return Accuracy: {portfolio.test_return_accuracy:.4f}")
        st.write(f"Test Volatility Accuracy: {portfolio.test_volatility_accuracy:.4f}")
        st.write(f"Test Sharpe Ratio Deviation: {portfolio.test_sharpe_deviation:.4f}")
        st.write(f"Test Pred Return Sum: {portfolio.test_sum_pred_returns:.4f}")
        st.write(f"Test Actual Return Sum: {portfolio.test_sum_realized_returns:.4f}")
src/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# ForecastAGLT
|
src/experiment_runner_for_best_models.py
ADDED
|
@@ -0,0 +1,568 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
def experiment(ticker, num_scale_steps, scaling_strategy, time_step_backward):
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
import math
|
| 7 |
+
|
| 8 |
+
# For Evalution we will use these library
|
| 9 |
+
|
| 10 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
|
| 11 |
+
from sklearn.preprocessing import MinMaxScaler
|
| 12 |
+
|
| 13 |
+
# For model building we will use these library
|
| 14 |
+
|
| 15 |
+
from tensorflow.keras.models import Sequential
|
| 16 |
+
from tensorflow.keras.layers import Dense
|
| 17 |
+
from tensorflow.keras.layers import LSTM
|
| 18 |
+
from tensorflow.keras import initializers
|
| 19 |
+
from tensorflow.keras.callbacks import EarlyStopping
|
| 20 |
+
|
| 21 |
+
# For PLotting we will use these library
|
| 22 |
+
import matplotlib.pyplot as plt
|
| 23 |
+
|
| 24 |
+
import yfinance as yf
|
| 25 |
+
|
| 26 |
+
from gmdh import CriterionType, Criterion, Multi, Combi, Mia, Ria, PolynomialType
|
| 27 |
+
from chronos import ChronosPipeline
|
| 28 |
+
import torch
|
| 29 |
+
import pmdarima as pm
|
| 30 |
+
from pages.utils.utils import create_dataset, make_prediction
|
| 31 |
+
# @st.cache_data
|
| 32 |
+
def get_pipeline():
|
| 33 |
+
pipeline = ChronosPipeline.from_pretrained(
|
| 34 |
+
"amazon/chronos-t5-tiny",
|
| 35 |
+
device_map="cpu", # use "cpu" for CPU inference and "mps" for Apple Silicon
|
| 36 |
+
torch_dtype=torch.bfloat16)
|
| 37 |
+
return pipeline
|
| 38 |
+
|
| 39 |
+
pipeline = get_pipeline()
|
| 40 |
+
|
| 41 |
+
pd.options.display.float_format = '{:20,.4f}'.format
|
| 42 |
+
seed = 42
|
| 43 |
+
#tickers = ['BTC', 'ETH', 'BNB',
|
| 44 |
+
# 'XRP', 'STETH','ADA','DOGE',
|
| 45 |
+
# 'WTRX','LTC','SOL','TRX','DOT','MATIC','BCH','WBTC','TON11419',
|
| 46 |
+
# 'DAI','SHIB','AVAX','BUSD','LEO','LINK']
|
| 47 |
+
#intervals = ['1d', '1wk', '1mo']
|
| 48 |
+
#ticker = 'BTC' #st.selectbox("Ticker", options=tickers)
|
| 49 |
+
interval = '1d' #st.selectbox("Interval", options = intervals)
|
| 50 |
+
|
| 51 |
+
int_to_periods = {'1m':'5d', '2m':'1mo', '5m': '1mo','15m': '1mo','30m': '1mo','60m': '1mo','90m': '1mo',
|
| 52 |
+
'1h': '1y','1d': '10y','5d': '10y','1wk': '10y','1mo': '10y','3mo': '10y'}
|
| 53 |
+
|
| 54 |
+
period_cut = {'1d': '2022-02-19', '1wk': '2020-06-19', '1mo': '2014-06-19'}
|
| 55 |
+
|
| 56 |
+
try:
|
| 57 |
+
maindf = yf.download(tickers = f"{ticker}-USD", # list of tickers
|
| 58 |
+
period = 'max', #int_to_periods[interval], # time period
|
| 59 |
+
interval = interval, # trading interval
|
| 60 |
+
prepost = False, # download pre/post market hours data?
|
| 61 |
+
repair = True,) # repair obvious price errors e.g. 100x?
|
| 62 |
+
if len(maindf) == 0:
|
| 63 |
+
raise FileNotFoundError
|
| 64 |
+
except:
|
| 65 |
+
maindf = pd.read_csv(f'{ticker}.csv')
|
| 66 |
+
#maindf = yf.download('BTC-USD',start, end, auto_adjust=True)#['Close']
|
| 67 |
+
maindf=maindf.reset_index()
|
| 68 |
+
maindf['Date'] = pd.to_datetime(maindf['Date'], format='%Y-%m-%d')
|
| 69 |
+
|
| 70 |
+
#maindf = pd.read_csv('BTC-USD.csv')
|
| 71 |
+
print('Total number of days present in the dataset: ',maindf.shape[0])
|
| 72 |
+
print('Total number of fields present in the dataset: ',maindf.shape[1])
|
| 73 |
+
print(maindf.head())
|
| 74 |
+
|
| 75 |
+
y_overall = maindf.copy()
|
| 76 |
+
#scaling_strategy_list = ['median', 'average', 'undersampling']
|
| 77 |
+
#scale_step_type_list = ['D','W','M','Y']
|
| 78 |
+
scale_step_type = 'D'
|
| 79 |
+
#num_scale_steps = 1
|
| 80 |
+
#scaling_strategy == 'average'
|
| 81 |
+
y_overall = y_overall[['Date','Close']]
|
| 82 |
+
if num_scale_steps > 1:
|
| 83 |
+
#scaling_expander.selectbox('Метод масштабирования', scaling_strategy_list)
|
| 84 |
+
scaling_step_combined = str(num_scale_steps) + scale_step_type
|
| 85 |
+
# Определяем сегодняшнюю дату
|
| 86 |
+
today = pd.Timestamp.now().normalize()
|
| 87 |
+
if scaling_strategy == 'average':
|
| 88 |
+
# y_overall = y_overall.groupby(pd.Grouper(key = 'Date', freq = scaling_step_combined)).mean()
|
| 89 |
+
# Добавляем колонку для конца интервала
|
| 90 |
+
y_overall['Interval_End'] = today - (
|
| 91 |
+
(today - y_overall['Date']) // pd.Timedelta(scaling_step_combined)) * pd.Timedelta(
|
| 92 |
+
scaling_step_combined)
|
| 93 |
+
# Группируем по интервалам и считаем среднее
|
| 94 |
+
y_overall = y_overall.groupby('Interval_End')['Close'].mean().reset_index()
|
| 95 |
+
# Сортируем результат
|
| 96 |
+
y_overall = y_overall.sort_values('Interval_End') # .reset_index(drop=True)
|
| 97 |
+
y_overall = y_overall.rename({'Interval_End': 'Date'}, axis=1)
|
| 98 |
+
elif scaling_strategy == 'median':
|
| 99 |
+
# y_overall = y_overall.groupby(pd.Grouper(key = 'Date', freq = scaling_step_combined)).median()
|
| 100 |
+
# y_overall = y_overall.groupby(pd.Grouper(key = 'Date', freq = scaling_step_combined)).mean()
|
| 101 |
+
# Добавляем колонку для конца интервала
|
| 102 |
+
y_overall['Interval_End'] = today - (
|
| 103 |
+
(today - y_overall['Date']) // pd.Timedelta(scaling_step_combined)) * pd.Timedelta(
|
| 104 |
+
scaling_step_combined)
|
| 105 |
+
# Группируем по интервалам и считаем среднее
|
| 106 |
+
y_overall = y_overall.groupby('Interval_End')['Close'].median().reset_index()
|
| 107 |
+
# Сортируем результат
|
| 108 |
+
y_overall = y_overall.sort_values('Interval_End') # .reset_index(drop=True)
|
| 109 |
+
y_overall = y_overall.rename({'Interval_End': 'Date'}, axis=1)
|
| 110 |
+
else:
|
| 111 |
+
# y_overall = y_overall.resample(on = 'Date', rule = scaling_step_combined).last()
|
| 112 |
+
# Устанавливаем 'Date' как индекс, если это ещё не сделано
|
| 113 |
+
# y_overall = y_overall.set_index('Date')
|
| 114 |
+
# y_overall.columns = y_overall.columns.droplevel(1)
|
| 115 |
+
y_overall = y_overall.resample(on='Date', rule=scaling_step_combined, origin='end').last()
|
| 116 |
+
y_overall = y_overall.reset_index()
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
#names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])
|
| 120 |
+
fig, ax = plt.subplots()
|
| 121 |
+
ax.plot(y_overall['Close'], label = 'Stock Close Price')
|
| 122 |
+
ax.legend()
|
| 123 |
+
ax.set_title(f'Динамика цены закрытия для {ticker}')
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
#st.pyplot(fig)
|
| 127 |
+
#ax.plot()
|
| 128 |
+
|
| 129 |
+
#time_step_backward = 15 #st.sidebar.slider('Количество шагов назад для предикторов', 5, 60, 15)
|
| 130 |
+
time_step_forward = 1 #st.sidebar.slider('Количество шагов вперед для таргета', 1, 60, 1)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
pred_days = 1
|
| 134 |
+
recursive_pred = False
|
| 135 |
+
if time_step_forward == 1:
|
| 136 |
+
#expander = st.sidebar.expander('Режим ресурсивного прогноза')
|
| 137 |
+
pred_days = 15 #expander.slider('Количество шагов для ресурсивного прогноза', 1, 30, 15)
|
| 138 |
+
recursive_pred = True #expander.checkbox('Запустить рекурсивный прогноз')
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
GMDH = True #st.sidebar.checkbox('Добавить режим МГУА')
|
| 143 |
+
transformer = True #st.sidebar.checkbox('Добавить режим Transformer')
|
| 144 |
+
if GMDH:
|
| 145 |
+
#expander1 = st.sidebar.expander('Гиперпараметры МГУА')
|
| 146 |
+
GMDHs = {'Combi': Combi(), 'Multi': Multi(), 'Mia': Mia(), 'Ria': Ria()}
|
| 147 |
+
criterions = {'Критерий регулярности (несимметричная форма)': CriterionType.REGULARITY,
|
| 148 |
+
'Критерий регулярности (симметричная форма)': CriterionType.SYM_REGULARITY,
|
| 149 |
+
'Критерий стабильности (несимметричная форма)': CriterionType.STABILITY,
|
| 150 |
+
'Критерий стабильности (симметричная форма)': CriterionType.SYM_STABILITY,
|
| 151 |
+
'Критерий минимума смещения коэффициентов': CriterionType.UNBIASED_COEFFS,
|
| 152 |
+
'Критерий минимума смещения решений (несимметричная форма)': CriterionType.UNBIASED_OUTPUTS,
|
| 153 |
+
'Критерий минимума смещения решений (симметричная форма)': CriterionType.SYM_UNBIASED_OUTPUTS,
|
| 154 |
+
'Абсолютно помехоустойчивый критерий (несимметричная форма)': CriterionType.ABSOLUTE_NOISE_IMMUNITY,
|
| 155 |
+
'Абсолютно помехоустойчивый критерий (симметричная форма)': CriterionType.SYM_ABSOLUTE_NOISE_IMMUNITY}
|
| 156 |
+
polynoms = {'LINEAR': PolynomialType.LINEAR,
|
| 157 |
+
'LINEAR_COV': PolynomialType.LINEAR_COV,
|
| 158 |
+
'QUADRATIC': PolynomialType.QUADRATIC}
|
| 159 |
+
GMDH_algo1 = 'Multi' #expander1.selectbox("Алгоритм МГУА", options = GMDHs.keys())
|
| 160 |
+
criterion1 = 'Критерий регулярности (несимметричная форма)' #expander1.selectbox("Внешний критерий", options = criterions.keys())
|
| 161 |
+
p_average1 = 1 #expander1.slider('p_average', 1, 10, 1)
|
| 162 |
+
limit1 = 0. #expander1.number_input('limit', value = 0.)
|
| 163 |
+
k_best1 = 1 #expander1.slider('k_best', 1, 10, 3 if GMDH_algo == 'Mia' else 1)
|
| 164 |
+
polynom1 = 'LINEAR' #expander1.selectbox("Вид базовых полиномов", options = polynoms.keys())
|
| 165 |
+
GMDH_algo2 = 'Ria' #expander1.selectbox("Алгоритм МГУА", options = GMDHs.keys())
|
| 166 |
+
criterion2 = 'Критерий регулярности (несимметричная форма)' #expander1.selectbox("Внешний критерий", options = criterions.keys())
|
| 167 |
+
p_average2 = 1 #expander1.slider('p_average', 1, 10, 1)
|
| 168 |
+
limit2 = 0. #expander1.number_input('limit', value = 0.)
|
| 169 |
+
k_best2 = 3 #expander1.slider('k_best', 1, 10, 3 if GMDH_algo == 'Mia' else 1)
|
| 170 |
+
polynom2 = 'QUADRATIC' #expander1.selectbox("Вид базовых полиномов", options = polynoms.keys())
|
| 171 |
+
|
| 172 |
+
y_overall.columns = y_overall.columns.droplevel(1)#.droplevel()
|
| 173 |
+
#y_overall = y_overall.reset_index()
|
| 174 |
+
|
| 175 |
+
#if run:
|
| 176 |
+
# my_bar = st.progress(0, text='Model training progress. Truncating the dataset now')
|
| 177 |
+
# Lets First Take all the Close Price
|
| 178 |
+
closedf = y_overall[['Date', 'Close']].dropna() # maindf[['Date', 'Close']]
|
| 179 |
+
print("Shape of close dataframe:", closedf.shape)
|
| 180 |
+
closedf = closedf[-1000:] # closedf[closedf['Date'] > period_cut[interval]]
|
| 181 |
+
close_stock = closedf.copy()
|
| 182 |
+
print("Total data for prediction: ", closedf.shape[0])
|
| 183 |
+
# my_bar.progress(10 + 1, text='Truncated the dataset -> Scaling it')
|
| 184 |
+
# deleting date column and normalizing using MinMax Scaler
|
| 185 |
+
|
| 186 |
+
scaler = MinMaxScaler(feature_range=(0, 1))
|
| 187 |
+
# closedf = scaler.fit_transform(np.array(closedf).reshape(-1, 1))
|
| 188 |
+
print(closedf.shape)
|
| 189 |
+
|
| 190 |
+
# my_bar.progress(20 + 1, text='Scaled the dataset -> Splitting it into subsamples')
|
| 191 |
+
# we keep the training set as 60% and 40% testing set
|
| 192 |
+
|
| 193 |
+
training_size = int(len(closedf) * 0.70)
|
| 194 |
+
test_size = len(closedf) - training_size
|
| 195 |
+
assert test_size > 2*(time_step_backward + time_step_forward), "Test_size is shorter than 2 x time_step_backward + time_step_forward"
|
| 196 |
+
train_data, test_data = closedf[0:training_size], closedf[training_size:len(closedf)]
|
| 197 |
+
train_start_date, train_end_date = train_data['Date'].iloc[0], train_data['Date'].iloc[
|
| 198 |
+
-1] # TO BE ADDED TO PY FILE!!!
|
| 199 |
+
|
| 200 |
+
del closedf['Date'], train_data['Date'], test_data['Date'] # TO BE ADDED TO PY FILE!!!
|
| 201 |
+
train_data = scaler.fit_transform(train_data)
|
| 202 |
+
test_data = scaler.transform(test_data)
|
| 203 |
+
print("train_data: ", train_data.shape)
|
| 204 |
+
print("test_data: ", test_data.shape)
|
| 205 |
+
|
| 206 |
+
# my_bar.progress(30 + 1, text='Split it into subsamples -> Cutting them into observations')
|
| 207 |
+
|
| 208 |
+
X_train, y_train = create_dataset(train_data, time_step_backward, time_step_forward)
|
| 209 |
+
X_test, y_test = create_dataset(test_data, time_step_backward, time_step_forward)
|
| 210 |
+
|
| 211 |
+
print("X_train: ", X_train.shape)
|
| 212 |
+
print("y_train: ", y_train.shape)
|
| 213 |
+
print("X_test: ", X_test.shape)
|
| 214 |
+
print("y_test", y_test.shape)
|
| 215 |
+
|
| 216 |
+
# reshape input to be [samples, time steps, features] which is required for LSTM
|
| 217 |
+
X_train_gmdh = X_train.copy()
|
| 218 |
+
X_test_gmdh = X_test.copy()
|
| 219 |
+
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
|
| 220 |
+
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
|
| 221 |
+
|
| 222 |
+
print("X_train: ", X_train.shape)
|
| 223 |
+
print("X_test: ", X_test.shape)
|
| 224 |
+
|
| 225 |
+
# my_bar.progress(40 + 1, text='Cut it into observations -> Training the model')
|
| 226 |
+
model = Sequential()
|
| 227 |
+
model.add(LSTM(10, input_shape=(None, 1), activation="relu",
|
| 228 |
+
kernel_initializer=initializers.GlorotNormal(seed=seed),
|
| 229 |
+
bias_initializer=initializers.GlorotNormal(seed=seed)))
|
| 230 |
+
model.add(Dense(1,
|
| 231 |
+
kernel_initializer=initializers.GlorotNormal(seed=seed),
|
| 232 |
+
bias_initializer=initializers.GlorotNormal(seed=seed)))
|
| 233 |
+
model.compile(loss="mean_squared_error", optimizer="adam")
|
| 234 |
+
callback = EarlyStopping(monitor='loss', patience=30, restore_best_weights=True)
|
| 235 |
+
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, verbose=False,
|
| 236 |
+
callbacks=[callback])
|
| 237 |
+
|
| 238 |
+
arima_model = pm.auto_arima(train_data,
|
| 239 |
+
m=12, # frequency of series
|
| 240 |
+
seasonal=True, # TRUE if seasonal series
|
| 241 |
+
d=None, # let model determine 'd'
|
| 242 |
+
test='adf', # use adftest to find optimal 'd'
|
| 243 |
+
start_p=0, start_q=0, # minimum p and q
|
| 244 |
+
max_p=time_step_backward, max_q=time_step_backward, # maximum p and q
|
| 245 |
+
D=None, # let model determine 'D'
|
| 246 |
+
trace=True,
|
| 247 |
+
error_action='ignore',
|
| 248 |
+
suppress_warnings=True,
|
| 249 |
+
stepwise=True)
|
| 250 |
+
# st.text(arima_model.summary())
|
| 251 |
+
print(arima_model.summary())
|
| 252 |
+
|
| 253 |
+
if GMDH:
|
| 254 |
+
model_gmdh1 = GMDHs[GMDH_algo1]
|
| 255 |
+
if GMDH_algo1 == 'Combi':
|
| 256 |
+
model_gmdh1.fit(X_train_gmdh, y_train, p_average=p_average1, limit=limit1, test_size=0.3,
|
| 257 |
+
criterion=Criterion(criterion_type=criterions[criterion1]))
|
| 258 |
+
if GMDH_algo1 == 'Multi':
|
| 259 |
+
model_gmdh1.fit(X_train_gmdh, y_train, p_average=p_average1, limit=limit1, test_size=0.3,
|
| 260 |
+
criterion=Criterion(criterion_type=criterions[criterion1]),
|
| 261 |
+
k_best=k_best1)
|
| 262 |
+
if GMDH_algo1 in ['Ria', 'Mia']:
|
| 263 |
+
model_gmdh1.fit(X_train_gmdh, y_train, p_average=p_average1, limit=limit1, test_size=0.3,
|
| 264 |
+
criterion=Criterion(criterion_type=criterions[criterion1]),
|
| 265 |
+
k_best=k_best1, polynomial_type=polynoms[polynom1])
|
| 266 |
+
# st.write(f"GMDH model: {model_gmdh.get_best_polynomial()}")
|
| 267 |
+
print(f"GMDH model 1: {model_gmdh1.get_best_polynomial()}")
|
| 268 |
+
|
| 269 |
+
model_gmdh2 = GMDHs[GMDH_algo2]
|
| 270 |
+
if GMDH_algo2 == 'Combi':
|
| 271 |
+
model_gmdh2.fit(X_train_gmdh, y_train, p_average=p_average2, limit=limit2, test_size=0.3,
|
| 272 |
+
criterion=Criterion(criterion_type=criterions[criterion2]))
|
| 273 |
+
if GMDH_algo2 == 'Multi':
|
| 274 |
+
model_gmdh2.fit(X_train_gmdh, y_train, p_average=p_average2, limit=limit2, test_size=0.3,
|
| 275 |
+
criterion=Criterion(criterion_type=criterions[criterion2]),
|
| 276 |
+
k_best=k_best2)
|
| 277 |
+
if GMDH_algo2 in ['Ria', 'Mia']:
|
| 278 |
+
model_gmdh2.fit(X_train_gmdh, y_train, p_average=p_average2, limit=limit2, test_size=0.3,
|
| 279 |
+
criterion=Criterion(criterion_type=criterions[criterion2]),
|
| 280 |
+
k_best=k_best2, polynomial_type=polynoms[polynom1])
|
| 281 |
+
# st.write(f"GMDH model: {model_gmdh.get_best_polynomial()}")
|
| 282 |
+
print(f"GMDH model 2: {model_gmdh2.get_best_polynomial()}")
|
| 283 |
+
"""
|
| 284 |
+
if transformer:
|
| 285 |
+
X_train_context = torch.tensor(X_train_gmdh)
|
| 286 |
+
X_test_context = torch.tensor(X_test_gmdh)
|
| 287 |
+
X_train_forecast = pipeline.predict(
|
| 288 |
+
X_train_context,
|
| 289 |
+
time_step_forward,
|
| 290 |
+
num_samples=3,
|
| 291 |
+
temperature=1.0,
|
| 292 |
+
top_k=50,
|
| 293 |
+
top_p=1.0)
|
| 294 |
+
X_test_forecast = pipeline.predict(
|
| 295 |
+
X_test_context,
|
| 296 |
+
time_step_forward,
|
| 297 |
+
num_samples=3,
|
| 298 |
+
temperature=1.0,
|
| 299 |
+
top_k=50,
|
| 300 |
+
top_p=1.0)
|
| 301 |
+
"""
|
| 302 |
+
|
| 303 |
+
# my_bar.progress(70 + 1, text='Trained model -> Calculating loss')
|
| 304 |
+
import matplotlib.pyplot as plt
|
| 305 |
+
|
| 306 |
+
loss = history.history['loss']
|
| 307 |
+
val_loss = history.history['val_loss']
|
| 308 |
+
|
| 309 |
+
epochs = range(len(loss))
|
| 310 |
+
|
| 311 |
+
fig, ax = plt.subplots()
|
| 312 |
+
ax.plot(epochs, loss, 'r', label='Training loss')
|
| 313 |
+
ax.plot(epochs, val_loss, 'b', label='Validation loss')
|
| 314 |
+
ax.legend()
|
| 315 |
+
ax.set_title('Потери на обучении и валидации')
|
| 316 |
+
|
| 317 |
+
# st.pyplot(fig)
|
| 318 |
+
ax.plot()
|
| 319 |
+
# my_bar.progress(80 + 1, text='Calculated loss -> Scoring the dataset')
|
| 320 |
+
|
| 321 |
+
original_ytrain = scaler.inverse_transform(y_train.reshape(-1, 1))
|
| 322 |
+
original_ytest = scaler.inverse_transform(y_test.reshape(-1, 1))
|
| 323 |
+
|
| 324 |
+
train_predict, test_predict = make_prediction(X_train, X_test, method='LSTM', model=model,
|
| 325 |
+
scaler=scaler, time_step_forward=time_step_forward)
|
| 326 |
+
train_predict_arima, test_predict_arima = make_prediction(X_train, X_test, method='SARIMA', model=arima_model,
|
| 327 |
+
scaler=scaler, time_step_forward=time_step_forward)
|
| 328 |
+
if GMDH:
|
| 329 |
+
train_predict_gmdh1, test_predict_gmdh1 = make_prediction(X_train_gmdh, X_test_gmdh, method='GMDH',
|
| 330 |
+
model=model_gmdh1,
|
| 331 |
+
scaler=scaler, time_step_forward=time_step_forward)
|
| 332 |
+
train_predict_gmdh2, test_predict_gmdh2 = make_prediction(X_train_gmdh, X_test_gmdh, method='GMDH',
|
| 333 |
+
model=model_gmdh2,
|
| 334 |
+
scaler=scaler, time_step_forward=time_step_forward)
|
| 335 |
+
if transformer:
|
| 336 |
+
X_train_forecast_median, X_test_forecast_median = make_prediction(X_train_gmdh, X_test_gmdh,
|
| 337 |
+
method='Transformer', model=pipeline,
|
| 338 |
+
scaler=scaler,
|
| 339 |
+
time_step_forward=time_step_forward)
|
| 340 |
+
|
| 341 |
+
# Evaluation metrices RMSE and MAE
|
| 342 |
+
metrics_tmp = {}
|
| 343 |
+
metrics1 = {}
|
| 344 |
+
metrics1['LSTM'] = []
|
| 345 |
+
metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict))
|
| 346 |
+
metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict)
|
| 347 |
+
metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict)
|
| 348 |
+
metrics_tmp["Train data MAPE"] = mean_absolute_percentage_error(original_ytrain, train_predict)
|
| 349 |
+
print("-------------------------------------------------------------------------------------")
|
| 350 |
+
metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict))
|
| 351 |
+
metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict)
|
| 352 |
+
metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict)
|
| 353 |
+
metrics_tmp["Test data MAPE"] = mean_absolute_percentage_error(original_ytest, test_predict)
|
| 354 |
+
metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict)
|
| 355 |
+
metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict)
|
| 356 |
+
for metric in metrics_tmp:
|
| 357 |
+
print(metric, ': ', metrics_tmp[metric])
|
| 358 |
+
metrics1['LSTM'].append(metrics_tmp[metric])
|
| 359 |
+
|
| 360 |
+
metrics1['SARIMA'] = []
|
| 361 |
+
metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict_arima))
|
| 362 |
+
metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict_arima)
|
| 363 |
+
metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict_arima)
|
| 364 |
+
metrics_tmp["Train data MAPE"] = mean_absolute_percentage_error(original_ytrain, train_predict_arima)
|
| 365 |
+
print("-------------------------------------------------------------------------------------")
|
| 366 |
+
metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict_arima))
|
| 367 |
+
metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict_arima)
|
| 368 |
+
metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict_arima)
|
| 369 |
+
metrics_tmp["Test data MAPE"] = mean_absolute_percentage_error(original_ytest, test_predict_arima)
|
| 370 |
+
metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict_arima)
|
| 371 |
+
metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict_arima)
|
| 372 |
+
for metric in metrics_tmp:
|
| 373 |
+
print(metric, ': ', metrics_tmp[metric])
|
| 374 |
+
metrics1['SARIMA'].append(metrics_tmp[metric])
|
| 375 |
+
if GMDH:
|
| 376 |
+
metrics1['GMDH_1'] = []
|
| 377 |
+
metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict_gmdh1))
|
| 378 |
+
metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict_gmdh1)
|
| 379 |
+
metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict_gmdh1)
|
| 380 |
+
metrics_tmp["Train data MAPE"] = mean_absolute_percentage_error(original_ytrain, train_predict_gmdh1)
|
| 381 |
+
print("-------------------------------------------------------------------------------------")
|
| 382 |
+
metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict_gmdh1))
|
| 383 |
+
metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict_gmdh1)
|
| 384 |
+
metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict_gmdh1)
|
| 385 |
+
metrics_tmp["Test data MAPE"] = mean_absolute_percentage_error(original_ytest, test_predict_gmdh1)
|
| 386 |
+
metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict_gmdh1)
|
| 387 |
+
metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict_gmdh1)
|
| 388 |
+
for metric in metrics_tmp:
|
| 389 |
+
print(metric, ': ', metrics_tmp[metric])
|
| 390 |
+
metrics1['GMDH_1'].append(metrics_tmp[metric])
|
| 391 |
+
|
| 392 |
+
metrics1['GMDH_2'] = []
|
| 393 |
+
metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict_gmdh2))
|
| 394 |
+
metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict_gmdh2)
|
| 395 |
+
metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict_gmdh2)
|
| 396 |
+
metrics_tmp["Train data MAPE"] = mean_absolute_percentage_error(original_ytrain, train_predict_gmdh2)
|
| 397 |
+
print("-------------------------------------------------------------------------------------")
|
| 398 |
+
metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict_gmdh2))
|
| 399 |
+
metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict_gmdh2)
|
| 400 |
+
metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict_gmdh2)
|
| 401 |
+
metrics_tmp["Test data MAPE"] = mean_absolute_percentage_error(original_ytest, test_predict_gmdh2)
|
| 402 |
+
metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict_gmdh2)
|
| 403 |
+
metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict_gmdh2)
|
| 404 |
+
for metric in metrics_tmp:
|
| 405 |
+
print(metric, ': ', metrics_tmp[metric])
|
| 406 |
+
metrics1['GMDH_2'].append(metrics_tmp[metric])
|
| 407 |
+
|
| 408 |
+
if transformer:
|
| 409 |
+
metrics1['Transformer'] = []
|
| 410 |
+
metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, X_train_forecast_median))
|
| 411 |
+
metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, X_train_forecast_median)
|
| 412 |
+
metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, X_train_forecast_median)
|
| 413 |
+
metrics_tmp["Train data MAPE"] = mean_absolute_percentage_error(original_ytrain, X_train_forecast_median)
|
| 414 |
+
print("-------------------------------------------------------------------------------------")
|
| 415 |
+
metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, X_test_forecast_median))
|
| 416 |
+
metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, X_test_forecast_median)
|
| 417 |
+
metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, X_test_forecast_median)
|
| 418 |
+
metrics_tmp["Test data MAPE"] = mean_absolute_percentage_error(original_ytest, X_test_forecast_median)
|
| 419 |
+
metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, X_train_forecast_median)
|
| 420 |
+
metrics_tmp["Test data R2 score"] = r2_score(original_ytest, X_test_forecast_median)
|
| 421 |
+
for metric in metrics_tmp:
|
| 422 |
+
print(metric, ': ', metrics_tmp[metric])
|
| 423 |
+
metrics1['Transformer'].append(metrics_tmp[metric])
|
| 424 |
+
|
| 425 |
+
metrics_df = pd.DataFrame.from_dict(metrics1, orient='columns') # (metrics, columns = ['LSTM', 'GMDH'])
|
| 426 |
+
metrics_df.index = metrics_tmp.keys()
|
| 427 |
+
# st.write(metrics_df)
|
| 428 |
+
metrics_df.round(3)
|
| 429 |
+
print(metrics_df)
|
| 430 |
+
# my_bar.progress(90 + 1, text='Calculated performance metrics -> Plotting predictions')
|
| 431 |
+
|
| 432 |
+
# shift train predictions for plotting
|
| 433 |
+
|
| 434 |
+
lag = time_step_backward + (time_step_forward - 1)
|
| 435 |
+
trainPredictPlot_arima = np.empty_like(closedf)
|
| 436 |
+
trainPredictPlot_arima[:, :] = np.nan
|
| 437 |
+
trainPredictPlot_arima[lag:len(train_predict_arima) + lag, :] = train_predict_arima
|
| 438 |
+
print(trainPredictPlot_arima[lag:len(train_predict_arima) + lag, :].shape, train_predict_arima.shape)
|
| 439 |
+
print("Train predicted data: ", trainPredictPlot_arima.shape)
|
| 440 |
+
|
| 441 |
+
# shift test predictions for plotting
|
| 442 |
+
testPredictPlot_arima = np.empty_like(closedf)
|
| 443 |
+
testPredictPlot_arima[:, :] = np.nan
|
| 444 |
+
testPredictPlot_arima[len(train_predict_arima) + (lag * 2):len(closedf), :] = test_predict_arima
|
| 445 |
+
print(testPredictPlot_arima[len(train_predict_arima) + (lag * 2):len(closedf), :].shape, test_predict_arima.shape)
|
| 446 |
+
print("Test predicted data: ", testPredictPlot_arima.shape)
|
| 447 |
+
|
| 448 |
+
# lag = time_step_backward + (time_step_forward - 1)
|
| 449 |
+
trainPredictPlot = np.empty_like(closedf)
|
| 450 |
+
trainPredictPlot[:, :] = np.nan
|
| 451 |
+
trainPredictPlot[lag:len(train_predict) + lag, :] = train_predict
|
| 452 |
+
print(trainPredictPlot[lag:len(train_predict) + lag, :].shape, train_predict.shape)
|
| 453 |
+
print("Train predicted data: ", trainPredictPlot.shape)
|
| 454 |
+
|
| 455 |
+
# shift test predictions for plotting
|
| 456 |
+
testPredictPlot = np.empty_like(closedf)
|
| 457 |
+
testPredictPlot[:, :] = np.nan
|
| 458 |
+
testPredictPlot[len(train_predict) + (lag * 2):len(closedf), :] = test_predict
|
| 459 |
+
print(testPredictPlot[len(train_predict) + (lag * 2):len(closedf), :].shape, test_predict.shape)
|
| 460 |
+
print("Test predicted data: ", testPredictPlot.shape)
|
| 461 |
+
|
| 462 |
+
if GMDH:
|
| 463 |
+
trainPredictPlot_gmdh1 = np.empty_like(closedf)
|
| 464 |
+
trainPredictPlot_gmdh1[:, :] = np.nan
|
| 465 |
+
trainPredictPlot_gmdh1[lag:len(train_predict_gmdh1) + lag, :] = train_predict_gmdh1
|
| 466 |
+
print(trainPredictPlot_gmdh1[lag:len(train_predict_gmdh1) + lag, :].shape, train_predict_gmdh1.shape)
|
| 467 |
+
|
| 468 |
+
testPredictPlot_gmdh1 = np.empty_like(closedf)
|
| 469 |
+
testPredictPlot_gmdh1[:, :] = np.nan
|
| 470 |
+
testPredictPlot_gmdh1[len(train_predict_gmdh1) + (lag * 2):len(closedf), :] = test_predict_gmdh1
|
| 471 |
+
print(testPredictPlot_gmdh1[len(train_predict_gmdh1) + (lag * 2):len(closedf), :].shape, test_predict_gmdh1.shape)
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
trainPredictPlot_gmdh2 = np.empty_like(closedf)
|
| 475 |
+
trainPredictPlot_gmdh2[:, :] = np.nan
|
| 476 |
+
trainPredictPlot_gmdh2[lag:len(train_predict_gmdh2) + lag, :] = train_predict_gmdh2
|
| 477 |
+
print(trainPredictPlot_gmdh2[lag:len(train_predict_gmdh2) + lag, :].shape, train_predict_gmdh2.shape)
|
| 478 |
+
|
| 479 |
+
testPredictPlot_gmdh2 = np.empty_like(closedf)
|
| 480 |
+
testPredictPlot_gmdh2[:, :] = np.nan
|
| 481 |
+
testPredictPlot_gmdh2[len(train_predict_gmdh2) + (lag * 2):len(closedf), :] = test_predict_gmdh2
|
| 482 |
+
print(testPredictPlot_gmdh2[len(train_predict_gmdh2) + (lag * 2):len(closedf), :].shape, test_predict_gmdh2.shape)
|
| 483 |
+
|
| 484 |
+
if transformer:
|
| 485 |
+
trainPredictPlot_transformer = np.empty_like(closedf)
|
| 486 |
+
trainPredictPlot_transformer[:, :] = np.nan
|
| 487 |
+
trainPredictPlot_transformer[lag:len(X_train_forecast_median) + lag, :] = X_train_forecast_median
|
| 488 |
+
print(trainPredictPlot_transformer[lag:len(X_train_forecast_median) + lag, :].shape,
|
| 489 |
+
X_train_forecast_median.shape)
|
| 490 |
+
|
| 491 |
+
testPredictPlot_transformer = np.empty_like(closedf)
|
| 492 |
+
testPredictPlot_transformer[:, :] = np.nan
|
| 493 |
+
testPredictPlot_transformer[len(X_train_forecast_median) + (lag * 2):len(closedf), :] = X_test_forecast_median
|
| 494 |
+
print(testPredictPlot_transformer[len(X_train_forecast_median) + (lag * 2):len(closedf), :].shape,
|
| 495 |
+
X_test_forecast_median.shape)
|
| 496 |
+
|
| 497 |
+
if GMDH:
|
| 498 |
+
if transformer:
|
| 499 |
+
plotdf = pd.DataFrame({'date': close_stock['Date'],
|
| 500 |
+
'original_close': close_stock['Close'],
|
| 501 |
+
'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 502 |
+
'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 503 |
+
'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
|
| 504 |
+
'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
|
| 505 |
+
'train_predicted_close_gmdh_1': trainPredictPlot_gmdh1.reshape(1, -1)[0].tolist(),
|
| 506 |
+
'test_predicted_close_gmdh_1': testPredictPlot_gmdh1.reshape(1, -1)[0].tolist(),
|
| 507 |
+
'train_predicted_close_gmdh_2': trainPredictPlot_gmdh2.reshape(1, -1)[0].tolist(),
|
| 508 |
+
'test_predicted_close_gmdh_2': testPredictPlot_gmdh2.reshape(1, -1)[0].tolist(),
|
| 509 |
+
'train_predicted_close_transformer': trainPredictPlot_transformer.reshape(1, -1)[0].tolist(),
|
| 510 |
+
'test_predicted_close_transformer': testPredictPlot_transformer.reshape(1, -1)[0].tolist()})
|
| 511 |
+
elif not transformer:
|
| 512 |
+
plotdf = pd.DataFrame({'date': close_stock['Date'],
|
| 513 |
+
'original_close': close_stock['Close'],
|
| 514 |
+
'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 515 |
+
'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 516 |
+
'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
|
| 517 |
+
'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
|
| 518 |
+
'train_predicted_close_gmdh_1': trainPredictPlot_gmdh1.reshape(1, -1)[0].tolist(),
|
| 519 |
+
'test_predicted_close_gmdh_1': testPredictPlot_gmdh1.reshape(1, -1)[0].tolist(),
|
| 520 |
+
'train_predicted_close_gmdh_2': trainPredictPlot_gmdh2.reshape(1, -1)[0].tolist(),
|
| 521 |
+
'test_predicted_close_gmdh_2': testPredictPlot_gmdh2.reshape(1, -1)[0].tolist()})
|
| 522 |
+
elif not GMDH:
|
| 523 |
+
if transformer:
|
| 524 |
+
plotdf = pd.DataFrame({'date': close_stock['Date'],
|
| 525 |
+
'original_close': close_stock['Close'],
|
| 526 |
+
'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 527 |
+
'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 528 |
+
'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
|
| 529 |
+
'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
|
| 530 |
+
'train_predicted_close_transformer': trainPredictPlot_transformer.reshape(1, -1)[
|
| 531 |
+
0].tolist(),
|
| 532 |
+
'test_predicted_close_transformer': testPredictPlot_transformer.reshape(1, -1)[
|
| 533 |
+
0].tolist()})
|
| 534 |
+
else:
|
| 535 |
+
plotdf = pd.DataFrame({'date': close_stock['Date'],
|
| 536 |
+
'original_close': close_stock['Close'],
|
| 537 |
+
'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 538 |
+
'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 539 |
+
'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
|
| 540 |
+
'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist()})
|
| 541 |
+
fig, ax = plt.subplots()
|
| 542 |
+
ax.plot(plotdf['date'], plotdf['original_close'], label='Оригинальная цена закрытия')
|
| 543 |
+
ax.plot(plotdf['date'], plotdf['train_predicted_close_arima'],
|
| 544 |
+
label='Предсказанная цена закрытия на тренировке SARIMA')
|
| 545 |
+
ax.plot(plotdf['date'], plotdf['test_predicted_close_arima'], label='Предсказанная цена закрытия на тесте SARIMA')
|
| 546 |
+
ax.plot(plotdf['date'], plotdf['train_predicted_close'], label='Предсказанная цена закрытия на тренировке')
|
| 547 |
+
ax.plot(plotdf['date'], plotdf['test_predicted_close'], label='Предсказанная цена закрытия на тесте')
|
| 548 |
+
if GMDH:
|
| 549 |
+
ax.plot(plotdf['date'], plotdf['train_predicted_close_gmdh_1'],
|
| 550 |
+
label='Предсказанная цена закрытия на тренировке GMDH_1')
|
| 551 |
+
ax.plot(plotdf['date'], plotdf['test_predicted_close_gmdh_1'], label='Предсказанная цена закрытия на тесте GMDH_1')
|
| 552 |
+
|
| 553 |
+
ax.plot(plotdf['date'], plotdf['train_predicted_close_gmdh_2'],
|
| 554 |
+
label='Предсказанная цена закрытия на тренировке GMDH_2')
|
| 555 |
+
ax.plot(plotdf['date'], plotdf['test_predicted_close_gmdh_2'], label='Предсказанная цена закрытия на тесте GMDH_2')
|
| 556 |
+
if transformer:
|
| 557 |
+
ax.plot(plotdf['date'], plotdf['train_predicted_close_transformer'],
|
| 558 |
+
label='Предсказанная цена закрытия на тренировке Transformer')
|
| 559 |
+
ax.plot(plotdf['date'], plotdf['test_predicted_close_transformer'],
|
| 560 |
+
label='Предсказанная цена закрытия на тесте Transformer')
|
| 561 |
+
ax.legend()
|
| 562 |
+
ax.set_title("Сравнение исходных и смоделированных цен")
|
| 563 |
+
# st.pyplot(fig)
|
| 564 |
+
#ax.plot()
|
| 565 |
+
|
| 566 |
+
models_dict = {'LSTM': model, 'SARIMA': arima_model, 'GMDH_1': model_gmdh1, 'GMDH_2': model_gmdh2, 'Transformer': pipeline}
|
| 567 |
+
|
| 568 |
+
return plotdf, metrics_df, models_dict
|
src/experiment_runner_for_portfolio.py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from experiment_runner_for_best_models import experiment
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
import numpy as np
|
| 6 |
+
import scipy.optimize as sco
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class DataLoader():
|
| 10 |
+
def __init__(self, correlation_threshold: float = 0.9):
|
| 11 |
+
self.correlation_threshold = correlation_threshold
|
| 12 |
+
# Function to get top N cryptocurrency tickers
|
| 13 |
+
def get_top_crypto_tickers(self, n):
|
| 14 |
+
|
| 15 |
+
url = 'https://api.coingecko.com/api/v3/coins/markets'
|
| 16 |
+
params = {
|
| 17 |
+
'vs_currency': 'usd',
|
| 18 |
+
'order': 'market_cap_desc',
|
| 19 |
+
'per_page': n,
|
| 20 |
+
'page': 1,
|
| 21 |
+
'sparkline': 'false'
|
| 22 |
+
}
|
| 23 |
+
response = requests.get(url, params=params)
|
| 24 |
+
data = response.json()
|
| 25 |
+
tickers = [coin['symbol'].upper() for coin in data]
|
| 26 |
+
return tickers
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# Function to validate if a ticker is compatible with yfinance
|
| 30 |
+
def validate_ticker(self, ticker):
|
| 31 |
+
import yfinance as yf
|
| 32 |
+
try:
|
| 33 |
+
ticker += '-USD'
|
| 34 |
+
info = yf.Ticker(ticker).info
|
| 35 |
+
return bool(info) # Returns True if info is not empty
|
| 36 |
+
except Exception:
|
| 37 |
+
return False
|
| 38 |
+
|
| 39 |
+
def experiment_data(self, top_n: int = 3, num_scale_steps: int = 1, scaling_strategy: str = 'average', time_step_backward: int = 15):
|
| 40 |
+
# Retrieve top N tickers
|
| 41 |
+
#top_n = 10
|
| 42 |
+
self.tickers = self.get_top_crypto_tickers(top_n)
|
| 43 |
+
# Validate tickers for compatibility with yfinance
|
| 44 |
+
self.valid_tickers = [ticker for ticker in self.tickers if self.validate_ticker(ticker)]
|
| 45 |
+
print("Compatible tickers for yfinance:", len(self.valid_tickers))
|
| 46 |
+
self.invalid_tickers = []
|
| 47 |
+
|
| 48 |
+
# Run experiments for each valid ticker
|
| 49 |
+
self.tickers_dict = {}
|
| 50 |
+
for ticker in self.valid_tickers:
|
| 51 |
+
try:
|
| 52 |
+
self.tickers_dict[ticker] = {}
|
| 53 |
+
plot_df, metrics_df, models_dict = experiment(ticker=ticker, num_scale_steps=num_scale_steps,
|
| 54 |
+
scaling_strategy=scaling_strategy, time_step_backward=time_step_backward)
|
| 55 |
+
self.tickers_dict[ticker]['plot_df'] = plot_df
|
| 56 |
+
self.tickers_dict[ticker]['metrics_df'] = metrics_df
|
| 57 |
+
self.tickers_dict[ticker]['models_dict'] = models_dict
|
| 58 |
+
except AssertionError as e: # Или другой конкретный тип ошибки
|
| 59 |
+
print('EXCEPTION ', str(e), ticker)
|
| 60 |
+
self.invalid_tickers.append(ticker)
|
| 61 |
+
continue
|
| 62 |
+
|
| 63 |
+
for invalid_ticker in self.invalid_tickers:
|
| 64 |
+
self.valid_tickers.remove(invalid_ticker)
|
| 65 |
+
|
| 66 |
+
# Mapping for prediction columns
|
| 67 |
+
test_predictions_model_mapper = {
|
| 68 |
+
'SARIMA': 'test_predicted_close_arima',
|
| 69 |
+
'LSTM': 'test_predicted_close',
|
| 70 |
+
'GMDH_1': 'test_predicted_close_gmdh_1',
|
| 71 |
+
'GMDH_2': 'test_predicted_close_gmdh_2',
|
| 72 |
+
'Transformer': 'test_predicted_close_transformer'
|
| 73 |
+
}
|
| 74 |
+
train_predictions_model_mapper = {
|
| 75 |
+
'SARIMA': 'train_predicted_close_arima',
|
| 76 |
+
'LSTM': 'train_predicted_close',
|
| 77 |
+
'GMDH_1': 'train_predicted_close_gmdh_1',
|
| 78 |
+
'GMDH_2': 'train_predicted_close_gmdh_2',
|
| 79 |
+
'Transformer': 'train_predicted_close_transformer'
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
# Determine global training and testing periods
|
| 83 |
+
self.global_min_date = datetime(2000, 1, 1, 0, 0)
|
| 84 |
+
self.global_max_date = datetime.now()
|
| 85 |
+
for ticker in self.valid_tickers:
|
| 86 |
+
train_last_valid_index = self.tickers_dict[ticker]['plot_df']['train_predicted_close_arima'].last_valid_index()
|
| 87 |
+
train_last_date = self.tickers_dict[ticker]['plot_df'].loc[train_last_valid_index, 'date']
|
| 88 |
+
if train_last_date < self.global_max_date:
|
| 89 |
+
self.global_max_date = train_last_date
|
| 90 |
+
|
| 91 |
+
test_first_valid_index = self.tickers_dict[ticker]['plot_df']['test_predicted_close_arima'].first_valid_index()
|
| 92 |
+
test_first_date = self.tickers_dict[ticker]['plot_df'].loc[test_first_valid_index, 'date']
|
| 93 |
+
if test_first_date > self.global_min_date:
|
| 94 |
+
self.global_min_date = test_first_date
|
| 95 |
+
|
| 96 |
+
print(train_last_date, train_last_valid_index, test_first_date, test_first_valid_index)
|
| 97 |
+
|
| 98 |
+
print(self.global_min_date , self.global_max_date)
|
| 99 |
+
|
| 100 |
+
# Collect predictions for the global periods
|
| 101 |
+
self.train_predictions_df_list = []
|
| 102 |
+
self.test_predictions_df_list = []
|
| 103 |
+
self.actual_prices_train = []
|
| 104 |
+
self.actual_prices_test = []
|
| 105 |
+
for ticker in tqdm(self.valid_tickers):
|
| 106 |
+
best_model = self.tickers_dict[ticker]['metrics_df'].T.sort_values(by='Test data MAPE', ascending=True).index[0]
|
| 107 |
+
train_predictions = self.tickers_dict[ticker]['plot_df'][['date', train_predictions_model_mapper[best_model]]]
|
| 108 |
+
train_predictions = train_predictions[train_predictions['date'] <= self.global_max_date]
|
| 109 |
+
train_predictions.rename(columns={train_predictions_model_mapper[best_model]: ticker}, inplace=True)
|
| 110 |
+
self.train_predictions_df_list.append(train_predictions)
|
| 111 |
+
|
| 112 |
+
actual_train = self.tickers_dict[ticker]['plot_df'][['date', 'original_close']]
|
| 113 |
+
actual_train = actual_train[actual_train['date'] <= self.global_max_date]
|
| 114 |
+
actual_train.rename(columns={'original_close': ticker}, inplace=True)
|
| 115 |
+
self.actual_prices_train.append(actual_train)
|
| 116 |
+
|
| 117 |
+
test_predictions = self.tickers_dict[ticker]['plot_df'][['date', test_predictions_model_mapper[best_model]]]
|
| 118 |
+
test_predictions = test_predictions[test_predictions['date'] >= self.global_min_date]
|
| 119 |
+
test_predictions.rename(columns={test_predictions_model_mapper[best_model]: ticker}, inplace=True)
|
| 120 |
+
self.test_predictions_df_list.append(test_predictions)
|
| 121 |
+
|
| 122 |
+
actual_test = self.tickers_dict[ticker]['plot_df'][['date', 'original_close']]
|
| 123 |
+
actual_test = actual_test[actual_test['date'] >= self.global_min_date]
|
| 124 |
+
actual_test.rename(columns={'original_close': ticker}, inplace=True)
|
| 125 |
+
self.actual_prices_test.append(actual_test)
|
| 126 |
+
|
| 127 |
+
self.selected_features = [self.valid_tickers[0]]
|
| 128 |
+
#correlation_threshold = 0.9
|
| 129 |
+
for idx, feature in enumerate(self.valid_tickers):
|
| 130 |
+
if idx == 0:
|
| 131 |
+
continue
|
| 132 |
+
print(idx, feature)
|
| 133 |
+
tmp = self.train_predictions_df_list[0].merge(self.train_predictions_df_list[idx], on='date', how='inner')
|
| 134 |
+
# Вычисляем корреляцию нового признака с уже выбранными
|
| 135 |
+
correlations = [abs(tmp[feature].corr(tmp[sel_feature])) for sel_feature in self.selected_features]
|
| 136 |
+
print(correlations)
|
| 137 |
+
max_correlation = max(correlations)
|
| 138 |
+
|
| 139 |
+
# Добавляем признак, если максимальная корреляция не превышает порог
|
| 140 |
+
if max_correlation < self.correlation_threshold:
|
| 141 |
+
self.selected_features.append(feature)
|
| 142 |
+
self.train_predictions_df_list[0] = self.train_predictions_df_list[0].merge(self.train_predictions_df_list[idx], on='date', how='inner')
|
| 143 |
+
self.actual_prices_train[0] = self.actual_prices_train[0].merge(self.actual_prices_train[idx], on='date', how='inner')
|
| 144 |
+
self.test_predictions_df_list[0] = self.test_predictions_df_list[0].merge(self.test_predictions_df_list[idx], on='date', how='inner')
|
| 145 |
+
self.actual_prices_test[0] = self.actual_prices_test[0].merge(self.actual_prices_test[idx], on='date', how='inner')
|
| 146 |
+
print(self.selected_features)
|
| 147 |
+
|
| 148 |
+
selected_features_and_date = ['date'] + self.selected_features
|
| 149 |
+
print(selected_features_and_date)
|
| 150 |
+
|
| 151 |
+
# Calculate covariance matrix for the training period
|
| 152 |
+
train_data = self.train_predictions_df_list[0].drop(columns=['date']).astype(float)
|
| 153 |
+
self.cov_matrix = train_data[self.selected_features].cov()
|
| 154 |
+
print("Covariance matrix for the training period:")
|
| 155 |
+
print(self.cov_matrix)
|
| 156 |
+
|
| 157 |
+
# Split the global test period into validation and test sets
|
| 158 |
+
self.validation_size = int(len(self.test_predictions_df_list[0][selected_features_and_date]) * 0.5)
|
| 159 |
+
self.validation_data = self.test_predictions_df_list[0][selected_features_and_date].iloc[:self.validation_size]
|
| 160 |
+
self.validation_actual = self.actual_prices_test[0][selected_features_and_date].iloc[:self.validation_size]
|
| 161 |
+
self.test_data = self.test_predictions_df_list[0][selected_features_and_date].iloc[self.validation_size:]
|
| 162 |
+
self.test_actual = self.actual_prices_test[0][selected_features_and_date].iloc[self.validation_size:]
|
| 163 |
+
|
| 164 |
+
# Проверка положительной определённости
|
| 165 |
+
if np.any(np.linalg.eigvals(self.cov_matrix) <= 0):
|
| 166 |
+
raise ValueError("Ковариационная матрица не является положительно определённой.")
|
| 167 |
+
|
| 168 |
+
return self.cov_matrix, self.validation_data, self.validation_actual, self.test_data, self.test_actual, self.train_predictions_df_list, self.actual_prices_train, self.test_predictions_df_list, self.actual_prices_test, self.tickers_dict
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
class Portfolio():
|
| 172 |
+
|
| 173 |
+
def calculate_portfolio_metrics(self, weights, returns, cov_matrix):
|
| 174 |
+
portfolio_return = np.dot(weights, returns)
|
| 175 |
+
portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(cov_matrix, weights)))
|
| 176 |
+
return portfolio_return, portfolio_volatility
|
| 177 |
+
|
| 178 |
+
def optimize(self, returns, cov_matrix, target_return=None, allow_short=False):
|
| 179 |
+
num_assets = len(returns)
|
| 180 |
+
constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})
|
| 181 |
+
if allow_short:
|
| 182 |
+
bounds = tuple((-1, 1) for _ in range(num_assets)) # Allow short positions
|
| 183 |
+
else:
|
| 184 |
+
bounds = tuple((0, 1) for _ in range(num_assets)) # Long-only portfolio
|
| 185 |
+
initial_weights = num_assets * [1. / num_assets]
|
| 186 |
+
|
| 187 |
+
if target_return is not None:
|
| 188 |
+
constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1},
|
| 189 |
+
{'type': 'eq', 'fun': lambda x: np.dot(x, returns) - target_return})
|
| 190 |
+
|
| 191 |
+
result = sco.minimize(
|
| 192 |
+
lambda w: self.calculate_portfolio_metrics(w, returns, cov_matrix)[1],
|
| 193 |
+
initial_weights,
|
| 194 |
+
method='SLSQP',
|
| 195 |
+
bounds=bounds,
|
| 196 |
+
constraints=constraints
|
| 197 |
+
)
|
| 198 |
+
return result.x
|
| 199 |
+
|
| 200 |
+
def process_period(self, data, actual_data, cov_matrix, target_return=None, allow_short=False):
|
| 201 |
+
# Forecast and optimize portfolio for each point T -> T+1 in validation and test data
|
| 202 |
+
realized_returns = []
|
| 203 |
+
predicted_returns = []
|
| 204 |
+
realized_volatilities = []
|
| 205 |
+
predicted_volatilities = []
|
| 206 |
+
for i in range(len(data) - 1):
|
| 207 |
+
current_data = data.iloc[i:i + 2] # Include current day and prediction for next day
|
| 208 |
+
actual_current_data = actual_data.iloc[i:i + 2] # Actual prices for T and T+1
|
| 209 |
+
# Calculate predicted return using actual price at T and predicted price at T+1
|
| 210 |
+
predicted_return = (current_data.drop(columns=['date']).iloc[1]-
|
| 211 |
+
actual_current_data.drop(columns=['date']).iloc[0]) / actual_current_data.drop(columns=['date']).iloc[0]
|
| 212 |
+
# Optimize portfolio based on predicted returns
|
| 213 |
+
self.weights = self.optimize(predicted_return, cov_matrix, target_return=target_return,
|
| 214 |
+
allow_short=allow_short)
|
| 215 |
+
pred_return, pred_volatility = self.calculate_portfolio_metrics(weights=self.weights, returns=predicted_return,
|
| 216 |
+
cov_matrix=cov_matrix)
|
| 217 |
+
# Compute realized return using actual prices for T and T+1
|
| 218 |
+
realized_return = (actual_current_data.drop(columns=['date']).iloc[1] -
|
| 219 |
+
actual_current_data.drop(columns=['date']).iloc[0]) / actual_current_data.drop(columns=['date']).iloc[0]
|
| 220 |
+
|
| 221 |
+
real_return, real_volatility = self.calculate_portfolio_metrics(weights=self.weights, returns=realized_return,
|
| 222 |
+
cov_matrix=cov_matrix)
|
| 223 |
+
realized_returns.append(real_return)
|
| 224 |
+
predicted_returns.append(pred_return)
|
| 225 |
+
realized_volatilities.append(real_volatility)
|
| 226 |
+
predicted_volatilities.append(pred_volatility)
|
| 227 |
+
return predicted_returns, realized_returns, predicted_volatilities, realized_volatilities
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
# Calculate accuracy metrics for validation and test sets
|
| 231 |
+
def calculate_accuracy(self, predicted, realized):
    """Relative mean absolute error: mean |predicted - realized| scaled
    by the mean realized value. Lower is better; 0 means a perfect match."""
    errors = np.abs(np.array(predicted) - np.array(realized))
    return np.mean(errors) / np.mean(realized)
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
# Calculate Sharpe ratio deviation
|
| 236 |
+
def calculate_sharpe_ratio_deviation(self, predicted_returns, realized_returns, predicted_vol, realized_vol):
    """Absolute gap between the Sharpe ratio implied by the predicted
    series and the one implied by the realized series, where each Sharpe
    ratio is the mean return divided by the mean volatility."""
    sharpe_from_forecast = np.mean(predicted_returns) / np.mean(predicted_vol)
    sharpe_from_actuals = np.mean(realized_returns) / np.mean(realized_vol)
    return abs(sharpe_from_forecast - sharpe_from_actuals)
|
| 240 |
+
|
| 241 |
+
def optimize_portfolio(self, cov_matrix, validation_data, validation_actual, test_data, test_actual, target_return: int | None = None, allow_short: bool = False):
    """Run the step-by-step optimize/evaluate loop over the validation and
    test periods, stash the raw series and summary metrics on the instance
    (as ``val_*`` / ``test_*`` attributes), and print a report.

    The attributes set here are identical to the hand-unrolled version:
    {val,test}_pred_returns, _realized_returns, _pred_vol, _realized_vol,
    _return_accuracy, _volatility_accuracy, _sharpe_deviation,
    _sum_pred_returns, _sum_realized_returns.
    """
    periods = (
        ('val', 'Validation', validation_data, validation_actual),
        ('test', 'Test', test_data, test_actual),
    )

    # Pass 1: forecast/optimize each period and store the raw series.
    for prefix, _, period_data, period_actual in periods:
        preds, reals, pred_vols, real_vols = self.process_period(
            data=period_data,
            actual_data=period_actual,
            cov_matrix=cov_matrix,
            target_return=target_return,
            allow_short=allow_short)
        setattr(self, f'{prefix}_pred_returns', preds)
        setattr(self, f'{prefix}_realized_returns', reals)
        setattr(self, f'{prefix}_pred_vol', pred_vols)
        setattr(self, f'{prefix}_realized_vol', real_vols)

    # Pass 2: summary metrics per period, then the printed report
    # (validation lines first, then test lines, matching label order).
    for prefix, label, _, _ in periods:
        preds = getattr(self, f'{prefix}_pred_returns')
        reals = getattr(self, f'{prefix}_realized_returns')
        pred_vols = getattr(self, f'{prefix}_pred_vol')
        real_vols = getattr(self, f'{prefix}_realized_vol')

        setattr(self, f'{prefix}_return_accuracy',
                self.calculate_accuracy(preds, reals))
        setattr(self, f'{prefix}_volatility_accuracy',
                self.calculate_accuracy(pred_vols, real_vols))
        setattr(self, f'{prefix}_sharpe_deviation',
                self.calculate_sharpe_ratio_deviation(preds, reals, pred_vols, real_vols))
        setattr(self, f'{prefix}_sum_pred_returns', np.sum(preds))
        setattr(self, f'{prefix}_sum_realized_returns', np.sum(reals))

        print(f"{label} Return Accuracy: {getattr(self, f'{prefix}_return_accuracy')}")
        print(f"{label} Volatility Accuracy: {getattr(self, f'{prefix}_volatility_accuracy')}")
        print(f"{label} Sharpe Ratio Deviation: {getattr(self, f'{prefix}_sharpe_deviation')}")
        print(f"{label} Pred Return Sum: {getattr(self, f'{prefix}_sum_pred_returns')}")
        print(f"{label} Actual Return Sum: {getattr(self, f'{prefix}_sum_realized_returns')}")
|
src/pages/.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
src/pages/1_Model_optimization.py
ADDED
|
@@ -0,0 +1,688 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import math
|
| 4 |
+
|
| 5 |
+
# For Evalution we will use these library
|
| 6 |
+
|
| 7 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
|
| 8 |
+
from sklearn.preprocessing import MinMaxScaler
|
| 9 |
+
|
| 10 |
+
# For model building we will use these library
|
| 11 |
+
|
| 12 |
+
from tensorflow.keras.models import Sequential
|
| 13 |
+
from tensorflow.keras.layers import Dense
|
| 14 |
+
from tensorflow.keras.layers import LSTM
|
| 15 |
+
from tensorflow.keras import initializers
|
| 16 |
+
from tensorflow.keras.callbacks import EarlyStopping
|
| 17 |
+
|
| 18 |
+
# For PLotting we will use these library
|
| 19 |
+
import matplotlib.pyplot as plt
|
| 20 |
+
|
| 21 |
+
import yfinance as yf
|
| 22 |
+
|
| 23 |
+
import streamlit as st
|
| 24 |
+
|
| 25 |
+
from gmdh import CriterionType, Criterion, Multi, Combi, Mia, Ria, PolynomialType
|
| 26 |
+
from chronos import ChronosPipeline
|
| 27 |
+
import torch
|
| 28 |
+
import pmdarima as pm
|
| 29 |
+
from pages.utils.utils import create_dataset, make_prediction, make_prediction_recursive
|
| 30 |
+
|
| 31 |
+
from io import StringIO
|
| 32 |
+
|
| 33 |
+
st.set_page_config(
|
| 34 |
+
page_title="Model optimization",
|
| 35 |
+
page_icon="📈")
|
| 36 |
+
|
| 37 |
+
@st.cache_resource
def get_pipeline():
    """Load and cache the Chronos forecasting pipeline (loaded once per server).

    Fix: was ``@st.cache_data``, which attempts to hash/serialize the return
    value on every call — a ChronosPipeline is an unserializable model object,
    so Streamlit documents ``@st.cache_resource`` as the correct cache for
    ML models and other global resources.

    Returns
    -------
    ChronosPipeline
        The pretrained "amazon/chronos-t5-tiny" pipeline on CPU.
    """
    pipeline = ChronosPipeline.from_pretrained(
        "amazon/chronos-t5-tiny",
        device_map="cpu",  # use "cpu" for CPU inference and "mps" for Apple Silicon
        torch_dtype=torch.bfloat16,
    )
    return pipeline
|
| 44 |
+
|
| 45 |
+
pipeline = get_pipeline()
|
| 46 |
+
seed = 42
|
| 47 |
+
st.title("Daily price prediction")
|
| 48 |
+
tickers = ['BTC', 'ETH', 'BNB', #'USDC',
|
| 49 |
+
'XRP', 'STETH','ADA','DOGE',#'FGC',
|
| 50 |
+
'WTRX','LTC','SOL','TRX','DOT','MATIC','BCH','WBTC','TON11419',
|
| 51 |
+
'DAI','SHIB','AVAX','BUSD','LEO','LINK']
|
| 52 |
+
intervals = ['1d']#, '5d', '1wk', '1mo', '3mo'] #['1m', '2m', '5m','15m','30m','60m','90m','1h','1d','5d','1wk','1mo','3mo']
|
| 53 |
+
ticker = st.selectbox("Ticker", options=tickers)
|
| 54 |
+
interval = st.selectbox("Interval of raw data", options = intervals)
|
| 55 |
+
|
| 56 |
+
int_to_periods = {'1m':'5d', '2m':'1mo', '5m': '1mo','15m': '1mo','30m': '1mo','60m': '1mo','90m': '1mo',
|
| 57 |
+
'1h': '1y','1d': '10y','5d': '10y','1wk': '10y','1mo': '10y','3mo': '10y'}
|
| 58 |
+
|
| 59 |
+
period_cut = {'1d': '2022-02-19', '5d': '2020-06-19', '1wk': '2020-06-19', '1mo': '2014-06-19', '3mo': '2014-06-19'}
|
| 60 |
+
|
| 61 |
+
uploaded_file = st.file_uploader("Choose a file")
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
try:
|
| 65 |
+
maindf = yf.download(tickers = f"{ticker}-USD", # list of tickers
|
| 66 |
+
period = int_to_periods[interval], # time period
|
| 67 |
+
interval = interval, # trading interval
|
| 68 |
+
prepost = False, # download pre/post market hours data?
|
| 69 |
+
repair = True,) # repair obvious price errors e.g. 100x?
|
| 70 |
+
if len(maindf) == 0:
|
| 71 |
+
raise FileNotFoundError
|
| 72 |
+
except:
|
| 73 |
+
maindf = pd.read_csv(f'{ticker}.csv')
|
| 74 |
+
|
| 75 |
+
if uploaded_file is not None:
|
| 76 |
+
# To read file as bytes:
|
| 77 |
+
bytes_data = uploaded_file.getvalue()
|
| 78 |
+
# To convert to a string based IO:
|
| 79 |
+
stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
|
| 80 |
+
# To read file as string:
|
| 81 |
+
string_data = stringio.read()
|
| 82 |
+
|
| 83 |
+
# Can be used wherever a "file-like" object is accepted:
|
| 84 |
+
maindf = pd.read_csv(uploaded_file)
|
| 85 |
+
st.write(maindf.head())
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
maindf=maindf.reset_index()
|
| 90 |
+
maindf['Date'] = pd.to_datetime(maindf['Date'], format='%Y-%m-%d')
|
| 91 |
+
|
| 92 |
+
#maindf = pd.read_csv('BTC-USD.csv')
|
| 93 |
+
print('Total number of days present in the dataset: ',maindf.shape[0])
|
| 94 |
+
print('Total number of fields present in the dataset: ',maindf.shape[1])
|
| 95 |
+
print(maindf.head())
|
| 96 |
+
|
| 97 |
+
y_overall = maindf.copy()#.loc[(maindf['Date'] >= '2014-09-17')]
|
| 98 |
+
#& (maindf['Date'] <= '2022-02-19')]
|
| 99 |
+
|
| 100 |
+
global_expander = st.sidebar.expander('Параметры режима моделирования')
|
| 101 |
+
scaling_expander= st.sidebar.expander('Режим масштабирования')
|
| 102 |
+
scaling_strategy_list = ['median', 'average', 'undersampling']
|
| 103 |
+
scale_step_type_list = ['D','W','M','Y']
|
| 104 |
+
scale_step_type = scaling_expander.selectbox('Шаг масштабирования', scale_step_type_list)
|
| 105 |
+
num_scale_steps = scaling_expander.slider('Размер шага масштабирования', 1, 100, 1)
|
| 106 |
+
|
| 107 |
+
y_overall = y_overall[['Date','Close']]
|
| 108 |
+
if num_scale_steps > 1:
|
| 109 |
+
scaling_strategy = scaling_expander.selectbox('Метод масштабирования', scaling_strategy_list)
|
| 110 |
+
scaling_step_combined = str(num_scale_steps) + scale_step_type
|
| 111 |
+
# Определяем сегодняшнюю дату
|
| 112 |
+
today = pd.Timestamp.now().normalize()
|
| 113 |
+
if scaling_strategy == 'average':
|
| 114 |
+
# y_overall = y_overall.groupby(pd.Grouper(key = 'Date', freq = scaling_step_combined)).mean()
|
| 115 |
+
# Добавляем колонку для конца интервала
|
| 116 |
+
y_overall['Interval_End'] = today - (
|
| 117 |
+
(today - y_overall['Date']) // pd.Timedelta(scaling_step_combined)) * pd.Timedelta(
|
| 118 |
+
scaling_step_combined)
|
| 119 |
+
# Группируем по интервалам и считаем среднее
|
| 120 |
+
y_overall = y_overall.groupby('Interval_End')['Close'].mean().reset_index()
|
| 121 |
+
# Сортируем результат
|
| 122 |
+
y_overall = y_overall.sort_values('Interval_End') # .reset_index(drop=True)
|
| 123 |
+
y_overall = y_overall.rename({'Interval_End': 'Date'}, axis=1)
|
| 124 |
+
elif scaling_strategy == 'median':
|
| 125 |
+
# y_overall = y_overall.groupby(pd.Grouper(key = 'Date', freq = scaling_step_combined)).median()
|
| 126 |
+
# y_overall = y_overall.groupby(pd.Grouper(key = 'Date', freq = scaling_step_combined)).mean()
|
| 127 |
+
# Добавляем колонку для конца интервала
|
| 128 |
+
y_overall['Interval_End'] = today - (
|
| 129 |
+
(today - y_overall['Date']) // pd.Timedelta(scaling_step_combined)) * pd.Timedelta(
|
| 130 |
+
scaling_step_combined)
|
| 131 |
+
# Группируем по интервалам и считаем среднее
|
| 132 |
+
y_overall = y_overall.groupby('Interval_End')['Close'].median().reset_index()
|
| 133 |
+
# Сортируем результат
|
| 134 |
+
y_overall = y_overall.sort_values('Interval_End') # .reset_index(drop=True)
|
| 135 |
+
y_overall = y_overall.rename({'Interval_End': 'Date'}, axis=1)
|
| 136 |
+
else:
|
| 137 |
+
# y_overall = y_overall.resample(on = 'Date', rule = scaling_step_combined).last()
|
| 138 |
+
# Устанавливаем 'Date' как индекс, если это ещё не сделано
|
| 139 |
+
# y_overall = y_overall.set_index('Date')
|
| 140 |
+
# y_overall.columns = y_overall.columns.droplevel(1)
|
| 141 |
+
y_overall = y_overall.resample(on='Date', rule=scaling_step_combined, origin='end').last()
|
| 142 |
+
y_overall = y_overall.reset_index()
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
#names = cycle(['Stock Open Price','Stock Close Price','Stock High Price','Stock Low Price'])
|
| 146 |
+
fig, ax = plt.subplots()
|
| 147 |
+
#ax.plot(y_overall.Date, y_overall['Close'], label = 'Stock Close Price')
|
| 148 |
+
ax.plot(y_overall['Close'], label = 'Stock Close Price')
|
| 149 |
+
|
| 150 |
+
ax.legend()
|
| 151 |
+
ax.set_title(f'Динамика цены закрытия для {ticker}')
|
| 152 |
+
|
| 153 |
+
#st.image(fig)
|
| 154 |
+
st.pyplot(fig)
|
| 155 |
+
#fig.show()
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
train = st.sidebar.button('Train')
|
| 161 |
+
time_step_backward = st.sidebar.slider('Количество шагов назад для предикторов', 5, 60, 15)
|
| 162 |
+
time_step_forward = st.sidebar.slider('Количество шагов вперед для таргета', 1, 60, 1)
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
pred_days = 1
|
| 166 |
+
recursive_pred = False
|
| 167 |
+
if time_step_forward == 1:
|
| 168 |
+
expander = st.sidebar.expander('Режим ресурсивного прогноза')
|
| 169 |
+
pred_days = expander.slider('Количество шагов для ресурсивного прогноза', 1, 30, 15)
|
| 170 |
+
recursive_pred = expander.checkbox('Запустить рекурсивный прогноз')
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
GMDH = st.sidebar.checkbox('Добавить режим МГУА')
|
| 175 |
+
transformer = st.sidebar.checkbox('Добавить режим Transformer')
|
| 176 |
+
if GMDH:
|
| 177 |
+
expander1 = st.sidebar.expander('Гиперпараметры МГУА')
|
| 178 |
+
GMDHs = {'Combi': Combi(), 'Multi': Multi(), 'Mia': Mia(), 'Ria': Ria()}
|
| 179 |
+
criterions = {'Критерий регулярности (несимметричная форма)': CriterionType.REGULARITY,
|
| 180 |
+
'Критерий регулярности (симметричная форма)': CriterionType.SYM_REGULARITY,
|
| 181 |
+
'Критерий стабильности (несимметричная форма)': CriterionType.STABILITY,
|
| 182 |
+
'Критерий стабильности (симметричная форма)': CriterionType.SYM_STABILITY,
|
| 183 |
+
'Критерий минимума смещения коэффициентов': CriterionType.UNBIASED_COEFFS,
|
| 184 |
+
'Критерий минимума смещения решений (несимметричная форма)': CriterionType.UNBIASED_OUTPUTS,
|
| 185 |
+
'Критерий минимума смещения решений (симметричная форма)': CriterionType.SYM_UNBIASED_OUTPUTS,
|
| 186 |
+
'Абсолютно помехоустойчивый критерий (несимметричная форма)': CriterionType.ABSOLUTE_NOISE_IMMUNITY,
|
| 187 |
+
'Абсолютно помехоустойчивый критерий (симметричная форма)': CriterionType.SYM_ABSOLUTE_NOISE_IMMUNITY}
|
| 188 |
+
polynoms = {'LINEAR': PolynomialType.LINEAR,
|
| 189 |
+
'LINEAR_COV': PolynomialType.LINEAR_COV,
|
| 190 |
+
'QUADRATIC': PolynomialType.QUADRATIC}
|
| 191 |
+
GMDH_algo = expander1.selectbox("Алгоритм МГУА", options = GMDHs.keys())
|
| 192 |
+
criterion = expander1.selectbox("Внешний критерий", options = criterions.keys())
|
| 193 |
+
p_average = expander1.slider('p_average', 1, 10, 1)
|
| 194 |
+
limit = expander1.number_input('limit', value = 0.)
|
| 195 |
+
k_best = expander1.slider('k_best', 1, 10, 3 if GMDH_algo == 'Mia' else 1)
|
| 196 |
+
polynom = expander1.selectbox("Вид базовых полиномов", options = polynoms.keys())
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
y_overall.columns = y_overall.columns.droplevel(1)#.droplevel()
|
| 200 |
+
#y_overall = y_overall.reset_index()
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
if train:
|
| 204 |
+
my_bar = st.progress(0, text='Model training progress. Truncating the dataset now')
|
| 205 |
+
# Lets First Take all the Close Price
|
| 206 |
+
closedf = y_overall[['Date', 'Close']]#maindf[['Date', 'Close']]
|
| 207 |
+
print("Shape of close dataframe:", closedf.shape)
|
| 208 |
+
closedf = closedf[-1000:]#closedf[closedf['Date'] > period_cut[interval]]
|
| 209 |
+
close_stock = closedf.copy()
|
| 210 |
+
print("Total data for prediction: ", closedf.shape[0])
|
| 211 |
+
my_bar.progress(10 + 1, text='Truncated the dataset -> Scaling it')
|
| 212 |
+
# deleting date column and normalizing using MinMax Scaler
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
del closedf['Date']
|
| 216 |
+
scaler = MinMaxScaler(feature_range=(0, 1))
|
| 217 |
+
#closedf = scaler.fit_transform(np.array(closedf).reshape(-1, 1))
|
| 218 |
+
print(closedf.shape)
|
| 219 |
+
|
| 220 |
+
my_bar.progress(20 + 1, text='Scaled the dataset -> Splitting it into subsamples')
|
| 221 |
+
# we keep the training set as 60% and 40% testing set
|
| 222 |
+
|
| 223 |
+
training_size = int(len(closedf) * 0.70)
|
| 224 |
+
test_size = len(closedf) - training_size
|
| 225 |
+
assert test_size > time_step_backward + time_step_forward, "Test_size is shorter than time_step_backward + time_step_forward"
|
| 226 |
+
train_data, test_data = closedf[0:training_size], closedf[training_size:len(closedf)]
|
| 227 |
+
train_data = scaler.fit_transform(train_data)
|
| 228 |
+
test_data = scaler.transform(test_data)
|
| 229 |
+
print("train_data: ", train_data.shape)
|
| 230 |
+
print("test_data: ", test_data.shape)
|
| 231 |
+
|
| 232 |
+
my_bar.progress(30 + 1, text='Split it into subsamples -> Cutting them into observations')
|
| 233 |
+
|
| 234 |
+
X_train, y_train = create_dataset(train_data, time_step_backward, time_step_forward)
|
| 235 |
+
X_test, y_test = create_dataset(test_data, time_step_backward, time_step_forward)
|
| 236 |
+
|
| 237 |
+
print("X_train: ", X_train.shape)
|
| 238 |
+
print("y_train: ", y_train.shape)
|
| 239 |
+
print("X_test: ", X_test.shape)
|
| 240 |
+
print("y_test", y_test.shape)
|
| 241 |
+
|
| 242 |
+
# reshape input to be [samples, time steps, features] which is required for LSTM
|
| 243 |
+
X_train_gmdh = X_train.copy()
|
| 244 |
+
X_test_gmdh = X_test.copy()
|
| 245 |
+
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
|
| 246 |
+
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
|
| 247 |
+
|
| 248 |
+
print("X_train: ", X_train.shape)
|
| 249 |
+
print("X_test: ", X_test.shape)
|
| 250 |
+
|
| 251 |
+
my_bar.progress(40 + 1, text='Cut it into observations -> Training the model')
|
| 252 |
+
model = Sequential()
|
| 253 |
+
model.add(LSTM(10, input_shape=(None, 1), activation="relu",
|
| 254 |
+
kernel_initializer = initializers.GlorotNormal(seed = seed), bias_initializer = initializers.GlorotNormal(seed = seed)))
|
| 255 |
+
model.add(Dense(1,
|
| 256 |
+
kernel_initializer = initializers.GlorotNormal(seed = seed), bias_initializer = initializers.GlorotNormal(seed = seed)))
|
| 257 |
+
model.compile(loss="mean_squared_error", optimizer="adam")
|
| 258 |
+
callback = EarlyStopping(monitor='loss', patience=30, restore_best_weights = True)
|
| 259 |
+
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=32, verbose=10,
|
| 260 |
+
callbacks = [callback])
|
| 261 |
+
|
| 262 |
+
arima_model = pm.auto_arima(train_data,
|
| 263 |
+
m=12, # frequency of series
|
| 264 |
+
seasonal=True, # TRUE if seasonal series
|
| 265 |
+
d=None, # let model determine 'd'
|
| 266 |
+
test='adf', # use adftest to find optimal 'd'
|
| 267 |
+
start_p=0, start_q=0, # minimum p and q
|
| 268 |
+
max_p=time_step_backward, max_q=time_step_backward, # maximum p and q
|
| 269 |
+
D=None, # let model determine 'D'
|
| 270 |
+
trace=True,
|
| 271 |
+
error_action='ignore',
|
| 272 |
+
suppress_warnings=True,
|
| 273 |
+
stepwise=True)
|
| 274 |
+
st.text(arima_model.summary())
|
| 275 |
+
|
| 276 |
+
if GMDH:
|
| 277 |
+
model_gmdh = GMDHs[GMDH_algo]
|
| 278 |
+
if GMDH_algo == 'Combi':
|
| 279 |
+
model_gmdh.fit(X_train_gmdh, y_train, p_average = p_average, limit = limit, test_size=0.3,
|
| 280 |
+
criterion = Criterion(criterion_type = criterions[criterion]))
|
| 281 |
+
if GMDH_algo == 'Multi':
|
| 282 |
+
model_gmdh.fit(X_train_gmdh, y_train, p_average=p_average, limit=limit, test_size=0.3,
|
| 283 |
+
criterion=Criterion(criterion_type=criterions[criterion]),
|
| 284 |
+
k_best = k_best)
|
| 285 |
+
if GMDH_algo in ['Ria', 'Mia']:
|
| 286 |
+
model_gmdh.fit(X_train_gmdh, y_train, p_average=p_average, limit=limit, test_size=0.3,
|
| 287 |
+
criterion=Criterion(criterion_type=criterions[criterion]),
|
| 288 |
+
k_best = k_best, polynomial_type = polynoms[polynom])
|
| 289 |
+
st.write(f"GMDH model: {model_gmdh.get_best_polynomial()}")
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
my_bar.progress(70 + 1, text='Trained model -> Calculating loss')
|
| 293 |
+
import matplotlib.pyplot as plt
|
| 294 |
+
|
| 295 |
+
loss = history.history['loss']
|
| 296 |
+
val_loss = history.history['val_loss']
|
| 297 |
+
|
| 298 |
+
epochs = range(len(loss))
|
| 299 |
+
|
| 300 |
+
fig, ax = plt.subplots()
|
| 301 |
+
ax.plot(epochs, loss, 'r', label='Training loss')
|
| 302 |
+
ax.plot(epochs, val_loss, 'b', label='Validation loss')
|
| 303 |
+
ax.legend()
|
| 304 |
+
ax.set_title('Потери на обучении и валидации')
|
| 305 |
+
#ax.set_ylim[0, 0.2]
|
| 306 |
+
st.pyplot(fig)
|
| 307 |
+
|
| 308 |
+
my_bar.progress(80 + 1, text='Calculated loss -> Scoring the dataset')
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
original_ytrain = scaler.inverse_transform(y_train.reshape(-1, 1))
|
| 312 |
+
original_ytest = scaler.inverse_transform(y_test.reshape(-1, 1))
|
| 313 |
+
|
| 314 |
+
train_predict, test_predict = make_prediction(X_train, X_test, method='LSTM', model=model,
|
| 315 |
+
scaler=scaler, time_step_forward=time_step_forward)
|
| 316 |
+
train_predict_arima, test_predict_arima = make_prediction(X_train, X_test, method='SARIMA', model=arima_model,
|
| 317 |
+
scaler=scaler, time_step_forward=time_step_forward)
|
| 318 |
+
if GMDH:
|
| 319 |
+
train_predict_gmdh, test_predict_gmdh = make_prediction(X_train_gmdh, X_test_gmdh, method='GMDH', model=model_gmdh,
|
| 320 |
+
scaler=scaler, time_step_forward=time_step_forward)
|
| 321 |
+
if transformer:
|
| 322 |
+
X_train_forecast_median, X_test_forecast_median = make_prediction(X_train_gmdh, X_test_gmdh, method='Transformer', model=pipeline,
|
| 323 |
+
scaler=scaler, time_step_forward=time_step_forward)
|
| 324 |
+
|
| 325 |
+
my_bar.progress(85 + 1, text='Scored the dataset -> Calculating perfomance metrics')
|
| 326 |
+
|
| 327 |
+
# Evaluation metrices RMSE and MAE
|
| 328 |
+
metrics_tmp = {}
|
| 329 |
+
metrics1 = {}
|
| 330 |
+
metrics1['LSTM'] = []
|
| 331 |
+
#metrics1['Transformer'] = []
|
| 332 |
+
metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict))
|
| 333 |
+
metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict)
|
| 334 |
+
metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict)
|
| 335 |
+
print("-------------------------------------------------------------------------------------")
|
| 336 |
+
metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict))
|
| 337 |
+
metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict)
|
| 338 |
+
metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict)
|
| 339 |
+
#metrics_tmp["Train data explained variance regression score"] = explained_variance_score(original_ytrain, train_predict)
|
| 340 |
+
#metrics_tmp["Test data explained variance regression score"] = explained_variance_score(original_ytest, test_predict)
|
| 341 |
+
metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict)
|
| 342 |
+
metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict)
|
| 343 |
+
for metric in metrics_tmp:
|
| 344 |
+
print(metric, ': ', metrics_tmp[metric])
|
| 345 |
+
metrics1['LSTM'].append(metrics_tmp[metric])
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
metrics1['SARIMA'] = []
|
| 349 |
+
# metrics1['Transformer'] = []
|
| 350 |
+
metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict_arima))
|
| 351 |
+
metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict_arima)
|
| 352 |
+
metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict_arima)
|
| 353 |
+
print("-------------------------------------------------------------------------------------")
|
| 354 |
+
metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict_arima))
|
| 355 |
+
metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict_arima)
|
| 356 |
+
metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict_arima)
|
| 357 |
+
# metrics_tmp["Train data explained variance regression score"] = explained_variance_score(original_ytrain, train_predict)
|
| 358 |
+
# metrics_tmp["Test data explained variance regression score"] = explained_variance_score(original_ytest, test_predict)
|
| 359 |
+
metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict_arima)
|
| 360 |
+
metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict_arima)
|
| 361 |
+
for metric in metrics_tmp:
|
| 362 |
+
print(metric, ': ', metrics_tmp[metric])
|
| 363 |
+
metrics1['SARIMA'].append(metrics_tmp[metric])
|
| 364 |
+
if GMDH:
|
| 365 |
+
metrics1['GMDH'] = []
|
| 366 |
+
metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, train_predict_gmdh))
|
| 367 |
+
metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, train_predict_gmdh)
|
| 368 |
+
metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, train_predict_gmdh)
|
| 369 |
+
print("-------------------------------------------------------------------------------------")
|
| 370 |
+
metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, test_predict_gmdh))
|
| 371 |
+
metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, test_predict_gmdh)
|
| 372 |
+
metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, test_predict_gmdh)
|
| 373 |
+
#metrics_tmp["Train data explained variance regression score"] = explained_variance_score(original_ytrain, train_predict)
|
| 374 |
+
#metrics_tmp["Test data explained variance regression score"] = explained_variance_score(original_ytest, test_predict)
|
| 375 |
+
metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, train_predict_gmdh)
|
| 376 |
+
metrics_tmp["Test data R2 score"] = r2_score(original_ytest, test_predict_gmdh)
|
| 377 |
+
for metric in metrics_tmp:
|
| 378 |
+
print(metric, ': ', metrics_tmp[metric])
|
| 379 |
+
metrics1['GMDH'].append(metrics_tmp[metric])
|
| 380 |
+
|
| 381 |
+
if transformer:
|
| 382 |
+
metrics1['Transformer'] = []
|
| 383 |
+
metrics_tmp["Train data RMSE"] = math.sqrt(mean_squared_error(original_ytrain, X_train_forecast_median))
|
| 384 |
+
metrics_tmp["Train data MSE"] = mean_squared_error(original_ytrain, X_train_forecast_median)
|
| 385 |
+
metrics_tmp["Train data MAE"] = mean_absolute_error(original_ytrain, X_train_forecast_median)
|
| 386 |
+
print("-------------------------------------------------------------------------------------")
|
| 387 |
+
metrics_tmp["Test data RMSE"] = math.sqrt(mean_squared_error(original_ytest, X_test_forecast_median))
|
| 388 |
+
metrics_tmp["Test data MSE"] = mean_squared_error(original_ytest, X_test_forecast_median)
|
| 389 |
+
metrics_tmp["Test data MAE"] = mean_absolute_error(original_ytest, X_test_forecast_median)
|
| 390 |
+
# metrics_tmp["Train data explained variance regression score"] = explained_variance_score(original_ytrain, train_predict)
|
| 391 |
+
# metrics_tmp["Test data explained variance regression score"] = explained_variance_score(original_ytest, test_predict)
|
| 392 |
+
metrics_tmp["Train data R2 score"] = r2_score(original_ytrain, X_train_forecast_median)
|
| 393 |
+
metrics_tmp["Test data R2 score"] = r2_score(original_ytest, X_test_forecast_median)
|
| 394 |
+
for metric in metrics_tmp:
|
| 395 |
+
print(metric, ': ', metrics_tmp[metric])
|
| 396 |
+
metrics1['Transformer'].append(metrics_tmp[metric])
|
| 397 |
+
|
| 398 |
+
metrics_df = pd.DataFrame.from_dict(metrics1, orient = 'columns')#(metrics, columns = ['LSTM', 'GMDH'])
|
| 399 |
+
metrics_df.index = metrics_tmp.keys()
|
| 400 |
+
st.write(metrics_df)
|
| 401 |
+
#print("Train data MGD: ", mean_gamma_deviance(original_ytrain, train_predict))
|
| 402 |
+
#print("Test data MGD: ", mean_gamma_deviance(original_ytest, test_predict))
|
| 403 |
+
#print("----------------------------------------------------------------------")
|
| 404 |
+
#print("Train data MPD: ", mean_poisson_deviance(original_ytrain, train_predict))
|
| 405 |
+
#print("Test data MPD: ", mean_poisson_deviance(original_ytest, test_predict))
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
my_bar.progress(90 + 1, text='Calculated performance metrics -> Plotting predictions')
|
| 409 |
+
|
| 410 |
+
# shift train predictions for plotting
|
| 411 |
+
|
| 412 |
+
lag = time_step_backward + (time_step_forward - 1)
|
| 413 |
+
trainPredictPlot_arima = np.empty_like(closedf)
|
| 414 |
+
trainPredictPlot_arima[:, :] = np.nan
|
| 415 |
+
trainPredictPlot_arima[lag:len(train_predict_arima) + lag, :] = train_predict_arima
|
| 416 |
+
print(trainPredictPlot_arima[lag:len(train_predict_arima) + lag, :].shape, train_predict_arima.shape)
|
| 417 |
+
print("Train predicted data: ", trainPredictPlot_arima.shape)
|
| 418 |
+
|
| 419 |
+
# shift test predictions for plotting
|
| 420 |
+
testPredictPlot_arima = np.empty_like(closedf)
|
| 421 |
+
testPredictPlot_arima[:, :] = np.nan
|
| 422 |
+
testPredictPlot_arima[len(train_predict_arima) + (lag * 2):len(closedf), :] = test_predict_arima
|
| 423 |
+
print(testPredictPlot_arima[len(train_predict_arima) + (lag * 2):len(closedf), :].shape, test_predict_arima.shape)
|
| 424 |
+
print("Test predicted data: ", testPredictPlot_arima.shape)
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
trainPredictPlot = np.empty_like(closedf)
|
| 429 |
+
trainPredictPlot[:, :] = np.nan
|
| 430 |
+
trainPredictPlot[lag:len(train_predict) + lag, :] = train_predict
|
| 431 |
+
print(trainPredictPlot[lag:len(train_predict) + lag, :].shape, train_predict.shape)
|
| 432 |
+
print("Train predicted data: ", trainPredictPlot.shape)
|
| 433 |
+
|
| 434 |
+
# shift test predictions for plotting
|
| 435 |
+
testPredictPlot = np.empty_like(closedf)
|
| 436 |
+
testPredictPlot[:, :] = np.nan
|
| 437 |
+
testPredictPlot[len(train_predict) + (lag * 2):len(closedf), :] = test_predict
|
| 438 |
+
print(testPredictPlot[len(train_predict) + (lag * 2):len(closedf), :].shape, test_predict.shape)
|
| 439 |
+
print("Test predicted data: ", testPredictPlot.shape)
|
| 440 |
+
|
| 441 |
+
if GMDH:
|
| 442 |
+
trainPredictPlot_gmdh = np.empty_like(closedf)
|
| 443 |
+
trainPredictPlot_gmdh[:, :] = np.nan
|
| 444 |
+
trainPredictPlot_gmdh[lag:len(train_predict_gmdh) + lag, :] = train_predict_gmdh
|
| 445 |
+
print(trainPredictPlot_gmdh[lag:len(train_predict_gmdh) + lag, :].shape, train_predict_gmdh.shape)
|
| 446 |
+
|
| 447 |
+
testPredictPlot_gmdh = np.empty_like(closedf)
|
| 448 |
+
testPredictPlot_gmdh[:, :] = np.nan
|
| 449 |
+
testPredictPlot_gmdh[len(train_predict_gmdh) + (lag * 2):len(closedf), :] = test_predict_gmdh
|
| 450 |
+
print(testPredictPlot_gmdh[len(train_predict_gmdh) + (lag * 2):len(closedf), :].shape, test_predict_gmdh.shape)
|
| 451 |
+
|
| 452 |
+
if transformer:
|
| 453 |
+
trainPredictPlot_transformer = np.empty_like(closedf)
|
| 454 |
+
trainPredictPlot_transformer[:, :] = np.nan
|
| 455 |
+
trainPredictPlot_transformer[lag:len(X_train_forecast_median) + lag, :] = X_train_forecast_median
|
| 456 |
+
print(trainPredictPlot_transformer[lag:len(X_train_forecast_median) + lag, :].shape,
|
| 457 |
+
X_train_forecast_median.shape)
|
| 458 |
+
|
| 459 |
+
testPredictPlot_transformer = np.empty_like(closedf)
|
| 460 |
+
testPredictPlot_transformer[:, :] = np.nan
|
| 461 |
+
testPredictPlot_transformer[len(X_train_forecast_median) + (lag * 2):len(closedf), :] = X_test_forecast_median
|
| 462 |
+
print(testPredictPlot_transformer[len(X_train_forecast_median) + (lag * 2):len(closedf), :].shape,
|
| 463 |
+
X_test_forecast_median.shape)
|
| 464 |
+
|
| 465 |
+
if GMDH:
|
| 466 |
+
if transformer:
|
| 467 |
+
plotdf = pd.DataFrame({'date': close_stock['Date'],
|
| 468 |
+
'original_close': close_stock['Close'],
|
| 469 |
+
'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 470 |
+
'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 471 |
+
'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
|
| 472 |
+
'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
|
| 473 |
+
'train_predicted_close_gmdh': trainPredictPlot_gmdh.reshape(1, -1)[0].tolist(),
|
| 474 |
+
'test_predicted_close_gmdh': testPredictPlot_gmdh.reshape(1, -1)[0].tolist(),
|
| 475 |
+
'train_predicted_close_transformer': trainPredictPlot_transformer.reshape(1, -1)[0].tolist(),
|
| 476 |
+
'test_predicted_close_transformer': testPredictPlot_transformer.reshape(1, -1)[0].tolist()})
|
| 477 |
+
elif not transformer:
|
| 478 |
+
plotdf = pd.DataFrame({'date': close_stock['Date'],
|
| 479 |
+
'original_close': close_stock['Close'],
|
| 480 |
+
'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 481 |
+
'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 482 |
+
'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
|
| 483 |
+
'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
|
| 484 |
+
'train_predicted_close_gmdh': trainPredictPlot_gmdh.reshape(1, -1)[0].tolist(),
|
| 485 |
+
'test_predicted_close_gmdh': testPredictPlot_gmdh.reshape(1, -1)[0].tolist()})
|
| 486 |
+
elif not GMDH:
|
| 487 |
+
if transformer:
|
| 488 |
+
plotdf = pd.DataFrame({'date': close_stock['Date'],
|
| 489 |
+
'original_close': close_stock['Close'],
|
| 490 |
+
'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 491 |
+
'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 492 |
+
'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
|
| 493 |
+
'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist(),
|
| 494 |
+
'train_predicted_close_transformer': trainPredictPlot_transformer.reshape(1, -1)[0].tolist(),
|
| 495 |
+
'test_predicted_close_transformer': testPredictPlot_transformer.reshape(1, -1)[0].tolist()})
|
| 496 |
+
else:
|
| 497 |
+
plotdf = pd.DataFrame({'date': close_stock['Date'],
|
| 498 |
+
'original_close': close_stock['Close'],
|
| 499 |
+
'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 500 |
+
'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
|
| 501 |
+
'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
|
| 502 |
+
'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist()})
|
| 503 |
+
fig, ax = plt.subplots()
|
| 504 |
+
ax.plot(plotdf['date'], plotdf['original_close'], label='Оригинальная цена закрытия')
|
| 505 |
+
ax.plot(plotdf['date'], plotdf['train_predicted_close_arima'], label='Предсказанная цена закрытия на тренировке SARIMA')
|
| 506 |
+
ax.plot(plotdf['date'], plotdf['test_predicted_close_arima'], label='Предсказанная цена закрытия на тесте SARIMA')
|
| 507 |
+
ax.plot(plotdf['date'], plotdf['train_predicted_close'], label='Предсказанная цена закрытия на тренировке')
|
| 508 |
+
ax.plot(plotdf['date'], plotdf['test_predicted_close'], label='Предсказанная цена закрытия на тесте')
|
| 509 |
+
if GMDH:
|
| 510 |
+
ax.plot(plotdf['date'], plotdf['train_predicted_close_gmdh'], label='Предсказанная цена закрытия на тренировке GMDH')
|
| 511 |
+
ax.plot(plotdf['date'], plotdf['test_predicted_close_gmdh'], label='Предсказанная цена закрытия на тесте GMDH')
|
| 512 |
+
if transformer:
|
| 513 |
+
ax.plot(plotdf['date'], plotdf['train_predicted_close_transformer'], label='Предсказанная цена закрытия на тренировке Transformer')
|
| 514 |
+
ax.plot(plotdf['date'], plotdf['test_predicted_close_transformer'], label='Предсказанная цена закрытия на тесте Transformer')
|
| 515 |
+
ax.legend()
|
| 516 |
+
ax.set_title("Сравнение исходных и смоделированных цен")
|
| 517 |
+
st.pyplot(fig)
|
| 518 |
+
|
| 519 |
+
|
| 520 |
+
my_bar.progress(100, text='Done')
|
| 521 |
+
|
| 522 |
+
|
| 523 |
+
if recursive_pred:
|
| 524 |
+
lst_output_arima = make_prediction_recursive(test_data=test_data, method='SARIMA', model=arima_model,
|
| 525 |
+
scaler=scaler, pred_days=pred_days,
|
| 526 |
+
time_step_backward=time_step_backward)
|
| 527 |
+
lst_output_lstm = make_prediction_recursive(test_data=test_data, method='LSTM', model=model,
|
| 528 |
+
scaler=scaler, pred_days=pred_days,
|
| 529 |
+
time_step_backward=time_step_backward)
|
| 530 |
+
if GMDH:
|
| 531 |
+
lst_output_gmdh = make_prediction_recursive(test_data=test_data, method='GMDH', model=model_gmdh,
|
| 532 |
+
scaler=scaler, pred_days=pred_days,
|
| 533 |
+
time_step_backward=time_step_backward)
|
| 534 |
+
if transformer:
|
| 535 |
+
lst_output_transformer = make_prediction_recursive(test_data=test_data, method='Transformer', model=pipeline,
|
| 536 |
+
scaler=scaler, pred_days=pred_days,
|
| 537 |
+
time_step_backward=time_step_backward)
|
| 538 |
+
|
| 539 |
+
"""
|
| 540 |
+
x_input = test_data[len(test_data) - time_step_backward:].reshape(1, -1)
|
| 541 |
+
temp_input = list(x_input)
|
| 542 |
+
temp_input = temp_input[0].tolist()
|
| 543 |
+
|
| 544 |
+
|
| 545 |
+
lst_output = []
|
| 546 |
+
n_steps = time_step_backward
|
| 547 |
+
i = 0
|
| 548 |
+
while (i < pred_days):
|
| 549 |
+
|
| 550 |
+
if (len(temp_input) > time_step_backward):
|
| 551 |
+
|
| 552 |
+
x_input = np.array(temp_input[1:])
|
| 553 |
+
# print("{} day input {}".format(i,x_input))
|
| 554 |
+
x_input = x_input.reshape(1, -1)
|
| 555 |
+
x_input = x_input.reshape((1, n_steps, 1))
|
| 556 |
+
|
| 557 |
+
yhat = model.predict(x_input, verbose=0)
|
| 558 |
+
# print("{} day output {}".format(i,yhat))
|
| 559 |
+
temp_input.extend(yhat[0].tolist())
|
| 560 |
+
temp_input = temp_input[1:]
|
| 561 |
+
# print(temp_input)
|
| 562 |
+
|
| 563 |
+
lst_output.extend(yhat.tolist())
|
| 564 |
+
i = i + 1
|
| 565 |
+
|
| 566 |
+
else:
|
| 567 |
+
|
| 568 |
+
x_input = x_input.reshape((1, n_steps, 1))
|
| 569 |
+
yhat = model.predict(x_input, verbose=0)
|
| 570 |
+
temp_input.extend(yhat[0].tolist())
|
| 571 |
+
|
| 572 |
+
lst_output.extend(yhat.tolist())
|
| 573 |
+
i = i + 1
|
| 574 |
+
|
| 575 |
+
print("Output of predicted next steps: ", len(lst_output))
|
| 576 |
+
"""
|
| 577 |
+
last_days = np.arange(1, time_step_backward + 1)
|
| 578 |
+
day_pred = np.arange(time_step_backward + 1, time_step_backward + pred_days + 1)
|
| 579 |
+
print(last_days)
|
| 580 |
+
print(day_pred)
|
| 581 |
+
|
| 582 |
+
temp_mat = np.empty((len(last_days) + pred_days, 1))
|
| 583 |
+
temp_mat[:] = np.nan
|
| 584 |
+
"""
|
| 585 |
+
last_original_days_value = temp_mat.copy()
|
| 586 |
+
next_predicted_days_value = temp_mat.copy()
|
| 587 |
+
last_original_days_value[0:time_step_backward] = closedf[len(closedf) - time_step_backward:].values
|
| 588 |
+
next_predicted_days_value[time_step_backward:] = scaler.inverse_transform(np.array(lst_output))
|
| 589 |
+
"""
|
| 590 |
+
last_original_days_value = temp_mat.copy()
|
| 591 |
+
next_predicted_days_value_arima = temp_mat.copy()
|
| 592 |
+
next_predicted_days_value_lstm = temp_mat.copy()
|
| 593 |
+
if GMDH:
|
| 594 |
+
next_predicted_days_value_gmdh = temp_mat.copy()
|
| 595 |
+
if transformer:
|
| 596 |
+
next_predicted_days_value_transformer = temp_mat.copy()
|
| 597 |
+
|
| 598 |
+
last_original_days_value[0:time_step_backward] = \
|
| 599 |
+
closedf[len(closedf) - time_step_backward:].values
|
| 600 |
+
next_predicted_days_value_arima[time_step_backward:] = lst_output_arima
|
| 601 |
+
next_predicted_days_value_lstm[time_step_backward:] = lst_output_lstm
|
| 602 |
+
if GMDH:
|
| 603 |
+
next_predicted_days_value_gmdh[time_step_backward:] = lst_output_gmdh
|
| 604 |
+
if transformer:
|
| 605 |
+
next_predicted_days_value_transformer[time_step_backward:] = lst_output_transformer
|
| 606 |
+
|
| 607 |
+
"""
|
| 608 |
+
new_pred_plot = pd.DataFrame({
|
| 609 |
+
'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
|
| 610 |
+
'next_predicted_days_value': next_predicted_days_value.reshape(1, -1).tolist()[0]
|
| 611 |
+
})
|
| 612 |
+
|
| 613 |
+
|
| 614 |
+
|
| 615 |
+
fig, ax = plt.subplots()
|
| 616 |
+
ax.plot(new_pred_plot.index, new_pred_plot['last_original_days_value'], label=f"Последние {time_step_backward} шагов цены закратия")
|
| 617 |
+
ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value'], label=f"Предсказанные следующие {pred_days} шагов цены закрытия")
|
| 618 |
+
ax.legend()
|
| 619 |
+
ax.set_title(f"Сравнения последних {time_step_backward} шагов и следующих {pred_days} шагов")
|
| 620 |
+
st.pyplot(fig)
|
| 621 |
+
"""
|
| 622 |
+
if GMDH:
|
| 623 |
+
if transformer:
|
| 624 |
+
new_pred_plot = pd.DataFrame({
|
| 625 |
+
'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
|
| 626 |
+
'next_predicted_days_value_arima': next_predicted_days_value_arima.reshape(1, -1).tolist()[0],
|
| 627 |
+
'next_predicted_days_value_lstm': next_predicted_days_value_lstm.reshape(1, -1).tolist()[0],
|
| 628 |
+
'next_predicted_days_value_gmdh': next_predicted_days_value_gmdh.reshape(1, -1).tolist()[0],
|
| 629 |
+
'next_predicted_days_value_transformer':
|
| 630 |
+
next_predicted_days_value_transformer.reshape(1, -1).tolist()[0]
|
| 631 |
+
})
|
| 632 |
+
elif not transformer:
|
| 633 |
+
new_pred_plot = pd.DataFrame({
|
| 634 |
+
'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
|
| 635 |
+
'next_predicted_days_value_arima': next_predicted_days_value_arima.reshape(1, -1).tolist()[0],
|
| 636 |
+
'next_predicted_days_value_lstm': next_predicted_days_value_lstm.reshape(1, -1).tolist()[0],
|
| 637 |
+
'next_predicted_days_value_gmdh': next_predicted_days_value_gmdh.reshape(1, -1).tolist()[0]
|
| 638 |
+
})
|
| 639 |
+
elif not GMDH:
|
| 640 |
+
if transformer:
|
| 641 |
+
new_pred_plot = pd.DataFrame({
|
| 642 |
+
'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
|
| 643 |
+
'next_predicted_days_value_arima': next_predicted_days_value_arima.reshape(1, -1).tolist()[0],
|
| 644 |
+
'next_predicted_days_value_lstm': next_predicted_days_value_lstm.reshape(1, -1).tolist()[0],
|
| 645 |
+
'next_predicted_days_value_transformer':
|
| 646 |
+
next_predicted_days_value_transformer.reshape(1, -1).tolist()[0]
|
| 647 |
+
})
|
| 648 |
+
else:
|
| 649 |
+
new_pred_plot = pd.DataFrame({
|
| 650 |
+
'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
|
| 651 |
+
'next_predicted_days_value_arima': next_predicted_days_value_arima.reshape(1, -1).tolist()[0],
|
| 652 |
+
'next_predicted_days_value_lstm': next_predicted_days_value_lstm.reshape(1, -1).tolist()[0]
|
| 653 |
+
})
|
| 654 |
+
fig, ax = plt.subplots()
|
| 655 |
+
ax.plot(new_pred_plot.index, new_pred_plot['last_original_days_value'],
|
| 656 |
+
label=f"Последние {time_step_backward} шагов цены закратия")
|
| 657 |
+
ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_arima'],
|
| 658 |
+
label=f"Предсказанные следующие {pred_days} шагов цены закрытия SARIMA")
|
| 659 |
+
ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_lstm'],
|
| 660 |
+
label=f"Предсказанные следующие {pred_days} шагов цены закрытия LSTM")
|
| 661 |
+
if GMDH:
|
| 662 |
+
ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_gmdh'],
|
| 663 |
+
label=f"Предсказанные следующие {pred_days} шагов цены закрытия GMDH")
|
| 664 |
+
if transformer:
|
| 665 |
+
ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_transformer'],
|
| 666 |
+
label=f"Предсказанные следующие {pred_days} шагов цены закрытия Transformer")
|
| 667 |
+
ax.legend()
|
| 668 |
+
ax.set_title(f"Сравнения последних {time_step_backward} шагов и следующих {pred_days} шагов")
|
| 669 |
+
ax.set_ylim(0, closedf['Close'].max() * 1.5)
|
| 670 |
+
st.pyplot(fig)
|
| 671 |
+
#ax.plot()
|
| 672 |
+
|
| 673 |
+
|
| 674 |
+
@st.cache_data
def convert_df(df):
    """Serialize the predictions DataFrame to CSV and return it as UTF-8 bytes.

    Cached by Streamlit so the conversion is not recomputed on every rerun.
    """
    csv_text = df.to_csv()
    return csv_text.encode("utf-8")
|
| 678 |
+
@st.cache_data
def convert_metrics_df(df):
    """Serialize the metrics DataFrame to CSV and return it as UTF-8 bytes.

    Cached by Streamlit so the conversion is not recomputed on every rerun.
    """
    csv_text = df.to_csv()
    return csv_text.encode("utf-8")
|
| 682 |
+
|
| 683 |
+
plotdf_csv = convert_df(plotdf)
|
| 684 |
+
metrics_df_csv = convert_metrics_df(metrics_df)
|
| 685 |
+
st.download_button('Download data', plotdf_csv, file_name='predictions.csv', mime="text/csv")
|
| 686 |
+
st.download_button('Download metrics', metrics_df_csv, file_name='metrics.csv', mime="text/csv")
|
| 687 |
+
|
| 688 |
+
|
src/pages/utils/utils.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from typing import Literal
|
| 3 |
+
import torch
|
| 4 |
+
from typing import List
|
| 5 |
+
|
| 6 |
+
def create_dataset(dataset, time_step_backward = 1, time_step_forward = 1):
    """Build supervised-learning samples from a 2-D series via a sliding window.

    Each sample is ``time_step_backward`` consecutive values taken from
    column 0, and its target is the value ``time_step_forward`` steps after
    the end of that window (1 means the immediately following step).

    Args:
        dataset: 2-D array-like; only column 0 is read.
        time_step_backward: length of the input window.
        time_step_forward: forecast horizon beyond the window.

    Returns:
        Tuple ``(X, y)`` of ``np.ndarray`` — the windows and their targets.
    """
    horizon_offset = time_step_forward - 1
    n_samples = len(dataset) - time_step_backward - horizon_offset
    windows = [dataset[start:start + time_step_backward, 0]
               for start in range(n_samples)]
    targets = [dataset[start + time_step_backward + horizon_offset, 0]
               for start in range(n_samples)]
    return np.array(windows), np.array(targets)
|
| 13 |
+
|
| 14 |
+
def make_prediction(X_train: np.ndarray, X_test: np.ndarray,
                    method: Literal['LSTM', 'GMDH', 'Transformer', 'SARIMA'],
                    model, scaler, time_step_forward: None) -> np.ndarray:
    """Predict on the train and test windows with the chosen model family.

    Returns a ``(train_predictions, test_predictions)`` pair, mapped back to
    the original price scale via ``scaler.inverse_transform``. An unknown
    ``method`` silently yields ``None`` (matching the original behavior).
    """
    if method == 'LSTM':
        # Keras-style model: predict() output is fed to the scaler as-is.
        raw_train = model.predict(X_train)
        raw_test = model.predict(X_test)
        return (scaler.inverse_transform(raw_train),
                scaler.inverse_transform(raw_test))

    if method == 'SARIMA':
        # Re-fit the ARIMA model on every window; keep only the final step
        # of each per-window forecast horizon.
        def _window_forecasts(windows):
            preds = [model.fit_predict(window, n_periods=time_step_forward,
                                       return_conf_int=False)[-1]
                     for window in windows]
            return np.array(preds)

        train_flat = _window_forecasts(X_train)
        test_flat = _window_forecasts(X_test)
        return (scaler.inverse_transform(train_flat.reshape(-1, 1)),
                scaler.inverse_transform(test_flat.reshape(-1, 1)))

    if method == 'GMDH':
        raw_train = model.predict(X_train)
        raw_test = model.predict(X_test)
        return (scaler.inverse_transform(raw_train.reshape(-1, 1)),
                scaler.inverse_transform(raw_test.reshape(-1, 1)))

    if method == 'Transformer':
        # Chronos-style pipeline: sample 3 trajectories per window.
        # NOTE: predict() is called for train then test, same order as before,
        # so any internal RNG consumption is unchanged.
        train_samples = model.predict(
            torch.tensor(X_train),
            time_step_forward,
            num_samples=3,
            temperature=1.0,
            top_k=50,
            top_p=1.0)
        test_samples = model.predict(
            torch.tensor(X_test),
            time_step_forward,
            num_samples=3,
            temperature=1.0,
            top_k=50,
            top_p=1.0)
        # Median across samples, last forecasted step of each window.
        train_median = np.quantile(train_samples.numpy(), 0.5, axis=1)[:, -1]
        test_median = np.quantile(test_samples.numpy(), 0.5, axis=1)[:, -1]
        return (scaler.inverse_transform(train_median.reshape(-1, 1)),
                scaler.inverse_transform(test_median.reshape(-1, 1)))
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def make_prediction_recursive(test_data: np.ndarray,
                              method: Literal['LSTM', 'GMDH', 'Transformer', 'SARIMA'],
                              model, scaler, pred_days: int, time_step_backward: int) -> np.ndarray:
    """Forecast `pred_days` future steps beyond the end of `test_data`.

    The last `time_step_backward` (scaled) points seed the model; for LSTM and
    GMDH each new one-step prediction is appended to a rolling context and the
    oldest point dropped, so forecasts feed back into later forecasts. SARIMA
    and Transformer forecast the whole horizon in a single call instead.
    Returns the forecasts inverse-transformed to the original price scale.
    """
    if method == 'LSTM':
        # Seed the rolling context with the final window of the test split.
        x_input_lstm = test_data[len(test_data) - time_step_backward:].reshape(1, -1)
        temp_input_lstm = list(x_input_lstm)
        temp_input_lstm = temp_input_lstm[0].tolist()
        lst_output_lstm = []
        n_steps = time_step_backward
        i = 0
        while (i < pred_days):
            if (len(temp_input_lstm) > time_step_backward):
                # Context grew by one last iteration: drop the oldest point
                # so exactly n_steps values are fed to the network.
                x_input_lstm = np.array(temp_input_lstm[1:])
                x_input_lstm = x_input_lstm.reshape(1, -1)
                x_input_lstm = x_input_lstm.reshape((1, n_steps, 1))

                yhat_lstm = model.predict(x_input_lstm, verbose=0)
                # Feed the new prediction back into the context window.
                temp_input_lstm.extend(yhat_lstm[0].tolist())
                temp_input_lstm = temp_input_lstm[1:]
                lst_output_lstm.extend(yhat_lstm.tolist())
                i = i + 1
            else:
                # First iteration: context is exactly time_step_backward long.
                x_input_lstm = x_input_lstm.reshape((1, n_steps, 1))
                yhat_lstm = model.predict(x_input_lstm, verbose=0)
                temp_input_lstm.extend(yhat_lstm[0].tolist())
                lst_output_lstm.extend(yhat_lstm.tolist())
                i = i + 1

        # Map the scaled forecasts back to price units.
        lst_output_lstm = scaler.inverse_transform(lst_output_lstm)
        return lst_output_lstm
    elif method == 'SARIMA':
        # pmdarima-style model: fit on the last window and forecast the whole
        # horizon in one call — no recursion needed.
        x_input_arima = test_data[len(test_data) - time_step_backward:]
        n_steps = time_step_backward
        lst_output_arima = model.fit_predict(x_input_arima, n_periods=pred_days, return_conf_int=False)  # [-1]
        lst_output_arima = scaler.inverse_transform(lst_output_arima.reshape(-1, 1))
        return lst_output_arima
    elif method == 'GMDH':
        # Same rolling-context scheme as LSTM, but the GMDH model consumes a
        # flat (1, n_steps) input and returns a 1-D prediction.
        x_input_gmdh = test_data[len(test_data) - time_step_backward:].reshape(1, -1)
        temp_input_gmdh = list(x_input_gmdh)
        temp_input_gmdh = temp_input_gmdh[0].tolist()
        lst_output_gmdh = []
        n_steps = time_step_backward
        i = 0
        while (i < pred_days):
            if (len(temp_input_gmdh) > time_step_backward):
                x_input_gmdh = np.array(temp_input_gmdh[1:])
                x_input_gmdh = x_input_gmdh.reshape(1, -1)
                yhat_gmdh = model.predict(x_input_gmdh)
                temp_input_gmdh.extend(yhat_gmdh.tolist())
                temp_input_gmdh = temp_input_gmdh[1:]
                lst_output_gmdh.extend(yhat_gmdh.tolist())
                i = i + 1
            else:
                # NOTE(review): the 3-D reshape followed by flattening back to
                # (1, -1) looks redundant — presumably kept symmetric with the
                # LSTM branch; verify before simplifying.
                x_input_gmdh = x_input_gmdh.reshape((1, n_steps, 1))
                yhat_gmdh = model.predict(x_input_gmdh[0].reshape(1, -1))
                temp_input_gmdh.extend(yhat_gmdh.tolist())
                lst_output_gmdh.extend(yhat_gmdh.tolist())
                i = i + 1
        lst_output_gmdh = scaler.inverse_transform(np.array(lst_output_gmdh).reshape(-1, 1))
        return lst_output_gmdh
    elif method == 'Transformer':
        # Chronos-style pipeline: sample 3 trajectories over the full horizon
        # in one call, then take the per-step median across samples.
        x_input_transformer = test_data[len(test_data) - time_step_backward:].reshape(1, -1)
        x_input_transformer = torch.tensor(x_input_transformer)
        lst_output_forecast = model.predict(
            x_input_transformer,
            pred_days,
            num_samples=3,
            temperature=1.0,
            top_k=50,
            top_p=1.0)
        X_train_forecast_median = np.quantile(lst_output_forecast.numpy(), 0.5, axis=1)  # [:, -1]
        lst_output_transformer = scaler.inverse_transform(X_train_forecast_median.reshape(-1, 1))
        return lst_output_transformer
|
src/requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
git+https://github.com/amazon-science/chronos-forecasting.git
|
| 2 |
+
pmdarima
|
| 3 |
+
streamlit
|
| 4 |
+
yfinance
|
| 5 |
+
gmdh
|
| 6 |
+
|
src/sidebar_portfolio.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
def sidebar():
    """Render the portfolio-optimization sidebar and collect its settings.

    Returns a dict with the user's choices. NOTE(review): `scaling_strategy`
    and `time_step_backward` are unconditionally overwritten with fixed values
    below, so their widgets are currently display-only — confirm intent.
    """
    import streamlit as st

    strategy_options = ['average', 'median', 'undersampling']
    top_n = st.sidebar.number_input('Количество активов-кандидатов', value=5)
    num_scale_steps = st.sidebar.slider('Горизонт инвестирования, дней', 1, 100, 1)
    scaling_strategy = st.sidebar.selectbox('Стратегия масштабирования', strategy_options)
    target_return_expander = st.sidebar.expander('Задать целевую доходность')
    target_return = target_return_expander.slider('Уровень доходности, %', 1, 100, None)
    if target_return:
        # Convert the percentage slider value to a fraction.
        target_return *= 0.01
    time_step_backward = st.sidebar.slider('Количество предикторов, дней', 1, 100, 15)
    allow_short = st.sidebar.checkbox('Разрешить короткие позиции')

    # Fixed overrides — the two widgets above currently have no effect.
    scaling_strategy = 'average'
    time_step_backward = 15

    return {
        'top_n': top_n,
        'num_scale_steps': num_scale_steps,
        'scaling_strategy': scaling_strategy,
        'target_return': target_return,
        'time_step_backward': time_step_backward,
        'allow_short': allow_short,
    }
|
| 27 |
+
|