"""Streamlit page: daily crypto price prediction.

Downloads (or accepts an uploaded CSV of) close prices, optionally
re-scales them to coarser time steps, then trains and compares several
forecasters (LSTM, SARIMA, optionally GMDH and a Chronos transformer),
reporting metrics, fit plots and an optional recursive multi-step forecast.
"""
import pandas as pd
import numpy as np
import math

# For evaluation we will use these libraries
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler

# For model building we will use these libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras import initializers
from tensorflow.keras.callbacks import EarlyStopping

# For plotting we will use this library
import matplotlib.pyplot as plt

import yfinance as yf
import streamlit as st
from gmdh import CriterionType, Criterion, Multi, Combi, Mia, Ria, PolynomialType
from chronos import ChronosPipeline
import torch
import pmdarima as pm

from pages.utils.utils import create_dataset, make_prediction, make_prediction_recursive
from io import StringIO
import os

# Disable yfinance's curl_cffi transport (portability workaround).
os.environ["YF_DISABLE_CURL_CFFI"] = "1"

st.set_page_config(page_title="Model optimization", page_icon="πŸ“ˆ")


@st.cache_data
def get_pipeline():
    """Load (once, cached by Streamlit) the pretrained Chronos pipeline."""
    pipeline = ChronosPipeline.from_pretrained(
        "amazon/chronos-t5-tiny",
        device_map="cpu",  # use "cpu" for CPU inference and "mps" for Apple Silicon
        torch_dtype=torch.bfloat16)
    return pipeline


def _metric_table(y_true_train, pred_train, y_true_test, pred_test):
    """Return the ordered dict of train/test RMSE, MSE, MAE and R2 for one model."""
    return {
        "Train data RMSE": math.sqrt(mean_squared_error(y_true_train, pred_train)),
        "Train data MSE": mean_squared_error(y_true_train, pred_train),
        "Train data MAE": mean_absolute_error(y_true_train, pred_train),
        "Test data RMSE": math.sqrt(mean_squared_error(y_true_test, pred_test)),
        "Test data MSE": mean_squared_error(y_true_test, pred_test),
        "Test data MAE": mean_absolute_error(y_true_test, pred_test),
        "Train data R2 score": r2_score(y_true_train, pred_train),
        "Test data R2 score": r2_score(y_true_test, pred_test),
    }


def _shifted_plot_arrays(pred_train, pred_test, template, lag):
    """Pad train/test predictions with NaNs so they align with the full series.

    `template` is the full-length close-price frame; `lag` is the number of
    leading observations consumed by the look-back/look-ahead windows.
    """
    train_plot = np.empty_like(template)
    train_plot[:, :] = np.nan
    train_plot[lag:len(pred_train) + lag, :] = pred_train
    test_plot = np.empty_like(template)
    test_plot[:, :] = np.nan
    test_plot[len(pred_train) + (lag * 2):len(template), :] = pred_test
    return train_plot, test_plot


pipeline = get_pipeline()
seed = 42  # makes the Keras weight initialisation reproducible

st.title("Daily price prediction")

tickers = ['BTC', 'ETH', 'BNB',  # 'USDC',
           'XRP', 'STETH', 'ADA', 'DOGE',  # 'FGC',
           'WTRX', 'LTC', 'SOL', 'TRX', 'DOT', 'MATIC', 'BCH', 'WBTC', 'TON11419',
           'DAI', 'SHIB', 'AVAX', 'BUSD', 'LEO', 'LINK']
intervals = ['1d']  # , '5d', '1wk', '1mo', '3mo']
# ['1m', '2m', '5m','15m','30m','60m','90m','1h','1d','5d','1wk','1mo','3mo']
ticker = st.selectbox("Ticker", options=tickers)
interval = st.selectbox("Interval of raw data", options=intervals)

# How much history to request from yfinance for each candle interval.
int_to_periods = {'1m': '5d', '2m': '1mo', '5m': '1mo', '15m': '1mo', '30m': '1mo',
                  '60m': '1mo', '90m': '1mo', '1h': '1y', '1d': '10y', '5d': '10y',
                  '1wk': '10y', '1mo': '10y', '3mo': '10y'}
# Earliest date kept per interval (currently unused -- see the commented slice below).
period_cut = {'1d': '2022-02-19', '5d': '2020-06-19', '1wk': '2020-06-19',
              '1mo': '2014-06-19', '3mo': '2014-06-19'}

uploaded_file = st.file_uploader("Choose a file")
# try:
maindf = yf.download(tickers=f"{ticker}-USD",          # list of tickers
                     period=int_to_periods[interval],  # time period
                     interval=interval,                # trading interval
                     prepost=False,                    # download pre/post market hours data?
                     repair=True,)                     # repair obvious price errors e.g. 100x?
if len(maindf) == 0:
    raise FileNotFoundError
# except:
#     maindf = pd.read_csv(f'{ticker}.csv')

if uploaded_file is not None:
    # An uploaded CSV overrides the downloaded data.
    maindf = pd.read_csv(uploaded_file)

st.write(maindf.head())
maindf = maindf.reset_index()
maindf['Date'] = pd.to_datetime(maindf['Date'], format='%Y-%m-%d')
# maindf = pd.read_csv('BTC-USD.csv')
print('Total number of days present in the dataset: ', maindf.shape[0])
print('Total number of fields present in the dataset: ', maindf.shape[1])
print(maindf.head())

y_overall = maindf.copy()  # .loc[(maindf['Date'] >= '2014-09-17')] #& (maindf['Date'] <= '2022-02-19')]

global_expander = st.sidebar.expander('ΠŸΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Ρ‹ Ρ€Π΅ΠΆΠΈΠΌΠ° модСлирования')
scaling_expander = st.sidebar.expander('Π Π΅ΠΆΠΈΠΌ ΠΌΠ°ΡΡˆΡ‚Π°Π±ΠΈΡ€ΠΎΠ²Π°Π½ΠΈΡ')
scaling_strategy_list = ['median', 'average', 'undersampling']
scale_step_type_list = ['D', 'W', 'M', 'Y']
scale_step_type = scaling_expander.selectbox('Π¨Π°Π³ ΠΌΠ°ΡΡˆΡ‚Π°Π±ΠΈΡ€ΠΎΠ²Π°Π½ΠΈΡ', scale_step_type_list)
num_scale_steps = scaling_expander.slider('Π Π°Π·ΠΌΠ΅Ρ€ шага ΠΌΠ°ΡΡˆΡ‚Π°Π±ΠΈΡ€ΠΎΠ²Π°Π½ΠΈΡ', 1, 100, 1)

y_overall = y_overall[['Date', 'Close']]
if num_scale_steps > 1:
    scaling_strategy = scaling_expander.selectbox('ΠœΠ΅Ρ‚ΠΎΠ΄ ΠΌΠ°ΡΡˆΡ‚Π°Π±ΠΈΡ€ΠΎΠ²Π°Π½ΠΈΡ', scaling_strategy_list)
    scaling_step_combined = str(num_scale_steps) + scale_step_type
    # Today's date, normalised to midnight, anchors the re-scaling intervals.
    # NOTE(review): pd.Timedelta does not accept 'M'/'Y' units, so the
    # average/median strategies appear to work only for 'D'/'W' steps -- confirm.
    today = pd.Timestamp.now().normalize()
    if scaling_strategy == 'average':
        # y_overall = y_overall.groupby(pd.Grouper(key = 'Date', freq = scaling_step_combined)).mean()
        # Add a column holding the end of each interval
        y_overall['Interval_End'] = today - (
            (today - y_overall['Date']) // pd.Timedelta(scaling_step_combined)) * pd.Timedelta(
            scaling_step_combined)
        # Group by interval and take the mean
        y_overall = y_overall.groupby('Interval_End')['Close'].mean().reset_index()
        # Sort the result
        y_overall = y_overall.sort_values('Interval_End')  # .reset_index(drop=True)
        y_overall = y_overall.rename({'Interval_End': 'Date'}, axis=1)
    elif scaling_strategy == 'median':
        # Same interval construction, but aggregate with the median.
        y_overall['Interval_End'] = today - (
            (today - y_overall['Date']) // pd.Timedelta(scaling_step_combined)) * pd.Timedelta(
            scaling_step_combined)
        y_overall = y_overall.groupby('Interval_End')['Close'].median().reset_index()
        y_overall = y_overall.sort_values('Interval_End')  # .reset_index(drop=True)
        y_overall = y_overall.rename({'Interval_End': 'Date'}, axis=1)
    else:
        # 'undersampling': keep the last observation of each interval.
        y_overall = y_overall.resample(on='Date', rule=scaling_step_combined, origin='end').last()
        y_overall = y_overall.reset_index()

# Show the (possibly re-scaled) close-price series.
fig, ax = plt.subplots()
# ax.plot(y_overall.Date, y_overall['Close'], label = 'Stock Close Price')
ax.plot(y_overall['Close'], label='Stock Close Price')
ax.legend()
ax.set_title(f'Π”ΠΈΠ½Π°ΠΌΠΈΠΊΠ° Ρ†Π΅Π½Ρ‹ закрытия для {ticker}')
st.pyplot(fig)

train = st.sidebar.button('Train')
time_step_backward = st.sidebar.slider('ΠšΠΎΠ»ΠΈΡ‡Π΅ΡΡ‚Π²ΠΎ шагов Π½Π°Π·Π°Π΄ для ΠΏΡ€Π΅Π΄ΠΈΠΊΡ‚ΠΎΡ€ΠΎΠ²', 5, 60, 15)
time_step_forward = st.sidebar.slider('ΠšΠΎΠ»ΠΈΡ‡Π΅ΡΡ‚Π²ΠΎ шагов Π²ΠΏΠ΅Ρ€Π΅Π΄ для Ρ‚Π°Ρ€Π³Π΅Ρ‚Π°', 1, 60, 1)
pred_days = 1
recursive_pred = False
if time_step_forward == 1:
    # Recursive forecasting is only offered for one-step-ahead models.
    expander = st.sidebar.expander('Π Π΅ΠΆΠΈΠΌ рСсурсивного ΠΏΡ€ΠΎΠ³Π½ΠΎΠ·Π°')
    pred_days = expander.slider('ΠšΠΎΠ»ΠΈΡ‡Π΅ΡΡ‚Π²ΠΎ шагов для рСсурсивного ΠΏΡ€ΠΎΠ³Π½ΠΎΠ·Π°', 1, 30, 15)
    recursive_pred = expander.checkbox('Π—Π°ΠΏΡƒΡΡ‚ΠΈΡ‚ΡŒ рСкурсивный ΠΏΡ€ΠΎΠ³Π½ΠΎΠ·')
GMDH = st.sidebar.checkbox('Π”ΠΎΠ±Π°Π²ΠΈΡ‚ΡŒ Ρ€Π΅ΠΆΠΈΠΌ ΠœΠ“Π£Π')
transformer = st.sidebar.checkbox('Π”ΠΎΠ±Π°Π²ΠΈΡ‚ΡŒ Ρ€Π΅ΠΆΠΈΠΌ Transformer')
if GMDH:
    expander1 = st.sidebar.expander('Π“ΠΈΠΏΠ΅Ρ€ΠΏΠ°Ρ€Π°ΠΌΠ΅Ρ‚Ρ€Ρ‹ ΠœΠ“Π£Π')
    GMDHs = {'Combi': Combi(), 'Multi': Multi(), 'Mia': Mia(), 'Ria': Ria()}
    criterions = {
        'ΠšΡ€ΠΈΡ‚Π΅Ρ€ΠΈΠΉ рСгулярности (нСсиммСтричная Ρ„ΠΎΡ€ΠΌΠ°)': CriterionType.REGULARITY,
        'ΠšΡ€ΠΈΡ‚Π΅Ρ€ΠΈΠΉ рСгулярности (симмСтричная Ρ„ΠΎΡ€ΠΌΠ°)': CriterionType.SYM_REGULARITY,
        'ΠšΡ€ΠΈΡ‚Π΅Ρ€ΠΈΠΉ ΡΡ‚Π°Π±ΠΈΠ»ΡŒΠ½ΠΎΡΡ‚ΠΈ (нСсиммСтричная Ρ„ΠΎΡ€ΠΌΠ°)': CriterionType.STABILITY,
        'ΠšΡ€ΠΈΡ‚Π΅Ρ€ΠΈΠΉ ΡΡ‚Π°Π±ΠΈΠ»ΡŒΠ½ΠΎΡΡ‚ΠΈ (симмСтричная Ρ„ΠΎΡ€ΠΌΠ°)': CriterionType.SYM_STABILITY,
        'ΠšΡ€ΠΈΡ‚Π΅Ρ€ΠΈΠΉ ΠΌΠΈΠ½ΠΈΠΌΡƒΠΌΠ° смСщСния коэффициСнтов': CriterionType.UNBIASED_COEFFS,
        'ΠšΡ€ΠΈΡ‚Π΅Ρ€ΠΈΠΉ ΠΌΠΈΠ½ΠΈΠΌΡƒΠΌΠ° смСщСния Ρ€Π΅ΡˆΠ΅Π½ΠΈΠΉ (нСсиммСтричная Ρ„ΠΎΡ€ΠΌΠ°)': CriterionType.UNBIASED_OUTPUTS,
        'ΠšΡ€ΠΈΡ‚Π΅Ρ€ΠΈΠΉ ΠΌΠΈΠ½ΠΈΠΌΡƒΠΌΠ° смСщСния Ρ€Π΅ΡˆΠ΅Π½ΠΈΠΉ (симмСтричная Ρ„ΠΎΡ€ΠΌΠ°)': CriterionType.SYM_UNBIASED_OUTPUTS,
        'ΠΠ±ΡΠΎΠ»ΡŽΡ‚Π½ΠΎ помСхоустойчивый ΠΊΡ€ΠΈΡ‚Π΅Ρ€ΠΈΠΉ (нСсиммСтричная Ρ„ΠΎΡ€ΠΌΠ°)': CriterionType.ABSOLUTE_NOISE_IMMUNITY,
        'ΠΠ±ΡΠΎΠ»ΡŽΡ‚Π½ΠΎ помСхоустойчивый ΠΊΡ€ΠΈΡ‚Π΅Ρ€ΠΈΠΉ (симмСтричная Ρ„ΠΎΡ€ΠΌΠ°)': CriterionType.SYM_ABSOLUTE_NOISE_IMMUNITY,
    }
    polynoms = {'LINEAR': PolynomialType.LINEAR,
                'LINEAR_COV': PolynomialType.LINEAR_COV,
                'QUADRATIC': PolynomialType.QUADRATIC}
    GMDH_algo = expander1.selectbox("Алгоритм ΠœΠ“Π£Π", options=GMDHs.keys())
    criterion = expander1.selectbox("Π’Π½Π΅ΡˆΠ½ΠΈΠΉ ΠΊΡ€ΠΈΡ‚Π΅Ρ€ΠΈΠΉ", options=criterions.keys())
    p_average = expander1.slider('p_average', 1, 10, 1)
    limit = expander1.number_input('limit', value=0.)
    k_best = expander1.slider('k_best', 1, 10, 3 if GMDH_algo == 'Mia' else 1)
    polynom = expander1.selectbox("Π’ΠΈΠ΄ Π±Π°Π·ΠΎΠ²Ρ‹Ρ… ΠΏΠΎΠ»ΠΈΠ½ΠΎΠΌΠΎΠ²", options=polynoms.keys())

# yfinance returns MultiIndex columns (price, ticker); flatten to one level.
# Guard: uploaded CSVs and the aggregation paths above already have flat
# columns and would crash on an unconditional droplevel.
if isinstance(y_overall.columns, pd.MultiIndex):
    y_overall.columns = y_overall.columns.droplevel(1)
# y_overall = y_overall.reset_index()

if train:
    my_bar = st.progress(0, text='Model training progress. Truncating the dataset now')
    # First take only the close price.
    closedf = y_overall[['Date', 'Close']]  # maindf[['Date', 'Close']]
    print("Shape of close dataframe:", closedf.shape)
    closedf = closedf[-1000:]  # closedf[closedf['Date'] > period_cut[interval]]
    close_stock = closedf.copy()
    print("Total data for prediction: ", closedf.shape[0])
    my_bar.progress(10 + 1, text='Truncated the dataset -> Scaling it')

    # Delete the date column and normalise with a MinMax scaler.
    del closedf['Date']
    scaler = MinMaxScaler(feature_range=(0, 1))
    print(closedf.shape)
    my_bar.progress(20 + 1, text='Scaled the dataset -> Splitting it into subsamples')

    # Chronological 70% / 30% train-test split (no shuffling).
    training_size = int(len(closedf) * 0.70)
    test_size = len(closedf) - training_size
    assert test_size > time_step_backward + time_step_forward, "Test_size is shorter than time_step_backward + time_step_forward"
    train_data, test_data = closedf[0:training_size], closedf[training_size:len(closedf)]
    # Fit the scaler on the training part only to avoid test-set leakage.
    train_data = scaler.fit_transform(train_data)
    test_data = scaler.transform(test_data)
    print("train_data: ", train_data.shape)
    print("test_data: ", test_data.shape)
    my_bar.progress(30 + 1, text='Split it into subsamples -> Cutting them into observations')

    X_train, y_train = create_dataset(train_data, time_step_backward, time_step_forward)
    X_test, y_test = create_dataset(test_data, time_step_backward, time_step_forward)
    print("X_train: ", X_train.shape)
    print("y_train: ", y_train.shape)
    print("X_test: ", X_test.shape)
    print("y_test", y_test.shape)

    # Keep 2-D copies for GMDH/Transformer; reshape to
    # [samples, time steps, features] as required by the LSTM.
    X_train_gmdh = X_train.copy()
    X_test_gmdh = X_test.copy()
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
    print("X_train: ", X_train.shape)
    print("X_test: ", X_test.shape)
    my_bar.progress(40 + 1, text='Cut it into observations -> Training the model')

    # Small single-layer LSTM with deterministic initialisation.
    model = Sequential()
    model.add(LSTM(10, input_shape=(None, 1), activation="relu",
                   kernel_initializer=initializers.GlorotNormal(seed=seed),
                   bias_initializer=initializers.GlorotNormal(seed=seed)))
    model.add(Dense(1,
                    kernel_initializer=initializers.GlorotNormal(seed=seed),
                    bias_initializer=initializers.GlorotNormal(seed=seed)))
    model.compile(loss="mean_squared_error", optimizer="adam")
    callback = EarlyStopping(monitor='loss', patience=30, restore_best_weights=True)
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                        epochs=100, batch_size=32, verbose=10, callbacks=[callback])

    arima_model = pm.auto_arima(train_data,
                                m=12,                    # frequency of series
                                seasonal=True,           # TRUE if seasonal series
                                d=None,                  # let model determine 'd'
                                test='adf',              # use adftest to find optimal 'd'
                                start_p=0, start_q=0,    # minimum p and q
                                max_p=time_step_backward, max_q=time_step_backward,  # maximum p and q
                                D=None,                  # let model determine 'D'
                                trace=True,
                                error_action='ignore',
                                suppress_warnings=True,
                                stepwise=True)
    st.text(arima_model.summary())

    if GMDH:
        # Each GMDH flavour accepts a slightly different set of fit kwargs.
        model_gmdh = GMDHs[GMDH_algo]
        if GMDH_algo == 'Combi':
            model_gmdh.fit(X_train_gmdh, y_train, p_average=p_average, limit=limit, test_size=0.3,
                           criterion=Criterion(criterion_type=criterions[criterion]))
        if GMDH_algo == 'Multi':
            model_gmdh.fit(X_train_gmdh, y_train, p_average=p_average, limit=limit, test_size=0.3,
                           criterion=Criterion(criterion_type=criterions[criterion]), k_best=k_best)
        if GMDH_algo in ['Ria', 'Mia']:
            model_gmdh.fit(X_train_gmdh, y_train, p_average=p_average, limit=limit, test_size=0.3,
                           criterion=Criterion(criterion_type=criterions[criterion]), k_best=k_best,
                           polynomial_type=polynoms[polynom])
        st.write(f"GMDH model: {model_gmdh.get_best_polynomial()}")
    my_bar.progress(70 + 1, text='Trained model -> Calculating loss')

    # Training / validation loss curves of the LSTM.
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(loss))
    fig, ax = plt.subplots()
    ax.plot(epochs, loss, 'r', label='Training loss')
    ax.plot(epochs, val_loss, 'b', label='Validation loss')
    ax.legend()
    ax.set_title('ΠŸΠΎΡ‚Π΅Ρ€ΠΈ Π½Π° ΠΎΠ±ΡƒΡ‡Π΅Π½ΠΈΠΈ ΠΈ Π²Π°Π»ΠΈΠ΄Π°Ρ†ΠΈΠΈ')
    st.pyplot(fig)
    my_bar.progress(80 + 1, text='Calculated loss -> Scoring the dataset')

    # Bring targets back to the original price scale for metric computation.
    original_ytrain = scaler.inverse_transform(y_train.reshape(-1, 1))
    original_ytest = scaler.inverse_transform(y_test.reshape(-1, 1))
    train_predict, test_predict = make_prediction(
        X_train, X_test, method='LSTM', model=model, scaler=scaler,
        time_step_forward=time_step_forward)
    train_predict_arima, test_predict_arima = make_prediction(
        X_train, X_test, method='SARIMA', model=arima_model, scaler=scaler,
        time_step_forward=time_step_forward)
    if GMDH:
        train_predict_gmdh, test_predict_gmdh = make_prediction(
            X_train_gmdh, X_test_gmdh, method='GMDH', model=model_gmdh, scaler=scaler,
            time_step_forward=time_step_forward)
    if transformer:
        X_train_forecast_median, X_test_forecast_median = make_prediction(
            X_train_gmdh, X_test_gmdh, method='Transformer', model=pipeline, scaler=scaler,
            time_step_forward=time_step_forward)
    my_bar.progress(85 + 1, text='Scored the dataset -> Calculating performance metrics')

    # Evaluation metrics (RMSE, MSE, MAE, R2) per model, printed and tabulated.
    metrics1 = {}
    metrics_tmp = _metric_table(original_ytrain, train_predict, original_ytest, test_predict)
    print("-------------------------------------------------------------------------------------")
    for metric in metrics_tmp:
        print(metric, ': ', metrics_tmp[metric])
    metrics1['LSTM'] = list(metrics_tmp.values())

    metrics_tmp = _metric_table(original_ytrain, train_predict_arima, original_ytest, test_predict_arima)
    print("-------------------------------------------------------------------------------------")
    for metric in metrics_tmp:
        print(metric, ': ', metrics_tmp[metric])
    metrics1['SARIMA'] = list(metrics_tmp.values())

    if GMDH:
        metrics_tmp = _metric_table(original_ytrain, train_predict_gmdh, original_ytest, test_predict_gmdh)
        print("-------------------------------------------------------------------------------------")
        for metric in metrics_tmp:
            print(metric, ': ', metrics_tmp[metric])
        metrics1['GMDH'] = list(metrics_tmp.values())

    if transformer:
        metrics_tmp = _metric_table(original_ytrain, X_train_forecast_median,
                                    original_ytest, X_test_forecast_median)
        print("-------------------------------------------------------------------------------------")
        for metric in metrics_tmp:
            print(metric, ': ', metrics_tmp[metric])
        metrics1['Transformer'] = list(metrics_tmp.values())

    metrics_df = pd.DataFrame.from_dict(metrics1, orient='columns')
    metrics_df.index = list(metrics_tmp.keys())
    st.write(metrics_df)
    my_bar.progress(90 + 1, text='Calculated performance metrics -> Plotting predictions')

    # Shift predictions so they line up with their position in the full series:
    # the first `lag` observations are consumed by the look-back/ahead windows.
    lag = time_step_backward + (time_step_forward - 1)
    trainPredictPlot_arima, testPredictPlot_arima = _shifted_plot_arrays(
        train_predict_arima, test_predict_arima, closedf, lag)
    trainPredictPlot, testPredictPlot = _shifted_plot_arrays(
        train_predict, test_predict, closedf, lag)
    if GMDH:
        trainPredictPlot_gmdh, testPredictPlot_gmdh = _shifted_plot_arrays(
            train_predict_gmdh, test_predict_gmdh, closedf, lag)
    if transformer:
        trainPredictPlot_transformer, testPredictPlot_transformer = _shifted_plot_arrays(
            X_train_forecast_median, X_test_forecast_median, closedf, lag)

    # Assemble the comparison frame; optional model columns are appended
    # in the same order the original nested branches produced them.
    plot_cols = {'date': close_stock['Date'],
                 'original_close': close_stock['Close'],
                 'train_predicted_close_arima': trainPredictPlot_arima.reshape(1, -1)[0].tolist(),
                 'test_predicted_close_arima': testPredictPlot_arima.reshape(1, -1)[0].tolist(),
                 'train_predicted_close': trainPredictPlot.reshape(1, -1)[0].tolist(),
                 'test_predicted_close': testPredictPlot.reshape(1, -1)[0].tolist()}
    if GMDH:
        plot_cols['train_predicted_close_gmdh'] = trainPredictPlot_gmdh.reshape(1, -1)[0].tolist()
        plot_cols['test_predicted_close_gmdh'] = testPredictPlot_gmdh.reshape(1, -1)[0].tolist()
    if transformer:
        plot_cols['train_predicted_close_transformer'] = trainPredictPlot_transformer.reshape(1, -1)[0].tolist()
        plot_cols['test_predicted_close_transformer'] = testPredictPlot_transformer.reshape(1, -1)[0].tolist()
    plotdf = pd.DataFrame(plot_cols)

    fig, ax = plt.subplots()
    ax.plot(plotdf['date'], plotdf['original_close'], label='ΠžΡ€ΠΈΠ³ΠΈΠ½Π°Π»ΡŒΠ½Π°Ρ Ρ†Π΅Π½Π° закрытия')
    ax.plot(plotdf['date'], plotdf['train_predicted_close_arima'], label='ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Π°Ρ Ρ†Π΅Π½Π° закрытия Π½Π° Ρ‚Ρ€Π΅Π½ΠΈΡ€ΠΎΠ²ΠΊΠ΅ SARIMA')
    ax.plot(plotdf['date'], plotdf['test_predicted_close_arima'], label='ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Π°Ρ Ρ†Π΅Π½Π° закрытия Π½Π° тСстС SARIMA')
    ax.plot(plotdf['date'], plotdf['train_predicted_close'], label='ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Π°Ρ Ρ†Π΅Π½Π° закрытия Π½Π° Ρ‚Ρ€Π΅Π½ΠΈΡ€ΠΎΠ²ΠΊΠ΅')
    ax.plot(plotdf['date'], plotdf['test_predicted_close'], label='ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Π°Ρ Ρ†Π΅Π½Π° закрытия Π½Π° тСстС')
    if GMDH:
        ax.plot(plotdf['date'], plotdf['train_predicted_close_gmdh'], label='ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Π°Ρ Ρ†Π΅Π½Π° закрытия Π½Π° Ρ‚Ρ€Π΅Π½ΠΈΡ€ΠΎΠ²ΠΊΠ΅ GMDH')
        ax.plot(plotdf['date'], plotdf['test_predicted_close_gmdh'], label='ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Π°Ρ Ρ†Π΅Π½Π° закрытия Π½Π° тСстС GMDH')
    if transformer:
        ax.plot(plotdf['date'], plotdf['train_predicted_close_transformer'], label='ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Π°Ρ Ρ†Π΅Π½Π° закрытия Π½Π° Ρ‚Ρ€Π΅Π½ΠΈΡ€ΠΎΠ²ΠΊΠ΅ Transformer')
        ax.plot(plotdf['date'], plotdf['test_predicted_close_transformer'], label='ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Π°Ρ Ρ†Π΅Π½Π° закрытия Π½Π° тСстС Transformer')
    ax.legend()
    ax.set_title("Π‘Ρ€Π°Π²Π½Π΅Π½ΠΈΠ΅ исходных ΠΈ смодСлированных Ρ†Π΅Π½")
    st.pyplot(fig)
    my_bar.progress(100, text='Done')

    if recursive_pred:
        # Roll each one-step model forward `pred_days` steps on its own output.
        lst_output_arima = make_prediction_recursive(test_data=test_data, method='SARIMA', model=arima_model,
                                                     scaler=scaler, pred_days=pred_days,
                                                     time_step_backward=time_step_backward)
        lst_output_lstm = make_prediction_recursive(test_data=test_data, method='LSTM', model=model,
                                                    scaler=scaler, pred_days=pred_days,
                                                    time_step_backward=time_step_backward)
        if GMDH:
            lst_output_gmdh = make_prediction_recursive(test_data=test_data, method='GMDH', model=model_gmdh,
                                                        scaler=scaler, pred_days=pred_days,
                                                        time_step_backward=time_step_backward)
        if transformer:
            lst_output_transformer = make_prediction_recursive(test_data=test_data, method='Transformer',
                                                               model=pipeline, scaler=scaler,
                                                               pred_days=pred_days,
                                                               time_step_backward=time_step_backward)

        last_days = np.arange(1, time_step_backward + 1)
        day_pred = np.arange(time_step_backward + 1, time_step_backward + pred_days + 1)
        print(last_days)
        print(day_pred)

        # NaN template: history slots followed by forecast slots.
        temp_mat = np.empty((len(last_days) + pred_days, 1))
        temp_mat[:] = np.nan

        last_original_days_value = temp_mat.copy()
        next_predicted_days_value_arima = temp_mat.copy()
        next_predicted_days_value_lstm = temp_mat.copy()
        if GMDH:
            next_predicted_days_value_gmdh = temp_mat.copy()
        if transformer:
            next_predicted_days_value_transformer = temp_mat.copy()

        last_original_days_value[0:time_step_backward] = \
            closedf[len(closedf) - time_step_backward:].values
        next_predicted_days_value_arima[time_step_backward:] = lst_output_arima
        next_predicted_days_value_lstm[time_step_backward:] = lst_output_lstm
        if GMDH:
            next_predicted_days_value_gmdh[time_step_backward:] = lst_output_gmdh
        if transformer:
            next_predicted_days_value_transformer[time_step_backward:] = lst_output_transformer

        # Optional model columns appended in the original branch order.
        pred_cols = {'last_original_days_value': last_original_days_value.reshape(1, -1).tolist()[0],
                     'next_predicted_days_value_arima': next_predicted_days_value_arima.reshape(1, -1).tolist()[0],
                     'next_predicted_days_value_lstm': next_predicted_days_value_lstm.reshape(1, -1).tolist()[0]}
        if GMDH:
            pred_cols['next_predicted_days_value_gmdh'] = next_predicted_days_value_gmdh.reshape(1, -1).tolist()[0]
        if transformer:
            pred_cols['next_predicted_days_value_transformer'] = \
                next_predicted_days_value_transformer.reshape(1, -1).tolist()[0]
        new_pred_plot = pd.DataFrame(pred_cols)

        fig, ax = plt.subplots()
        ax.plot(new_pred_plot.index, new_pred_plot['last_original_days_value'],
                label=f"ПослСдниС {time_step_backward} шагов Ρ†Π΅Π½Ρ‹ закратия")
        ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_arima'],
                label=f"ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Ρ‹Π΅ ΡΠ»Π΅Π΄ΡƒΡŽΡ‰ΠΈΠ΅ {pred_days} шагов Ρ†Π΅Π½Ρ‹ закрытия SARIMA")
        ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_lstm'],
                label=f"ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Ρ‹Π΅ ΡΠ»Π΅Π΄ΡƒΡŽΡ‰ΠΈΠ΅ {pred_days} шагов Ρ†Π΅Π½Ρ‹ закрытия LSTM")
        if GMDH:
            ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_gmdh'],
                    label=f"ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Ρ‹Π΅ ΡΠ»Π΅Π΄ΡƒΡŽΡ‰ΠΈΠ΅ {pred_days} шагов Ρ†Π΅Π½Ρ‹ закрытия GMDH")
        if transformer:
            ax.plot(new_pred_plot.index, new_pred_plot['next_predicted_days_value_transformer'],
                    label=f"ΠŸΡ€Π΅Π΄ΡΠΊΠ°Π·Π°Π½Π½Ρ‹Π΅ ΡΠ»Π΅Π΄ΡƒΡŽΡ‰ΠΈΠ΅ {pred_days} шагов Ρ†Π΅Π½Ρ‹ закрытия Transformer")
        ax.legend()
        ax.set_title(f"БравнСния послСдних {time_step_backward} шагов ΠΈ ΡΠ»Π΅Π΄ΡƒΡŽΡ‰ΠΈΡ… {pred_days} шагов")
        ax.set_ylim(0, closedf['Close'].max() * 1.5)
        st.pyplot(fig)

    @st.cache_data
    def convert_df(df):
        # IMPORTANT: Cache the conversion to prevent computation on every rerun
        return df.to_csv().encode("utf-8")

    @st.cache_data
    def convert_metrics_df(df):
        # IMPORTANT: Cache the conversion to prevent computation on every rerun
        return df.to_csv().encode("utf-8")

    plotdf_csv = convert_df(plotdf)
    metrics_df_csv = convert_metrics_df(metrics_df)
    st.download_button('Download data', plotdf_csv, file_name='predictions.csv', mime="text/csv")
    st.download_button('Download metrics', metrics_df_csv, file_name='metrics.csv', mime="text/csv")