| | |
| | |
| | |
| | import copy |
| | from pathlib import Path |
| | import warnings |
| | import holidays |
| | import seaborn as sns |
| | import matplotlib |
| | import matplotlib.dates as mdates |
| | import matplotlib.pyplot as plt |
| | plt.style.use('fivethirtyeight') |
| | import numpy as np |
| | import pandas as pd |
| | import glob |
| | import csv |
| | import lightning.pytorch as pl |
| | from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor |
| | from lightning.pytorch.loggers import TensorBoardLogger |
| | import torch |
| | from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet |
| | from pytorch_forecasting.data import GroupNormalizer, NaNLabelEncoder |
| | from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss |
| | from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters |
| | import random |
| | import gc |
| | import tensorflow as tf |
| | import tensorboard as tb |
| | tf.io.gfile = tb.compat.tensorflow_stub.io.gfile |
| | import os |
| | import math |
| | import sys |
| | from sklearn.model_selection import train_test_split |
| | from sklearn.preprocessing import MinMaxScaler |
| | import tensorflow as tf |
| | from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Bidirectional, TimeDistributed |
| | from tensorflow.keras.layers import MaxPooling1D, Flatten |
| | from tensorflow.keras.regularizers import L1, L2 |
| | from tensorflow.keras.metrics import Accuracy |
| | from tensorflow.keras.metrics import RootMeanSquaredError |
| | from sklearn.metrics import mean_squared_error as MSE |
| | from sklearn.model_selection import KFold |
| | from sklearn.inspection import permutation_importance |
| | from tensorflow.keras.utils import plot_model |
| | from sklearn.metrics import explained_variance_score, mean_poisson_deviance, mean_gamma_deviance, mean_squared_error, mean_squared_log_error, d2_absolute_error_score, d2_pinball_score, d2_tweedie_score |
| | from sklearn.metrics import r2_score |
| | from sklearn.metrics import max_error |
| | import datetime |
| | from datetime import date |
| | import optuna |
| | from tensorflow.keras.callbacks import Callback |
| | from optuna.integration import TFKerasPruningCallback |
| | import shutil |
| | import gradio as gr |
| |
|
| | |
# Torch device string used when moving models/tensors.
DEVICE = 'cpu' if not torch.cuda.is_available() else 'cuda'

# Seed every random number generator used in this file with the same value.
for _seed_fn in (
    random.seed,
    np.random.seed,
    tf.random.set_seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
):
    _seed_fn(30)
| |
|
| | |
# Training configuration read by the model functions in this file.
PATIENCE = 30          # passed to EarlyStopping(patience=...)
MAX_EPOCHS = 3         # epoch budget for the tuned TFT run and each Optuna trial
LEARNING_RATE = 0.01   # learning rate of the initial (untuned) TFT
OPTUNA = True          # when True, the TFT is re-trained with tuned hyperparameters
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:1024"
# Pick the Lightning accelerator from what is actually available instead of
# assuming a GPU, so the script also runs on CPU-only machines.
ACCELERATOR = "gpu" if torch.cuda.is_available() else "cpu"
| |
|
| | |
# Width of one result row, and a zero-filled module-level row of that width.
w = 7
prax = [0] * w
| |
|
| | |
| | |
def objective(trial, X_train, y_train, X_test, y_test):
    """Optuna objective: train one CNN + BiLSTM candidate and return its loss.

    The trial supplies the unit counts of the two bidirectional LSTM layers
    and the rate of the dropout layer following each of them.  The model is
    fitted for 15 epochs with a pruning callback watching ``val_loss``; the
    returned value is the MSE loss from ``model.evaluate`` on the test arrays.
    """
    sequence_length = X_train.shape[1]

    # List elements are evaluated in order, so the trial parameters are
    # requested as lstm_units_1, dropout_1, lstm_units_2, dropout_2.
    network = tf.keras.Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(sequence_length, 1)),
        Bidirectional(LSTM(trial.suggest_int("lstm_units_1", 32, 256), return_sequences=True)),
        Dropout(trial.suggest_float("dropout_1", 0.1, 0.5)),
        Bidirectional(LSTM(trial.suggest_int("lstm_units_2", 32, 256), return_sequences=False)),
        Dropout(trial.suggest_float("dropout_2", 0.1, 0.5)),
        Dense(1, activation='relu'),
    ])
    network.compile(optimizer='adam', loss='mse', metrics=['mse'])

    pruner_hook = TFKerasPruningCallback(trial, "val_loss")
    network.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=15,
        batch_size=32,
        verbose=0,
        callbacks=[pruner_hook],
    )

    # evaluate() returns [loss, mse]; the first entry is the objective value.
    evaluation = network.evaluate(X_test, y_test, verbose=0)
    return evaluation[0]
| |
|
| | |
| | |
# Number of most-recent rows kept out of the train/test split.
_CNN_HOLDOUT_ROWS = 100
# Smallest number of remaining rows that still leaves enough samples for the
# 80/20 split, the one-step target shift and the 10-fold loop.
_CNN_MIN_ROWS = 20


def _cnn_direction(delta):
    """Return 1, 0 or -1 for a positive, zero or other price difference."""
    if delta > 0:
        return 1
    if delta == 0:
        return 0
    return -1


def _build_cnn_lstm(n_features, params):
    """Build and compile the Conv1D + two-layer BiLSTM regressor.

    ``params`` must provide ``lstm_units_1``, ``dropout_1``, ``lstm_units_2``
    and ``dropout_2`` (the names suggested by ``objective``).
    """
    model = tf.keras.Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_features, 1)),
        Bidirectional(LSTM(params["lstm_units_1"], return_sequences=True)),
        Dropout(params["dropout_1"]),
        Bidirectional(LSTM(params["lstm_units_2"], return_sequences=False)),
        Dropout(params["dropout_2"]),
        Dense(1, activation='relu'),
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mse'])
    return model


def _run_cnn_lstm(frame, prax, last_col):
    """Tune, train and apply the CNN-LSTM on columns ``1:last_col`` of ``frame``.

    ``frame`` is modified (Date/Time becomes a float column), so callers pass
    a copy.  Results are written into ``prax``:

    * ``prax[3]`` / ``prax[4]`` - the last two model outputs, un-scaled
    * ``prax[5]`` - max error between scaled predictions and next-row Close
    * ``prax[6]`` - 1 / 0 / -1 depending on the sign of prax[4] - prax[3]

    Raises:
        ValueError: if too few rows remain after holding out the last
            ``_CNN_HOLDOUT_ROWS`` rows.
    """
    # The positional column slice is cast to float below, so the timestamp
    # column is first turned into a number (nanoseconds since the epoch).
    frame['Date/Time'] = (
        pd.to_datetime(frame['Date/Time']).values.astype("int64").astype("float64")
    )
    features = frame.iloc[:, 1:last_col].astype("float64")

    usable_rows = len(features) - _CNN_HOLDOUT_ROWS
    if usable_rows < _CNN_MIN_ROWS:
        raise ValueError(
            f"need at least {_CNN_HOLDOUT_ROWS + _CNN_MIN_ROWS} rows, got {len(features)}"
        )
    usable = features.iloc[:usable_rows]

    split = int(len(usable) * 0.8)
    train_part, test_part = usable.iloc[:split], usable.iloc[split:]

    # One-step-ahead setup: the features of row t are paired with Close of row t+1.
    X_train, y_train = train_part.iloc[:-1], train_part['Close'].iloc[1:]
    X_test, y_test = test_part.iloc[:-1], test_part['Close'].iloc[1:]

    # Separate scalers for features and target, fitted on the training part
    # only and then re-used, so test and inference data share one scaling.
    feature_scaler = MinMaxScaler()
    target_scaler = MinMaxScaler()
    X_train = feature_scaler.fit_transform(X_train)
    X_test = feature_scaler.transform(X_test)
    y_train = target_scaler.fit_transform(y_train.values.reshape(-1, 1))
    y_test = target_scaler.transform(y_test.values.reshape(-1, 1))

    # Conv1D expects (samples, steps, channels); each feature becomes a step.
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    study = optuna.create_study(
        direction="minimize",
        pruner=optuna.pruners.MedianPruner(n_min_trials=5, n_startup_trials=5),
    )
    study.optimize(
        lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test),
        n_trials=7,
    )

    model = _build_cnn_lstm(X_train.shape[1], study.best_params)
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, verbose=0)
    loss = model.evaluate(X_test, y_test, verbose=0)[0]
    print(f"Final loss (without KFold): {loss}")

    # NOTE: the same model instance keeps training across folds (weights are
    # not reset), so this is additional training rather than independent
    # cross-validation; the per-fold losses are reported as a summary only.
    inputs = np.concatenate((X_train, X_test), axis=0)
    targets = np.concatenate((y_train, y_test), axis=0)
    fold_losses = []
    print('------------------------------------------------------------------------')
    print("Training for 10 folds... Standby")
    for train_idx, test_idx in KFold(n_splits=10, shuffle=True).split(inputs, targets):
        model.fit(inputs[train_idx], targets[train_idx], batch_size=32, epochs=15, verbose=0)
        fold_losses.append(model.evaluate(inputs[test_idx], targets[test_idx], verbose=0)[0])
    print(f"Mean loss over folds: {np.mean(fold_losses)}")
    print('------------------------------------------------------------------------')

    # Apply the model to every row, including the held-out tail.
    all_inputs = feature_scaler.transform(features)
    all_inputs = all_inputs.reshape(all_inputs.shape[0], all_inputs.shape[1], 1)
    all_close = target_scaler.transform(features['Close'].values.reshape(-1, 1))

    pred = np.asarray(model.predict(all_inputs, verbose=0)).reshape(-1, 1)
    # pred[t] estimates Close[t+1], so drop the last prediction and the first
    # close to compare like with like.
    var2 = max_error(all_close[1:].ravel(), pred[:-1].ravel())
    print('Max Error: %f' % var2)
    prax[5] = float(var2)

    pred = target_scaler.inverse_transform(pred)
    print(pred[-2], pred[-1])
    prax[3] = float(pred[-2, 0])
    prax[4] = float(pred[-1, 0])
    prax[6] = _cnn_direction(prax[4] - prax[3])


def modelCNNLSTM(csv_file, prax):
    """Run the CNN-LSTM model on columns 1..22 of ``csv_file``.

    ``csv_file`` is a DataFrame containing at least 'Date/Time' and 'Close';
    it is copied, not modified.  Results are stored in ``prax[3:7]`` (see
    ``_run_cnn_lstm``).  Returns None.
    """
    _run_cnn_lstm(csv_file.copy(), prax, last_col=23)


def modelCNNLSTM_OpenGap(csv_file, prax):
    """Run the CNN-LSTM model with an extra 'O-C' opening-gap feature.

    'O-C' is Open minus the previous row's Close (0 for the first row) and is
    appended as the last column, so columns 1..23 are used.  ``csv_file`` is
    copied, not modified.  Results are stored in ``prax[3:7]``.  Returns None.
    """
    frame = csv_file.copy()
    gap = frame['Open'] - frame['Close'].shift(1)
    if len(gap):
        gap.iloc[0] = 0  # no previous close exists for the first row
    frame['O-C'] = gap
    _run_cnn_lstm(frame, prax, last_col=24)
| |
|
| | |
| | |
# Continuous inputs used by both TFT variants (the target 'Close' included).
_TFT_UNKNOWN_REALS = [
    'Open', 'High', 'Low', 'Close', 'OI', 'RSI14', 'RSI44', 'HHRSI',
    'Rsi Weekly', 'LLCHHV', 'white', 'Vap44', 'Vap14', 'BV11', 'SV11',
    'Ema5', 'Ema20', 'Ema50', 'Ema200',
]


def _tft_direction(delta):
    """Return 1, 0 or -1 for a positive, zero or other price difference."""
    if delta > 0:
        return 1
    if delta == 0:
        return 0
    return -1


def _make_tft_trainer(max_epochs, gradient_clip_val, limit_train_batches):
    """Create a Lightning trainer with its own callbacks and TensorBoard logger."""
    early_stop_callback = EarlyStopping(
        monitor="train_loss", min_delta=1e-2, patience=PATIENCE, verbose=False, mode="min"
    )
    return pl.Trainer(
        max_epochs=max_epochs,
        accelerator=ACCELERATOR,
        enable_model_summary=False,
        gradient_clip_val=gradient_clip_val,
        limit_train_batches=limit_train_batches,
        callbacks=[LearningRateMonitor(), early_stop_callback],
        logger=TensorBoardLogger("lightning_logs"),
    )


def _make_tft(training, learning_rate, hidden_size, attention_head_size, dropout, hidden_continuous_size):
    """Create a single-output TFT with SMAPE loss for ``training`` and move it to DEVICE."""
    tft = TemporalFusionTransformer.from_dataset(
        training,
        learning_rate=learning_rate,
        lstm_layers=2,
        hidden_size=hidden_size,
        attention_head_size=attention_head_size,
        dropout=dropout,
        hidden_continuous_size=hidden_continuous_size,
        output_size=1,
        loss=SMAPE(),
        log_interval=10,
        reduce_on_plateau_patience=4,
    )
    tft.to(DEVICE)
    return tft


def _run_tft(frame, prax, unknown_reals, pruner):
    """Train a Temporal Fusion Transformer on ``frame`` and record its forecast.

    Args:
        frame: DataFrame with 'Ticker', 'Date/Time' and every column named in
            ``unknown_reals``.  It is not modified.
        prax: result row, updated in place: ``prax[3]`` = '-', ``prax[4]`` =
            first value of the raw prediction tensor, ``prax[5]`` = median
            SMAPE on the validation loader, ``prax[6]`` = 1 / 0 / -1 for the
            sign of ``prax[4]`` minus the last 'Close' in ``frame``.
        unknown_reals: names passed as ``time_varying_unknown_reals``.
        pruner: Optuna pruner forwarded to ``optimize_hyperparameters``.
    """
    # reset_index returns a new frame, so the caller's data stays untouched.
    data = frame.reset_index(drop=True)
    data['date'] = pd.to_datetime(data['Date/Time'])

    # time_idx: running number of each distinct Date/Time in order of appearance.
    time_index = (
        data[['Date/Time']]
        .drop_duplicates(ignore_index=True)
        .rename_axis('time_idx')
        .reset_index()
    )
    data = data.merge(time_index, on=['Date/Time'])

    data["day_of_week"] = data['date'].dt.dayofweek.astype(str).astype("category")
    data["week_of_year"] = data['date'].dt.isocalendar().week.astype(str).astype("category")
    data["month"] = data['date'].dt.month.astype(str).astype("category")
    gc.collect()

    max_prediction_length = 2
    max_encoder_length = data.date.nunique()
    training_cutoff = data["time_idx"].max() - max_prediction_length

    training = TimeSeriesDataSet(
        data[data.time_idx <= training_cutoff],
        time_idx="time_idx",
        target="Close",
        group_ids=["Ticker"],
        min_encoder_length=max_prediction_length,
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        static_categoricals=["Ticker"],
        time_varying_known_categoricals=["month", "week_of_year", "day_of_week"],
        time_varying_known_reals=["time_idx"],
        time_varying_unknown_categoricals=[],
        time_varying_unknown_reals=list(unknown_reals),
        target_normalizer=GroupNormalizer(groups=['Ticker'], transformation="softplus"),
        categorical_encoders={'week_of_year': NaNLabelEncoder(add_nan=True)},
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
    )
    # Validation set built from the full frame with predict=True.
    validation = TimeSeriesDataSet.from_dataset(training, data, predict=True, stop_randomization=True)

    batch_size = 128
    train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
    val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

    # Tensors are moved to DEVICE (not unconditionally to CUDA) so the
    # comparison also works on CPU-only hosts.
    actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)]).to(DEVICE)
    baseline_predictions = Baseline().predict(val_dataloader).to(DEVICE)
    baseline_smape = SMAPE().loss(actuals, baseline_predictions).mean(axis=1).median().item()
    print(f"Median loss for naive prediction on validation: {baseline_smape}")

    # Short initial run with fixed hyperparameters.
    trainer = _make_tft_trainer(max_epochs=1, gradient_clip_val=0.25, limit_train_batches=10)
    tft = _make_tft(
        training,
        learning_rate=LEARNING_RATE,
        hidden_size=16,
        attention_head_size=2,
        dropout=0.2,
        hidden_continuous_size=8,
    )
    trainer.fit(tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

    if OPTUNA:
        study = optimize_hyperparameters(
            train_dataloader,
            val_dataloader,
            model_path="optuna_test",
            n_trials=5,
            max_epochs=MAX_EPOCHS,
            gradient_clip_val_range=(0.01, 0.3),
            hidden_size_range=(8, 24),
            hidden_continuous_size_range=(8, 12),
            attention_head_size_range=(2, 4),
            learning_rate_range=(0.01, 0.05),
            dropout_range=(0.1, 0.25),
            trainer_kwargs=dict(limit_train_batches=20),
            reduce_on_plateau_patience=4,
            pruner=pruner,
            use_learning_rate_finder=False,
        )
        best = study.best_params

        # Re-train from scratch with the tuned hyperparameters; this trainer's
        # checkpoint is the one loaded below.
        trainer = _make_tft_trainer(
            max_epochs=MAX_EPOCHS,
            gradient_clip_val=best['gradient_clip_val'],
            limit_train_batches=20,
        )
        tft = _make_tft(
            training,
            learning_rate=best['learning_rate'],
            hidden_size=best['hidden_size'],
            attention_head_size=best['attention_head_size'],
            dropout=best['dropout'],
            hidden_continuous_size=best['hidden_continuous_size'],
        )
        trainer.fit(tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

    best_model_path = trainer.checkpoint_callback.best_model_path
    best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
    predictions = best_tft.predict(val_dataloader, mode="prediction").to(DEVICE)
    raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)

    # Compute the metric once, with both tensors on the same device.
    validation_smape = SMAPE().loss(actuals, predictions).mean(axis=1).median().item()
    print(f"Validation median SMAPE loss: {validation_smape}")
    prax[5] = validation_smape

    raw_output = raw_predictions[0][0]
    print(raw_output)
    prax[3] = '-'
    # Flatten before indexing so a plain float results whether or not the raw
    # tensor carries a trailing output dimension.
    prax[4] = raw_output.detach().cpu().reshape(-1)[0].item()
    prax[6] = _tft_direction(prax[4] - data['Close'].iloc[-1])

    print("-----------")


def modelTFT(csv_file, prax):
    """Train the TFT on ``csv_file`` and write its results into ``prax[3:7]``.

    ``csv_file`` is a DataFrame (one row per time step) and is not modified.
    Returns None; see ``_run_tft`` for the meaning of the ``prax`` slots.
    """
    _run_tft(
        csv_file,
        prax,
        _TFT_UNKNOWN_REALS,
        optuna.pruners.MedianPruner(n_min_trials=3, n_startup_trials=3),
    )


def modelTFT_OpenGap(csv_file, prax):
    """Train the TFT with an additional 'O-C' opening-gap input.

    'O-C' is Open minus the previous row's Close, with 0 for the first row.
    ``csv_file`` is copied, not modified.  Returns None; results go into
    ``prax[3:7]`` as described in ``_run_tft``.
    """
    frame = csv_file.copy()
    gap = frame['Open'] - frame['Close'].shift(1)
    if len(gap):
        gap.iloc[0] = 0  # no previous close exists for the first row
    frame['O-C'] = gap
    _run_tft(
        frame,
        prax,
        _TFT_UNKNOWN_REALS + ['O-C'],
        optuna.pruners.MedianPruner(n_min_trials=3, n_warmup_steps=3),
    )
| |
|
| | |
| | |
| | |
| |
|
| | |
def generate_csv(data_list):
    """Append ``data_list`` as one row to ``result.csv`` in the working directory.

    The column header is written first when the file does not exist yet or
    exists but is empty.
    """
    filename = "result.csv"
    fieldnames = ['Ticker', 'Prev_Close_Real', 'Model', 'Prev_Close_Model', 'Close_Model', 'Max_Err', 'Up_Down']
    # A zero-byte file has no header either, so treat it like a missing one.
    needs_header = not os.path.isfile(filename) or os.path.getsize(filename) == 0
    # The with-statement closes the file; no explicit close() is needed.
    with open(filename, mode='a', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file, delimiter=',')
        if needs_header:
            writer.writerow(fieldnames)
        writer.writerow(data_list)
| |
|
def fileOutput():
    """Copy ``result.csv`` to ``result_<YYYY_MM_DD>.csv`` and return that name.

    The date stamp is today's local date.
    """
    stamp = date.today().strftime("%Y_%m_%d")
    dated_name = "result_" + stamp + ".csv"
    shutil.copyfile("result.csv", dated_name)
    return dated_name
| |
|
def guess_date(string):
    """Parse ``string`` with the first matching date format and return a date.

    The formats are tried in the order listed below, so an ambiguous value
    such as ``01/02/2024`` is read with the earliest format that accepts it.

    Raises:
        ValueError: carrying ``string`` when no format matches.
    """
    candidate_formats = (
        "%Y/%m/%d",
        "%d-%m-%Y",
        "%Y%m%d",
        "%m/%d/%Y",
        "%d/%m/%Y",
        "%Y-%m-%d",
        "%d/%m/%y",
        "%m/%d/%y",
    )
    for pattern in candidate_formats:
        try:
            parsed = datetime.datetime.strptime(string, pattern)
        except ValueError:
            pass
        else:
            return parsed.date()
    raise ValueError(string)
| |
|
| | |
| | |
def main(files):
    """Run both TFT models on every uploaded CSV and return the results file.

    Args:
        files: iterable of uploaded files; each item is either an object with
            a ``name`` attribute holding the path, or the path itself.

    For each file two rows (models "TFT" and "TFT_OpenGap") plus one blank
    separator row are appended to ``result.csv`` via ``generate_csv``.

    Returns:
        The name of the date-stamped copy produced by ``fileOutput``.
    """
    prax = [0, 0, 0, 0, 0, 0, 0]
    for idx, file in enumerate(files):
        print(f"File #{idx+1}: {file}")
        # Accept both file wrappers (with .name) and plain path strings.
        path = getattr(file, "name", file)
        print(path)
        df = pd.read_csv(path)

        # Positional access: first ticker and last close, whatever the index.
        ticker = df['Ticker'].iloc[0]
        print(ticker)
        prax[0] = ticker
        prax[1] = df['Close'].iloc[-1]
        print('------------------')
        df = df.drop(['EMARSI'], axis=1)

        # Normalise the mixed date formats in one pass instead of assigning
        # row by row through chained indexing.
        df['Date/Time'] = pd.to_datetime(df['Date/Time'].map(guess_date))
        df.fillna(0, inplace=True)

        modelTFT(df, prax)
        prax[2] = "TFT"
        generate_csv(prax)
        modelTFT_OpenGap(df, prax)
        prax[2] = "TFT_OpenGap"
        generate_csv(prax)

        # Blank row separates the results of consecutive files.
        generate_csv(["", "", "", "", "", "", ""])
        prax = [0, 0, 0, 0, 0, 0, 0]

    return fileOutput()
| |
|
# Web UI: upload one or more CSV files, receive the combined results file.
# gr.File restricts uploads through `file_types`, which takes a list.
gradioApp = gr.Interface(
    fn=main,
    inputs=gr.File(file_count="multiple", file_types=[".csv"]),
    outputs="file",
)


if __name__ == "__main__":
    gradioApp.launch()
| |
|