{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "5IM6CZzW_CH0"
   },
   "source": [
    "# Stockformer Demo\n",
    "\n",
    "Configure, train, test and back-test a Stockformer model on hourly stock data, then inspect its predictions and attention maps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "b5GFng7v7Eq0"
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# if not 'Informer2020' in sys.path:\n",
    "#     sys.path += ['Informer2020']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "rIjZdN5e_SWe"
   },
   "source": [
    "## Experiments: Train and Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "RPdt-Kwc_RRZ"
   },
   "outputs": [],
   "source": [
    "# All imports live in this cell so a fresh kernel fails fast on a missing dependency.\n",
    "import os\n",
    "from pprint import pprint\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "\n",
    "from utils.tools import dotdict\n",
    "from exp.exp_informer import Exp_Informer\n",
    "from utils.ipynb_helpers import (\n",
    "    args_from_setting,\n",
    "    setting_from_args,\n",
    "    handle_gpu,\n",
    "    read_data,\n",
    ")\n",
    "from utils.stock_metrics import (\n",
    "    apply_threshold_metric,\n",
    "    PctProfitDirection,\n",
    "    PctProfitTanh,\n",
    "    PctDirection,\n",
    ")\n",
    "from data_provider.data_loader import Dataset_Custom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "6mx2dnwY9dWi"
   },
   "outputs": [],
   "source": [
    "# Experiment configuration: every tunable of the run is set on `args` in this cell.\n",
    "args = dotdict()\n",
    "args.des = \"full_1h\"\n",
    "\n",
    "args.model = \"stockformer\"  # 'stockformer'\n",
    "\n",
    "args.data = \"custom\"  # data\n",
    "args.checkpoints = \"./checkpoints\"  # location of model checkpoints\n",
    "args.root_path = \"./data/stock/\"  # root path of data file\n",
    "\n",
    "args.data_path = \"full_1h.csv\"  # data file\n",
    "args.freq = \"h\"  # freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h\n",
    "\n",
    "args.features = \"MS\"  # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate\n",
    "args.target = \"XOM_pctchange\"  # target feature in S or MS task\n",
    "\n",
    "\n",
    "args.seq_len = 16  # input sequence length of Informer encoder\n",
    "args.label_len = 1  # start token length of Informer decoder\n",
    "args.pred_len = 1  # prediction sequence length\n",
    "\n",
    "# [\"XOM_close\", \"BP_close\", \"CVX_close\", \"WTI_close\"]\n",
    "# [\"XOM_open\", \"XOM_high\", \"XOM_low\", \"XOM_close\", \"XOM_volume\", \"XOM_pctchange\", \"XOM_shortsma\"]\n",
    "args.cols = [\n",
    "    \"XOM_pctchange\",  # \"XOM_open\", \"XOM_close\", , \"XOM_shortsma\",\n",
    "    \"CVX_pctchange\",\n",
    "    \"COP_pctchange\",\n",
    "    \"BP_pctchange\",\n",
    "    \"PBR_pctchange\",\n",
    "    \"WTI_pctchange\",\n",
    "    \"EOG_pctchange\",\n",
    "    \"ENB_pctchange\",\n",
    "    \"SLB_pctchange\",\n",
    "]  #'C:USDSAR_pctchange'\n",
    "\n",
    "args.enc_in = len(args.cols)  # encoder input size\n",
    "# args.dec_in = len(args.cols) # decoder input size # TODO: Remove\n",
    "# `args.dec_in` is no longer set (see the commented-out line above), so the\n",
    "# multivariate (\"M\") output size falls back to the number of input columns.\n",
    "args.c_out = 1 if args.features in [\"S\", \"MS\"] else args.enc_in  # output size\n",
    "\n",
    "\n",
    "args.d_model = 128  # dimension of model; also the dimension of the token embeddings\n",
    "args.n_heads = 8  # num of attention heads\n",
    "args.e_layers = 12  # num of encoder layers\n",
    "# args.d_layers = 4 # num of decoder layers # TODO: Remove\n",
    "args.d_ff = 2048  # dimension of fcn in model\n",
    "args.dropout = 0.05  # dropout\n",
    "args.t_embed = None  # time features encoding, options:[timeF, fixed, learned, None]\n",
    "args.activation = \"gelu\"  # activation\n",
    "\n",
    "args.attn = \"full\"  # attention used in encoder, options:[prob, full]\n",
    "args.factor = 5  # probsparse attn factor; doesn't matter unless args.attn==prob\n",
    "args.distil = False  # whether to use distilling in encoder\n",
    "args.output_attention = False  # whether to output attention in encoder\n",
    "args.mix = False  # whether to use mixed attention\n",
    "args.padding = 0  # TODO: Remove\n",
    "\n",
    "args.batch_size = 256  # 64\n",
    "args.learning_rate = 0.00001\n",
    "args.loss = \"stock_tanh\"  # What loss function to use: [\"mse\", \"stock_lpp\", \"stock_lppns\", \"stock_tanh\"]\n",
    "args.lradj = None  # What learning rate scheduler to use: [\"type2\", None, \"type1\"]\n",
    "args.max_epochs = 50\n",
    "args.patience = 30  # For early stopping\n",
    "\n",
    "args.use_amp = False  # whether to use automatic mixed precision training\n",
    "args.num_workers = 0\n",
    "args.itr = 1  # number of runs\n",
    "\n",
    "args.scale = True  # whether to scale to mean 0, var 1\n",
    "# NOTE(review): this comment used to say \"lets keep this at False\" while the value is True -- confirm the intended setting.\n",
    "args.inverse = True  # whether to invert that scale before loss is calculated\n",
    "\n",
    "# This is for debugging to overfit\n",
    "# When True, patience doesn't matter at all and the model-state that is saved is the one after the last epoch\n",
    "# When False, the model-state that is saved is the one with the best validation loss and we can early stop with patience\n",
    "args.no_early_stop = False\n",
    "\n",
    "\n",
    "# Control data split from args, either a date string like \"2000-01-30\" or None (for default)\n",
    "args.date_start = \"2012-01-01\"  # Train data starts on this date, default is to go back as far as possible\n",
    "args.date_end = \"2020-01-01\"  # End of the date range that is used -- NOTE(review): confirm exact semantics and default against the data loader\n",
    "args.date_test = \"2019-06-01\"  # Test data is data after this date, default is to use ~20% of the data as test data\n",
    "\n",
    "\n",
    "# args.load_model_path = \"stockformer_custom_ftMS_sl16_ll4_pl1_ei12_di12_co1_iFalse_dm512_nh8_el12_dl4_df2048_atfull_fc5_ebtimeF_dtFalse_mxFalse_pretrain_full_1h_0/checkpoint-pretrain.pth\"\n",
    "\n",
    "# Code to handle gpu\n",
    "# None to use all available GPUs\n",
    "# False for not using GPUs\n",
    "# 0 for using cuda:0\n",
    "# \"0,1\" for using both cuda:0 and cuda:1\n",
    "handle_gpu(args, None)\n",
    "\n",
    "# TODO: Figure out what this is for\n",
    "args.detail_freq = args.freq\n",
    "args.freq = args.freq[-1:]\n",
    "\n",
    "\n",
    "print(\"Args in experiment:\")\n",
    "print(args)\n",
    "Exp = Exp_Informer\n",
    "\n",
    "\n",
    "# # Generate config\n",
    "# import json\n",
    "# with open(\"configs/placeholder.json\", \"w\") as f:\n",
    "#     json.dump(args, f, indent=4)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train & Test *args.itr* models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "928tzaA2AA2g",
    "outputId": "c19f673a-02d1-4f4d-91c3-d0f25e600443"
   },
   "outputs": [],
   "source": [
    "# `exp` and `setting` of the last run stay in scope for the cells below.\n",
    "exp = None\n",
    "setting = None\n",
    "for ii in range(args.itr):\n",
    "    # setting record of experiments\n",
    "    setting = setting_from_args(args, ii)\n",
    "\n",
    "    # set experiments\n",
    "    exp = Exp(args)\n",
    "\n",
    "    # train\n",
    "    print(f\">>>>>>>start training : {setting}>>>>>>>>>>>>>>>>>>>>>>>>>>\")\n",
    "    exp.train(setting)\n",
    "\n",
    "    # test\n",
    "    print(f\">>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\")\n",
    "    exp.test(setting, flag=\"test\", inverse=True)\n",
    "    exp.test(setting, flag=\"val\", inverse=True)\n",
    "    exp.test(setting, flag=\"train\", inverse=True)\n",
    "\n",
    "    torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# exp.test(setting, flag=\"test\")#, inverse=False)\n",
    "# exp.test(setting, flag=\"val\")#, inverse=False)\n",
    "# exp.test(setting, flag=\"train\")#, inverse=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "CDHF-HerAE3u"
   },
   "source": [
    "## Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "nTkluNNcyMJt",
    "outputId": "780767fe-6321-4081-e827-6701daeb375b"
   },
   "outputs": [],
   "source": [
    "# If you already have a trained model, you can set the arguments and model path, then initialize an Experiment and use it to predict\n",
    "# Prediction is a sequence which is adjacent to the last date of the data, and does not exist in the data\n",
    "# If you want to get more information about prediction, you can refer to code `exp/exp_informer.py function predict()` and `data_provider/data_loader.py class Dataset_Pred`\n",
    "\n",
    "manual = False\n",
    "\n",
    "if manual:\n",
    "    setting = \"stockformer_custom_ftMS_sl16_ll4_pl1_ei12_di12_co1_iFalse_dm512_nh8_el12_dl4_df2048_atfull_fc5_ebNone_dtFalse_mxFalse_full_1h_0\"\n",
    "    args = args_from_setting(setting, args)\n",
    "    exp = Exp(args)\n",
    "\n",
    "exp.predict(setting, True)\n",
    "\n",
    "# the prediction will be saved in ./results/{setting}/real_prediction.npy\n",
    "prediction = np.load(f\"./results/{setting}/real_prediction.npy\")\n",
    "\n",
    "print(prediction.shape)\n",
    "\n",
    "# plt.figure()\n",
    "# plt.plot(prediction[0,:,-1])\n",
    "# plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "cNhEP_7sAgqC"
   },
   "source": [
    "## Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "vMRk8VkQ2Iko",
    "outputId": "bbf3cd10-7294-472d-e330-21e00f20963a"
   },
   "outputs": [],
   "source": [
    "# When we finished exp.train(setting) and exp.test(setting), we will get a trained model and the results of test experiment\n",
    "# For each split, this cell reads ./results/{setting}/pred_{flag}.npy (predictions), true_{flag}.npy (groundtruth) and date_{flag}.npy (dates)\n",
    "\n",
    "# flag -> [trues, preds, dates], each reordered by ascending date; splits with missing files are skipped\n",
    "tpd_dict = {}\n",
    "for flag in [\"train\", \"val\", \"test\"]:\n",
    "    preds_path = f\"./results/{setting}/pred_{flag}.npy\"\n",
    "    trues_path = f\"./results/{setting}/true_{flag}.npy\"\n",
    "    dates_path = f\"./results/{setting}/date_{flag}.npy\"\n",
    "    if (\n",
    "        os.path.exists(preds_path)\n",
    "        and os.path.exists(trues_path)\n",
    "        and os.path.exists(dates_path)\n",
    "    ):\n",
    "        loaded = (np.load(trues_path), np.load(preds_path), np.load(dates_path))\n",
    "        # tpd_dict[flag] = list(zip(*sorted(zip(*tpd_dict[flag]), key=lambda x: x[-1])))\n",
    "        order = np.argsort(loaded[2], axis=None)\n",
    "        tpd_dict[flag] = [arr[order] for arr in loaded]\n",
    "\n",
    "\n",
    "print(\"Open true/pred data for:\", list(tpd_dict.keys()))\n",
    "\n",
    "# [samples, pred_len, dimensions]\n",
    "# Only the splits that were actually loaded are printed, so a missing split no longer raises KeyError.\n",
    "print(*(tpd_dict[flag][0].shape for flag in tpd_dict), \"\\n\\n\")\n",
    "\n",
    "for flag in tpd_dict:\n",
    "    trues, preds, dates = tpd_dict[flag]\n",
    "    print(\n",
    "        f\"{flag}\\ttrues.shape: {trues.shape}, preds.shape: {preds.shape}, dates.shape: {dates.shape}\"\n",
    "    )\n",
    "\n",
    "    # Error of the model's predictions\n",
    "    MSE = np.square(np.subtract(trues, preds)).mean()\n",
    "    RMSE = np.sqrt(MSE)\n",
    "    print(\"against preds\", MSE, RMSE)\n",
    "\n",
    "    # Baseline: error of always predicting 0\n",
    "    MSE = np.square(np.subtract(trues, np.zeros(preds.shape))).mean()\n",
    "    RMSE = np.sqrt(MSE)\n",
    "    print(\"against 0s\", MSE, RMSE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 265
    },
    "id": "kyPuOPGAAjl3",
    "outputId": "8554f6f8-c13a-43e1-b04b-5f27823445d0"
   },
   "outputs": [],
   "source": [
    "# Plot prediction vs ground truth of the target, then a histogram of absolute errors, per split\n",
    "for flag in tpd_dict:\n",
    "    trues, preds, dates = tpd_dict[flag]\n",
    "    true = trues[:, 0, 0]\n",
    "    pred = preds[:, 0, 0]\n",
    "    date = dates[:, 0]\n",
    "    plt.figure(num=flag, figsize=(16, 4))\n",
    "    plt.title(flag)\n",
    "    plt.plot(date, true, label=\"GroundTruth\", linestyle=\"\", marker=\".\", markersize=4)\n",
    "    plt.plot(date, pred, label=\"Prediction\", linestyle=\"\", marker=\".\", markersize=4)\n",
    "    plt.plot(date, np.zeros(date.shape), color=\"red\")\n",
    "    # plt.scatter(range(trues.shape[0]), trues[:,0,0], marker='v', color='r', label='GroundTruth')\n",
    "    # plt.scatter(range(trues.shape[0]), preds[:,0,0], marker='^', color='m', label='Prediction')\n",
    "\n",
    "    plt.legend()\n",
    "    plt.show()\n",
    "\n",
    "    # A distinct `num` guarantees a new figure; reusing `num=flag` would re-activate the\n",
    "    # figure above on backends that keep figures alive after plt.show().\n",
    "    plt.figure(num=f\"{flag}_hist\", figsize=(16, 4))\n",
    "    plt.title(\"Diff histogram\")\n",
    "    # plt.hist(np.abs(true), bins=len(true)//6, label='Diff 0', alpha=0.5)\n",
    "    # plt.hist(np.abs(true - pred), bins=len(true)//6, label='Diff Pred', alpha=0.5)\n",
    "    plt.hist(\n",
    "        [np.abs(true), np.abs(true - pred)], bins=60, label=[\"Diff 0\", \"Diff Pred\"]\n",
    "    )\n",
    "    plt.xlabel(\"Diff Value\")\n",
    "    plt.ylabel(\"Count\")\n",
    "    plt.legend()\n",
    "    plt.show()\n",
    "\n",
    "    # df = pd.concat([pd.DataFrame(a, columns=[f\"{i}\"]) for i, a in enumerate([np.abs(true - pred), np.abs(true)])], axis=1)\n",
    "\n",
    "    # # plot the data\n",
    "    # df.plot.hist(stacked=True, bins=len(true), density=True, figsize=(10, 6), grid=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Basic back-test based on buying in predicted direction if prediction is above a threshold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# (best tune-metric value on train, threshold that produced it)\n",
    "max_tracker = (0, 0)\n",
    "\n",
    "# Tracks results: tracker[thresh][flag] -> dict of metrics\n",
    "tracker = {}\n",
    "\n",
    "df = read_data(os.path.join(args.root_path, args.data_path))\n",
    "\n",
    "# Get the percentile to check thresh until\n",
    "percentile = [50, 0.0]\n",
    "percentile_flags = [\"train\"]  # tpd_dict\n",
    "for flag in percentile_flags:\n",
    "    _, preds, _ = tpd_dict[flag]\n",
    "    percentile[1] += np.percentile(\n",
    "        np.abs(preds), percentile[0]\n",
    "    )  # np.median(np.abs(preds))\n",
    "# Average over the splits that were actually accumulated (previously divided by len(tpd_dict))\n",
    "percentile[1] /= len(percentile_flags)\n",
    "print(f\"{percentile[0]}'th percentile: {percentile[1]}\")\n",
    "\n",
    "# rsplit keeps tickers that themselves contain \"_\" intact\n",
    "ticker, field = args.target.rsplit(\"_\", 1)\n",
    "# NOTE(review): this used to require \"logpctchange\", which always failed with the\n",
    "# \"*_pctchange\" target configured above. Confirm the metrics below are valid for\n",
    "# plain (non-log) percent change.\n",
    "assert field in (\n",
    "    \"pctchange\",\n",
    "    \"logpctchange\",\n",
    "), f\"Back-test expects a percent-change target, got field '{field}' from target '{args.target}'\"\n",
    "\n",
    "for thresh in np.linspace(0, percentile[1], 501):\n",
    "    # print(\"thresh:\", thresh)\n",
    "    tracker[thresh] = {}\n",
    "    for flag in tpd_dict:\n",
    "        trues, preds, dates = tpd_dict[flag]\n",
    "        # trues, preds = np.exp(trues), np.exp(preds)\n",
    "        true = trues[:, 0, 0]\n",
    "        pred = preds[:, 0, 0]\n",
    "        date = pd.DatetimeIndex(dates[:, 0], tz=\"UTC\")\n",
    "\n",
    "        # Filter by thresh. Note in log scale\n",
    "        pred_f, true_f = apply_threshold_metric(pred, true, thresh)\n",
    "        # NOTE(review): df_f is not used below; kept because the lookup raises if a date is missing from df\n",
    "        df_f = df.loc[date[np.abs(pred) >= thresh]]\n",
    "\n",
    "        # Percent direction correct, ie up or down\n",
    "        pct_dir_correct = PctDirection.metric(pred, true)\n",
    "\n",
    "        # Percent profit all in\n",
    "        pct_profit_dir = PctProfitDirection.metric(pred_f, true_f, short_filter=0)\n",
    "        pct_profit_dir_nshort = PctProfitDirection.metric(pred_f, true_f, short_filter=1)\n",
    "        pct_profit_dir_oshort = PctProfitDirection.metric(pred_f, true_f, short_filter=2)\n",
    "\n",
    "        # Percent profit with tanh partial purchase\n",
    "        pct_profit_tanh = PctProfitTanh.metric(pred_f, true_f, short_filter=0)\n",
    "        pct_profit_tanh_nshort = PctProfitTanh.metric(pred_f, true_f, short_filter=1)\n",
    "        pct_profit_tanh_oshort = PctProfitTanh.metric(pred_f, true_f, short_filter=2)\n",
    "\n",
    "        # Optimal percent profit\n",
    "        pct_profit_dir_opt = PctProfitDirection.metric(true_f, true_f)\n",
    "\n",
    "        # Tune threshhold based off of train's metric we care about\n",
    "        tune_metric = pct_profit_tanh if args.loss == \"stock_tanh\" else pct_profit_dir\n",
    "        if tune_metric > max_tracker[0] and flag == \"train\":\n",
    "            max_tracker = (tune_metric, thresh)\n",
    "\n",
    "        # Save\n",
    "        tracker[thresh][flag] = {\n",
    "            \"pct_profit_dir\": pct_profit_dir,\n",
    "            \"pct_profit_dir_nshort\": pct_profit_dir_nshort,\n",
    "            \"pct_profit_dir_oshort\": pct_profit_dir_oshort,\n",
    "            \"pct_profit_tanh\": pct_profit_tanh,\n",
    "            \"pct_profit_tanh_nshort\": pct_profit_tanh_nshort,\n",
    "            \"pct_profit_tanh_oshort\": pct_profit_tanh_oshort,\n",
    "            \"pct_excluded\": (len(pred) - len(pred_f[pred_f > 0])) / len(pred),\n",
    "            \"pct_excluded_wshort\": (len(pred) - len(pred_f)) / len(pred),\n",
    "            \"pct_dir_correct\": pct_dir_correct,\n",
    "            \"pct_profit_dir_opt\": pct_profit_dir_opt,\n",
    "        }\n",
    "\n",
    "\n",
    "best_thresh = max_tracker[1]\n",
    "print(\"best thresh:\", best_thresh)\n",
    "for data_group in tracker[best_thresh]:\n",
    "    print(data_group, end=\"\\t\")\n",
    "    pprint(tracker[best_thresh][data_group], indent=3)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axs = plt.subplots(3, 1, sharex=True, figsize=(16, 8))\n",
    "\n",
    "# Pick the profit metric that matches the training loss. The choice does not depend on the\n",
    "# split, so it is made once; losses that are neither \"lpp\" nor \"tanh\" (e.g. \"mse\") fall back\n",
    "# to the direction metric, matching the tune-metric choice in the back-test cell.\n",
    "if \"lpp\" in args.loss:\n",
    "    metric = PctProfitDirection\n",
    "    metric_name = \"pct_profit_dir\"\n",
    "elif \"tanh\" in args.loss:\n",
    "    metric = PctProfitTanh\n",
    "    metric_name = \"pct_profit_tanh\"\n",
    "else:\n",
    "    metric = PctProfitDirection\n",
    "    metric_name = \"pct_profit_dir\"\n",
    "\n",
    "for flag in tpd_dict:\n",
    "    trues, preds, dates = tpd_dict[flag]\n",
    "    true = trues[:, 0, 0]\n",
    "    pred = preds[:, 0, 0]\n",
    "    date = pd.DatetimeIndex(dates[:, 0], tz=\"UTC\")\n",
    "\n",
    "    # Filter by best_thresh. Note in log scale\n",
    "    pred_f, true_f = apply_threshold_metric(pred, true, best_thresh)\n",
    "    date_f = date[np.abs(pred) >= best_thresh]\n",
    "\n",
    "    # Only the first axis carries labels, so fig.legend() lists each split once\n",
    "    axs[0].plot(date_f, metric.accumulate(pred_f, true_f, short_filter=0), label=flag)\n",
    "    axs[1].plot(date_f[pred_f > 0], metric.accumulate(pred_f, true_f, short_filter=1))  # , label=flag)\n",
    "    axs[2].plot(date_f[pred_f < 0], metric.accumulate(pred_f, true_f, short_filter=2))  # , label=flag)\n",
    "\n",
    "for ax, suffix in zip(axs, [\"\", \"_nshort\", \"_oshort\"]):\n",
    "    ax.set_ylabel(f\"{metric_name}{suffix}\")\n",
    "    ax.set_title(f\"{metric_name}{suffix}\")\n",
    "    ax.grid(axis=\"y\")\n",
    "\n",
    "fig.legend()\n",
    "fig.suptitle(\"Cumulative metrics overtime\")\n",
    "\n",
    "# plt.show() instead of fig.show(): the latter only warns on non-GUI (inline) backends\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Attention Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "iflTTl0quCoK",
    "outputId": "3708fc91-517e-4c83-e133-059381bde271"
   },
   "outputs": [],
   "source": [
    "# Rebuild the experiment with attention outputs enabled and load the trained weights into it\n",
    "args.output_attention = True\n",
    "\n",
    "exp = Exp(args)\n",
    "\n",
    "model = exp.model\n",
    "\n",
    "path = os.path.join(args.checkpoints, setting, \"checkpoint.pth\")\n",
    "\n",
    "# map_location lets a checkpoint saved on one device load on the device this experiment uses\n",
    "print(model.load_state_dict(torch.load(path, map_location=exp.device)))\n",
    "\n",
    "df = pd.read_csv(os.path.join(args.root_path, args.data_path))\n",
    "df[args.cols].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "lDdzqm9HAk2C"
   },
   "outputs": [],
   "source": [
    "Data = Dataset_Custom\n",
    "flag = \"test\"\n",
    "shuffle_flag = False\n",
    "drop_last = True\n",
    "batch_size = 1\n",
    "data_set = Data(args, flag=flag)\n",
    "\n",
    "data_loader = DataLoader(\n",
    "    data_set,\n",
    "    batch_size=batch_size,\n",
    "    shuffle=shuffle_flag,\n",
    "    num_workers=args.num_workers,\n",
    "    drop_last=drop_last,\n",
    ")\n",
    "\n",
    "\n",
    "# Run one forward pass on batch number `idx` of the test split to collect its attention maps\n",
    "idx = 0\n",
    "model.eval()  # inference mode, so dropout does not perturb the attention maps\n",
    "with torch.no_grad():  # no gradients are needed for visualization\n",
    "    for i, (batch_x, batch_y, batch_x_mark, batch_y_mark, ds_index) in enumerate(\n",
    "        data_loader\n",
    "    ):\n",
    "        if i != idx:\n",
    "            continue\n",
    "        batch_x = batch_x.float().to(exp.device)\n",
    "        batch_y = batch_y.float()\n",
    "\n",
    "        batch_x_mark = batch_x_mark.float().to(exp.device)\n",
    "        batch_y_mark = batch_y_mark.float().to(exp.device)\n",
    "\n",
    "        # Decoder input: the first label_len steps of batch_y followed by zeros for the pred_len steps\n",
    "        dec_inp = torch.zeros_like(batch_y[:, -args.pred_len :, :]).float()\n",
    "        dec_inp = (\n",
    "            torch.cat([batch_y[:, : args.label_len, :], dec_inp], dim=1)\n",
    "            .float()\n",
    "            .to(exp.device)\n",
    "        )\n",
    "\n",
    "        outputs, attn = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n",
    "        break  # only batch `idx` is needed; skip the rest of the loader\n",
    "\n",
    "\n",
    "print(attn[0].shape, attn[1].shape)  # , attn[2].shape\n",
    "\n",
    "\n",
    "# One heatmap per attention head for each of the selected layers\n",
    "layers = [0, 1]\n",
    "distil = \"Distil\" if args.distil else \"NoDistil\"\n",
    "for layer in layers:\n",
    "    print(\"\\n\\n==========================\")\n",
    "    print(\"Showing attention layer\", layer)\n",
    "    print(\"==========================\\n\\n\")\n",
    "    for h in range(0, args.n_heads):\n",
    "        plt.figure(figsize=[10, 8])\n",
    "        plt.title(f\"Informer, {distil}, attn:{args.attn} layer:{layer} head:{h}\")\n",
    "        A = attn[layer][0, h].detach().cpu().numpy()\n",
    "        ax = sns.heatmap(A, vmin=0, vmax=A.max() + 0.01)\n",
    "        plt.show()"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "former",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6 (main, Oct 24 2022, 16:07:47) [GCC 11.2.0]"
  },
  "vscode": {
   "interpreter": {
    "hash": "44e5710a47a66ec240c2a0834fd7c20e15c61536e70be6891d892a39679ad994"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}