File size: 5,424 Bytes

093b0a5

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Training on more temporal data is better. \n",
    "If it is not better with default split, try a fixed split. If it is not better with that try gradually sampling older data less. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from utils.tools import dotdict\n",
    "from exp.exp_informer import Exp_Informer\n",
    "import torch\n",
    "from utils.ipynb_helpers import setting_from_args, handle_gpu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "args = dotdict()\n",
    "\n",
    "args.model = \"informer\"  # model of experiment, options: [informer, informerstack, informerlight(TBD)]\n",
    "\n",
    "args.data = \"custom\"  # data\n",
    "args.root_path = \"./data/stock/\"  # root path of data file\n",
    "\n",
    "\n",
    "args.data_path = \"close.csv\"  # data file\n",
    "args.features = \"MS\"  # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate\n",
    "args.target = \"XOM_close\"  # target feature in S or MS task\n",
    "args.freq = \"t\"  # freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h\n",
    "args.checkpoints = \"./checkpoints\"  # location of model checkpoints\n",
    "\n",
    "# Informer decoder input: concat[start token series(label_len), zero padding series(pred_len)]\n",
    "\n",
    "args.c_out = 1  # output size\n",
    "args.factor = 5  # probsparse attn factor\n",
    "args.d_model = 512  # dimension of model\n",
    "args.n_heads = 8  # num of heads\n",
    "args.e_layers = 2  # num of encoder layers\n",
    "args.d_layers = 1  # num of decoder layers\n",
    "args.d_ff = 2048  # dimension of fcn in model\n",
    "args.dropout = 0.05  # dropout\n",
    "args.attn = \"prob\"  # attention used in encoder, options:[prob, full]\n",
    "args.t_embed = \"timeF\"  # time features encoding, options:[timeF, fixed, learned]\n",
    "args.activation = \"gelu\"  # activation\n",
    "args.distil = True  # whether to use distilling in encoder\n",
    "args.output_attention = False  # whether to output attention in ecoder\n",
    "args.mix = True\n",
    "args.padding = 0\n",
    "\n",
    "args.batch_size = 32\n",
    "args.learning_rate = 0.00001\n",
    "args.loss = \"mse\"\n",
    "args.lradj = \"type1\"\n",
    "args.use_amp = False  # whether to use automatic mixed precision training\n",
    "\n",
    "args.num_workers = 0\n",
    "args.itr = 1  # number of runs\n",
    "args.max_epochs = 10\n",
    "args.patience = 4\n",
    "args.des = \"assumption_time\"\n",
    "\n",
    "args.inverse = True  # Defaultly False but @Zac thinks it should be True\n",
    "\n",
    "handle_gpu(args, None)\n",
    "\n",
    "# idk what this is for\n",
    "args.detail_freq = args.freq\n",
    "args.freq = args.freq[-1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "args.seq_len = 128  # input sequence length of Informer encoder\n",
    "args.label_len = 64  # start token length of Informer decoder\n",
    "args.pred_len = 16  # prediction sequence length\n",
    "\n",
    "args.cols = [args.target]\n",
    "args.enc_in = 1  # encoder input size\n",
    "args.dec_in = 1  # decoder input size\n",
    "\n",
    "\n",
    "# Test data is always just the data after this date\n",
    "args.date_test = \"2022-04-01\"\n",
    "\n",
    "exp = None\n",
    "setting = None\n",
    "\n",
    "# \"2022-01-01\",\n",
    "date_starts = [\"2021-01-01\", \"2020-01-01\", \"2018-01-01\", None]\n",
    "ii = 0\n",
    "for date_start in date_starts:\n",
    "\n",
    "    args.date_start = date_start\n",
    "\n",
    "    print(\"Args in experiment:\")\n",
    "    print(args)\n",
    "\n",
    "    Exp = Exp_Informer\n",
    "    for _ in range(args.itr):\n",
    "        # setting record of experiments\n",
    "        setting = setting_from_args(args, ii)\n",
    "\n",
    "        # set experiments\n",
    "        exp = Exp(args)\n",
    "\n",
    "        # train\n",
    "        print(f\"=======date_start : {args.date_start}=====================\")\n",
    "        print(f\">>>>>>>start training : {setting}>>>>>>>>>>>>>>>>>>>>>>>>>>\")\n",
    "        exp.train(setting)\n",
    "\n",
    "        # test\n",
    "        print(f\">>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\")\n",
    "        exp.test(setting)\n",
    "\n",
    "        torch.cuda.empty_cache()\n",
    "        ii += 1"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "former",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6 (main, Oct 24 2022, 16:07:47) [GCC 11.2.0]"
  },
  "vscode": {
   "interpreter": {
    "hash": "44e5710a47a66ec240c2a0834fd7c20e15c61536e70be6891d892a39679ad994"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}