qlearning_v1-6 using PRB reward

Browse files

Files changed (8) hide show

README.md +2 -1
fin_rl_PPO_v3.ipynb +0 -0
fin_rl_qlearning_v1-3.ipynb +0 -0
fin_rl_qlearning_v1-4.ipynb +2 -52
fin_rl_qlearning_v1-5.ipynb +0 -0
fin_rl_qlearning_v1-6.ipynb +1295 -0
fin_rl_qlearning_v1-7.ipynb +0 -0
todo_next.txt +2 -0

README.md CHANGED Viewed

@@ -9,4 +9,5 @@
 # Q-learning
 Now using q-learning with a custom environment
-fin_rl_qlearning_v1.ipynb

 # Q-learning
 Now using q-learning with a custom environment
+fin_rl_qlearning_v1.ipynb

fin_rl_PPO_v3.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

fin_rl_qlearning_v1-3.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

fin_rl_qlearning_v1-4.ipynb CHANGED Viewed

@@ -837,59 +837,9 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 22,
       "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "[(1, 4, 8.645273113640826, 11.755207560128675, 3.1099344464878484),\n",
-              " (5, 7, 11.171935380608351, 9.6441605861055, 1.527774794502852),\n",
-              " (9, 13, 10.295810456549015, 8.282581743389205, 2.01322871315981),\n",
-              " (14, 16, 9.137837754794978, 11.628818062612993, 2.490980307818015),\n",
-              " (17, 19, 11.608355073821109, 9.857818739078045, 1.7505363347430638),\n",
-              " (21, 26, 10.057416515635682, 8.528742398078922, 1.5286741175567595),\n",
-              " (27, 32, 9.74902775472201, 9.670699802197579, 0.0783279525244307),\n",
-              " (33, 35, 9.78769038301581, 10.300434254468536, 0.5127438714527255),\n",
-              " (36, 38, 9.60217789374197, 8.250852689557544, 1.3513252041844268),\n",
-              " (39, 40, 8.971004003020443, 10.395078818094584, 1.4240748150741407),\n",
-              " (42, 44, 11.673729766919589, 9.944263607173118, 1.7294661597464707),\n",
-              " (45, 46, 9.636292392244973, 9.991269928983368, 0.35497753673839405),\n",
-              " (47, 51, 10.360163262862152, 8.430825035963325, 1.9293382268988264),\n",
-              " (52, 54, 9.554103757397273, 11.742814826136176, 2.188711068738902),\n",
-              " (55, 57, 11.403617185189336, 9.71178561119097, 1.6918315739983658),\n",
-              " (58, 59, 9.736562354250092, 10.197672768283018, 0.46111041403292674),\n",
-              " (60, 64, 10.337689357211167, 8.817539963222865, 1.5201493939883015),\n",
-              " (65, 69, 10.199032988393924, 10.042850712802524, 0.15618227559139974),\n",
-              " (70, 71, 9.63107312063706, 9.925392109060335, 0.2943189884232744),\n",
-              " (72, 76, 10.338918119518627, 8.35252995759046, 1.9863881619281667),\n",
-              " (77, 83, 9.365049578445921, 9.693865537165568, -0.3288159587196464),\n",
-              " (84, 89, 10.138407662002876, 8.67933961768415, 1.4590680443187267),\n",
-              " (90, 95, 10.000361732237739, 9.63951969965352, 0.3608420325842179),\n",
-              " (96, 97, 9.861816695922618, 10.30627224611643, 0.4444555501938119),\n",
-              " (98, 107, 10.232441825356547, 9.836950696890277, 0.3954911284662703),\n",
-              " (108, 109, 9.661180634141186, 10.074844279839029, 0.4136636456978433),\n",
-              " (110, 114, 10.368921355130126, 8.558052792767318, 1.810868562362808),\n",
-              " (115, 120, 9.801685702157227, 9.662201837344819, 0.1394838648124086),\n",
-              " (121, 123, 9.80253246200774, 10.288038598865318, 0.4855061368575786),\n",
-              " (124, 127, 9.567447370545475, 9.01427913814844, 0.5531682323970344),\n",
-              " (128, 132, 10.44659531472739, 9.919971609023063, 0.5266237057043259),\n",
-              " (133, 134, 9.639891825308629, 10.008971159712608, 0.36907933440397933),\n",
-              " (135, 139, 10.363750373990332, 8.455059573520186, 1.9086908004701453),\n",
-              " (140, 145, 9.605625627674012, 9.699404019036715, -0.0937783913627026),\n",
-              " (146, 148, 9.749444489037334, 10.32917728184808, 0.5797327928107467),\n",
-              " (149, 152, 9.695479843750826, 8.857054399227408, 0.838425444523418),\n",
-              " (153, 157, 10.25168805575754, 10.015435969355776, 0.23625208640176432),\n",
-              " (158, 159, 9.63115885189913, 9.942821850118737, 0.31166299821960664),\n",
-              " (160, 164, 10.345751755784269, 8.371449341610663, 1.9743024141736054),\n",
-              " (165, 170, 9.414760057555497, 9.751117491956888, -0.3363574344013909)]"
-            ]
-          },
-          "execution_count": 22,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
       "source": [
         "env_test._trade_tick_history"
       ]

     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {},
+      "outputs": [],
       "source": [
         "env_test._trade_tick_history"
       ]

fin_rl_qlearning_v1-5.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

fin_rl_qlearning_v1-6.ipynb ADDED Viewed

	@@ -0,0 +1,1295 @@

+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nwaAZRu1NTiI"
+      },
+      "source": [
+        "# Q-learning \n",
+        "\n",
+        "#### This version implements Q-learning using a custom environment on 1-day (daily) synthetic data. The Q-table is stored in SQLite, so several features can be added to the state.\n",
+        "\n",
+        "##### Experiments\n",
+        "- Change the reward function and see the results on trading \n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "DDf1gLC2NTiK"
+      },
+      "outputs": [],
+      "source": [
+        "# !pip install -r ./requirements.txt\n",
+        "# !pip install stable_baselines3\n",
+        "# !pip install yfinance\n",
+        "# !pip install talib-binary\n",
+        "# !pip install huggingface_sb3\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "LNXxxKojNTiL"
+      },
+      "outputs": [],
+      "source": [
+        "import gym\n",
+        "from gym import spaces\n",
+        "from gym.utils import seeding\n",
+        "\n",
+        "import talib as ta\n",
+        "from tqdm.notebook import tqdm\n",
+        "\n",
+        "import yfinance as yf\n",
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "from matplotlib import pyplot as plt\n",
+        "import timeit\n",
+        "import sqlite3\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def get_syntetic_data(tf, start_date, end_date, plot=True, add_noise=None):\n",
+        "    df = pd.date_range(start=start_date, end=end_date, freq=tf)\n",
+        "    df = df.to_frame()\n",
+        "\n",
+        "    df['v1'] = np.arange(len(df.index))\n",
+        "    df[['Open','High','Low','Close','Volume']] = 0.0\n",
+        "    df = df.drop([0], axis=1)\n",
+        "\n",
+        "    df[\"Close\"]=df[\"v1\"].map(lambda x: np.sin(x/3)+10 )\n",
+        "    # df[\"Close\"]=df[\"v1\"].map(lambda x: np.sin(x)+10 + np.sin(x/2) )\n",
+        "    if add_noise is not None: # could be 0.5\n",
+        "        noise = np.random.normal(0, add_noise, len(df))\n",
+        "        df[\"Close\"] += noise\n",
+        "\n",
+        "    if plot:\n",
+        "        plt.figure(figsize=(15,6))\n",
+        "        df['Close'].tail(30).plot()\n",
+        "\n",
+        "    df[\"Open\"]=df[\"Close\"].shift(1)\n",
+        "    df = df.dropna()\n",
+        "    x = 1.5\n",
+        "    df[\"High\"] = np.where( df[\"Close\"] > df['Open'], df[\"Close\"]+x, df[\"Open\"]+x )\n",
+        "    df[\"Low\"] = np.where( df[\"Close\"] < df['Open'], df[\"Close\"]-x, df[\"Open\"]-x )\n",
+        "    df[\"Volume\"] = 10\n",
+        "    return df"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "dmAuEhZZNTiL"
+      },
+      "outputs": [],
+      "source": [
+        "# Get data\n",
+        "eth_usd = yf.Ticker(\"ETH-USD\")\n",
+        "eth = eth_usd.history(period=\"max\")\n",
+        "\n",
+        "btc_usd = yf.Ticker(\"BTC-USD\")\n",
+        "btc = btc_usd.history(period=\"max\")\n",
+        "print(len(btc))\n",
+        "print(len(eth))\n",
+        "\n",
+        "btc_train = eth[-3015:-200]\n",
+        "# btc_test = eth[-200:]\n",
+        "eth_train = eth[-1864:-200]\n",
+        "eth_test = eth[-200:]\n",
+        "# len(eth_train)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# use synthetic data\n",
+        "# synthetic_data = get_syntetic_data(tf=\"D\", start_date=\"2015-01-01\", end_date=\"2015-02-05\", add_noise=None)\n",
+        "synthetic_data = get_syntetic_data(tf=\"D\", start_date=\"2015-01-01\", end_date=\"2023-01-01\", add_noise=None)\n",
+        "eth_train = synthetic_data[-1864:-200]\n",
+        "eth_test = synthetic_data[-200:]\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\n",
+        "class Qtable:\n",
+        "    def __init__(self):\n",
+        "        self.conn = sqlite3.connect(':memory:')\n",
+        "        self.cursor = self.conn.cursor()\n",
+        "\n",
+        "    def create_table(self):\n",
+        "        columns = [(\"s_position\", \"INTEGER\"),(\"s_di\", \"INTEGER\"), (\"s_mfi\", \"INTEGER\"), (\"s_stock_d\", \"INTEGER\"),(\"s_adx\", \"INTEGER\"), (\"action\", \"INTEGER\"), (\"qvalue\", \"REAL\")]\n",
+        "        columns_string = \", \".join([f\"{name} {data_type}\" for name, data_type in columns])\n",
+        "        columns_keys = \"(s_position, s_di, s_mfi, s_stock_d, s_adx, action)\"\n",
+        "        query = f\"CREATE TABLE IF NOT EXISTS QTABLE ({columns_string}, PRIMARY KEY {columns_keys})\"\n",
+        "        self.cursor.execute(query)\n",
+        "        self.conn.commit()\n",
+        "\n",
+        "    def set_q_value(self, state, action, qvalue):\n",
+        "        query = f\"INSERT INTO QTABLE (s_position, s_di, s_mfi, s_stock_d, s_adx, action, qvalue) VALUES (?,?,?,?,?,?,?) ON CONFLICT (s_position, s_di, s_mfi, s_stock_d, s_adx, action) DO UPDATE SET qvalue=?\"\n",
+        "        self.cursor.execute(query,state.tolist()+[action]+[qvalue]+[qvalue])\n",
+        "        self.conn.commit()\n",
+        "\n",
+        "    def get_q_value(self, state, action):\n",
+        "        self.cursor.execute(\"SELECT qvalue from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=? and action=?\",state.tolist()+[action])\n",
+        "        rows = self.cursor.fetchall()\n",
+        "        if len(rows) > 0:\n",
+        "            return rows[0][0]\n",
+        "        return None\n",
+        "\n",
+        "    def get_max_q_value(self, state):\n",
+        "        self.cursor.execute(\"SELECT max(qvalue) from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=?\",state.tolist())\n",
+        "        rows = self.cursor.fetchall()\n",
+        "        if len(rows) > 0:\n",
+        "            return rows[0][0]\n",
+        "        return None\n",
+        "\n",
+        "    def get_max_action(self, state):\n",
+        "        self.cursor.execute(\"SELECT action, max(qvalue) from QTABLE where s_position=? and s_di=? and s_mfi=? and s_stock_d=? and s_adx=?\",state.tolist())\n",
+        "        rows = self.cursor.fetchall()\n",
+        "        if len(rows) > 0:\n",
+        "            return rows[0][0]\n",
+        "        return None\n",
+        "\n",
+        "    def getall(self):\n",
+        "        self.cursor.execute(\"SELECT * from QTABLE \")\n",
+        "        return self.cursor.fetchall()\n",
+        "    \n",
+        "    "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def initialize_q_table():\n",
+        "    # s_ state variables\n",
+        "    qtable = Qtable()\n",
+        "    qtable.create_table()  \n",
+        "    return qtable"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Policy\n",
+        "\n",
+        "def greedy_policy(Qtable, state):\n",
+        "    # Exploitation: take the action with the highest Q-value for this state\n",
+        "    # if the state has no stored values this returns None (the DO_NOTHING fallback below is commented out)\n",
+        "    action = Qtable.get_max_action(state)\n",
+        "    # if action is None:\n",
+        "    #     action = 2\n",
+        "    # action = np.argmax(Qtable[state])\n",
+        "    return action\n",
+        "\n",
+        "\n",
+        "def epsilon_greedy_policy(Qtable, state, epsilon, env):\n",
+        "  # Randomly generate a number between 0 and 1\n",
+        "  random_num = np.random.uniform(size=1)\n",
+        "  # if random_num is greater than epsilon --> exploitation\n",
+        "  if random_num > epsilon:\n",
+        "    # Take the action with the highest value given a state\n",
+        "    # np.argmax can be useful here\n",
+        "    action = greedy_policy(Qtable, state)\n",
+        "  # else --> exploration\n",
+        "  else:\n",
+        "    # action = np.random.random_integers(4,size=1)[0]\n",
+        "    action = env.action_space.sample()\n",
+        "  \n",
+        "  return action"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "wlC-EdLENTiN"
+      },
+      "outputs": [],
+      "source": [
+        "\n",
+        "def train(n_training_episodes, min_epsilon, max_epsilon, decay_rate, env, max_steps, Qtable, learning_rate, gamma):\n",
+        "  state_history = []\n",
+        "#   np.random.seed(42)\n",
+        "  for episode in range(n_training_episodes):\n",
+        "    # Reduce epsilon (because we need less and less exploration)\n",
+        "    epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*episode)\n",
+        "    # Reset the environment\n",
+        "    state = env.reset()\n",
+        "    step = 0\n",
+        "    done = False\n",
+        "\n",
+        "    # repeat\n",
+        "    for step in range(max_steps):\n",
+        "      # Choose the action At using epsilon greedy policy\n",
+        "      action = epsilon_greedy_policy(Qtable, state, epsilon, env)\n",
+        "\n",
+        "      # Take action At and observe Rt+1 and St+1\n",
+        "      # Take the action (a) and observe the outcome state(s') and reward (r)\n",
+        "      new_state, reward, done, info = env.step(action)\n",
+        "\n",
+        "      # Update Q(s,a):= Q(s,a) + lr [R(s,a) + gamma * max Q(s',a') - Q(s,a)]\n",
+        "      # Qtable[state][action] = Qtable[state][action] + learning_rate * (reward + gamma * ( np.max(Qtable[new_state])  ) -  Qtable[state][action] )\n",
+        "      qvalue =  Qtable.get_q_value(state, action)\n",
+        "      if qvalue is None:\n",
+        "        qvalue = 0\n",
+        "\n",
+        "      q_max_state = Qtable.get_max_q_value(new_state)\n",
+        "      if q_max_state is None:\n",
+        "        q_max_state = 0\n",
+        "      \n",
+        "      n_qvalue = qvalue + learning_rate * (reward + gamma * ( q_max_state  ) -  qvalue )\n",
+        "      Qtable.set_q_value(state, action, n_qvalue)\n",
+        "\n",
+        "      # If done, finish the episode\n",
+        "      if done:\n",
+        "        break\n",
+        "      \n",
+        "      # Our next state is the new state\n",
+        "      state = new_state\n",
+        "\n",
+        "      state_history.append(state)  \n",
+        "\n",
+        "  return Qtable, state_history"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def evaluate_agent(env, max_steps, n_eval_episodes, Q, random=False):\n",
+        "  \"\"\"\n",
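+        "  Evaluate the agent for ``n_eval_episodes`` episodes and return the mean and std of the episode rewards and profits, plus the mean fraction of profitable trades.\n",
+        "  :param env: The evaluation environment\n",
+        "  :param max_steps: Maximum number of steps per episode\n",
+        "  :param n_eval_episodes: Number of episodes to evaluate the agent\n",
+        "  :param Q: The Q-table\n",
+        "  :param random: If True, sample random actions instead of following the greedy policy\n",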
+        "  \"\"\"\n",
+        "  episode_positive_perc_trades = []\n",
+        "  episode_rewards = []\n",
+        "  episode_profits = []\n",
+        "  for episode in tqdm(range(n_eval_episodes), disable=random):\n",
+        "    state = env.reset()\n",
+        "    step = 0\n",
+        "    done = False\n",
+        "    total_rewards_ep = 0\n",
+        "    total_profit_ep = 0\n",
+        "    \n",
+        "    for step in range(max_steps):\n",
+        "      # Take the action (index) that have the maximum expected future reward given that state\n",
+        "      if random:\n",
+        "        action = env.action_space.sample()\n",
+        "      else:\n",
+        "        action = greedy_policy(Q, state)\n",
+        "\n",
+        "      new_state, reward, done, info = env.step(action)\n",
+        "      total_rewards_ep += reward\n",
+        "        \n",
+        "      if done:\n",
+        "        break\n",
+        "      state = new_state\n",
+        "\n",
+        "    if len(env._trade_history) > 0:\n",
+        "        episode_positive_perc_trades.append(np.count_nonzero(np.array(env._trade_history) > 0)/len(env._trade_history))\n",
+        "    episode_rewards.append(total_rewards_ep)\n",
+        "    episode_profits.append(env.history['total_profit'][-1])\n",
+        "    # print(env.history)\n",
+        "    # env.render()\n",
+        "    # assert 0\n",
+        "\n",
+        "  mean_reward = np.mean(episode_rewards)\n",
+        "  std_reward = np.std(episode_rewards)\n",
+        "  mean_profit = np.mean(episode_profits)\n",
+        "  std_profit = np.std(episode_profits)\n",
+        "  positive_perc_trades = np.mean(episode_positive_perc_trades)\n",
+        "\n",
+        "  return mean_reward, std_reward, mean_profit, std_profit, positive_perc_trades"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from enum import Enum\n",
+        "class Actions(Enum):\n",
+        "    Sell = 0\n",
+        "    Buy = 1\n",
+        "    Do_nothing = 2\n",
+        "\n",
+        "class CustTradingEnv(gym.Env):\n",
+        "\n",
+        "    def __init__(self, df, max_steps=0, random_start=True):\n",
+        "        self.seed(seed=43)\n",
+        "        self.df = df\n",
+        "        self.prices, self.signal_features = self._process_data()\n",
+        "\n",
+        "        # spaces\n",
+        "        self.action_space = spaces.Discrete(3)\n",
+        "        self.observation_space = spaces.Box(low=0, high=1999, shape=(1,) , dtype=np.float64)\n",
+        "\n",
+        "        # episode\n",
+        "        self._start_tick = 0\n",
+        "        self._end_tick = 0\n",
+        "        self._done = None\n",
+        "        self._current_tick = None\n",
+        "        self._last_trade_tick = None\n",
+        "        self._position = None\n",
+        "        self._position_history = None\n",
+        "        self._total_reward = None\n",
+        "        self._total_profit = None\n",
+        "        self._first_rendering = None\n",
+        "        self.history = None\n",
+        "        self._max_steps = max_steps\n",
+        "        self._start_episode_tick = None\n",
+        "        self._trade_history = None\n",
+        "        self._trade_tick_history = None\n",
+        "        self._random_start = random_start\n",
+        "        self._action_history = None\n",
+        "\n",
+        "    def reset(self):\n",
+        "        self._done = False\n",
+        "        if self._random_start:\n",
+        "            self._start_episode_tick = np.random.randint(1,high=len(self.df)- self._max_steps )\n",
+        "            self._end_tick = self._start_episode_tick + self._max_steps\n",
+        "        else:\n",
+        "            self._start_episode_tick = 1\n",
+        "            self._end_tick = len(self.df)-1\n",
+        "        # self._start_episode_tick = np.random.randint(1,len(self.df)- self._max_steps )\n",
+        "        # self._end_tick = self._start_episode_tick + self._max_steps\n",
+        "        self._current_tick = self._start_episode_tick\n",
+        "        self._last_trade_tick = self._current_tick - 1\n",
+        "        self._position = 0\n",
+        "        self._action_history = [-1] * (len(self.prices)) \n",
+        "        # self._position_history = (self.window_size * [None]) + [self._position]\n",
+        "        self._total_reward = 0.\n",
+        "        self._total_profit = 0.\n",
+        "        self._trade_history = []\n",
+        "        self._trade_tick_history = []\n",
+        "        self.history = {}\n",
+        "        return self._get_observation()\n",
+        "\n",
+        "\n",
+        "    def step(self, action):\n",
+        "        self._done = False\n",
+        "        self._current_tick += 1\n",
+        "\n",
+        "        if self._current_tick == self._end_tick:\n",
+        "            self._done = True\n",
+        "\n",
+        "        self._do_act(action)\n",
+        "        step_reward = self._calculate_reward(action)\n",
+        "        self._total_reward += step_reward\n",
+        "\n",
+        "        observation = self._get_observation()\n",
+        "        info = dict(\n",
+        "            total_reward = self._total_reward,\n",
+        "            total_profit = self._total_profit,\n",
+        "            position = self._position,\n",
+        "            action = action\n",
+        "        )\n",
+        "        self._update_history(info)\n",
+        "\n",
+        "        return observation, step_reward, self._done, info\n",
+        "\n",
+        "    def seed(self, seed=None):\n",
+        "        self.np_random, seed = seeding.np_random(seed)\n",
+        "        return [seed]\n",
+        "        \n",
+        "    def _get_observation(self):\n",
+        "        if self._position > 0:\n",
+        "            position = 1\n",
+        "        elif self._position < 0:\n",
+        "            position = -1\n",
+        "        else:\n",
+        "            position = 0\n",
+        "        return np.concatenate( [[position], self.signal_features[self._current_tick]] )\n",
+        "\n",
+        "    def _update_history(self, info):\n",
+        "        if not self.history:\n",
+        "            self.history = {key: [] for key in info.keys()}\n",
+        "\n",
+        "        for key, value in info.items():\n",
+        "            self.history[key].append(value)\n",
+        "\n",
+        "\n",
+        "    def render(self, mode='human'):\n",
+        "        window_ticks = np.arange(len(self.prices))\n",
+        "        prices = self.prices\n",
+        "        # prices = self.prices[self._start_episode_tick:self._end_tick+1]\n",
+        "        plt.plot(prices)\n",
+        "\n",
+        "        open_buy = []\n",
+        "        close_buy = []\n",
+        "        open_sell = []\n",
+        "        close_sell = []\n",
+        "        do_nothing = []\n",
+        "        penalty = []\n",
+        "        action_not_in_table = []\n",
+        "\n",
+        "        for i, tick in enumerate(window_ticks):\n",
+        "            if self._action_history[i] == 1:\n",
+        "                open_buy.append(tick)\n",
+        "            elif self._action_history[i] == 2 :\n",
+        "                close_buy.append(tick)\n",
+        "            elif self._action_history[i] == 3 :\n",
+        "                open_sell.append(tick)\n",
+        "            elif self._action_history[i] == 4 :\n",
+        "                close_sell.append(tick)\n",
+        "            elif self._action_history[i] == 0 :\n",
+        "                do_nothing.append(tick)\n",
+        "            elif self._action_history[i] == 5 :\n",
+        "                penalty.append(tick)\n",
+        "            elif self._action_history[i] == 6 :\n",
+        "                action_not_in_table.append(tick)\n",
+        "\n",
+        "        plt.plot(open_buy, prices[open_buy], 'go', marker=\"^\")\n",
+        "        plt.plot(close_buy, prices[close_buy], 'go', marker=\"v\")\n",
+        "        plt.plot(open_sell, prices[open_sell], 'ro', marker=\"v\")\n",
+        "        plt.plot(close_sell, prices[close_sell], 'ro', marker=\"^\")\n",
+        "    \n",
+        "        plt.plot(do_nothing, prices[do_nothing], 'oc')\n",
+        "        plt.plot(penalty, prices[penalty], 'yo')\n",
+        "\n",
+        "        plt.plot(action_not_in_table, prices[action_not_in_table], 'ob')\n",
+        "\n",
+        "        plt.suptitle(\n",
+        "            \"Total Reward: %.6f\" % self._total_reward + ' ~ ' +\n",
+        "            \"Total Profit: %.6f\" % self._total_profit\n",
+        "        )\n",
+        "\n",
+        "    def _do_bin(self,df):\n",
+        "        df = pd.cut(df,bins=np.arange(0,105,5),labels=False, include_lowest=True)\n",
+        "        return df\n",
+        "\n",
+        "    # Our state is encoded with 4 features: DI+/DI-, MFI, Stochastic (only the D line) and ADX\n",
+        "    # MFI, Stochastic and ADX are binned in steps of 5 (20 bins over 0-100, indexed 0-19), ex:\n",
+        "    # an MFI of 23 falls in (20, 25] and is put in bin 4\n",
+        "    # DI+DI-: 1 if DI+ is over DI-, otherwise 0\n",
+        "    #\n",
+        "    # that gives a feature space of 20(MFI) * 20(STOCH) * 20(ADX) * 2(DI) = 16000 combinations\n",
+        "    # kept as separate values DI MFI STOCH ADX = 1 45.2 25.4 90.1, binned = 1 9 5 18 (the position is prepended in _get_observation)\n",
+        "    def _process_data(self):\n",
+        "        timeperiod = 14\n",
+        "        self.df = self.df.copy()\n",
+        "        \n",
+        "        self.df['adx_r'] = ta.ADX(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n",
+        "        self.df['mfi_r'] = ta.MFI(self.df['High'], self.df['Low'], self.df['Close'],self.df['Volume'], timeperiod=timeperiod)\n",
+        "        _, self.df['stock_d_r'] = ta.STOCH(self.df['High'], self.df['Low'], self.df['Close'], fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)\n",
+        "        self.df['p_di'] = ta.PLUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n",
+        "        self.df['m_di'] = ta.MINUS_DI(self.df['High'], self.df['Low'], self.df['Close'], timeperiod=timeperiod)\n",
+        "        self.df['di'] = np.where( self.df['p_di'] > self.df['m_di'], 1, 0)\n",
+        "        self.df = self.df.dropna()\n",
+        "        self.df['mfi'] = self._do_bin(self.df['mfi_r'])\n",
+        "        self.df['stock_d'] = self._do_bin(self.df['stock_d_r'])\n",
+        "        self.df['adx'] = self._do_bin(self.df['adx_r'])\n",
+        "\n",
+        "        # self.df['state'] = self.df['di']*1000+ self.df['mfi']*100 + self.df['stock_d']*10 + self.df['adx']\n",
+        "\n",
+        "        prices = self.df.loc[:, 'Close'].to_numpy()\n",
+        "        # signal_features = self.df.loc[:, 'state'].to_numpy()\n",
+        "        signal_features = self.df.loc[:, ['di', 'mfi', 'stock_d','adx']].to_numpy()\n",
+        "\n",
+        "        return prices, signal_features\n",
+        "\n",
+        "\n",
+        "    def _do_act(self, action):\n",
+        "        if action is None:\n",
+        "            self._action_history[self._current_tick-1]=6\n",
+        "\n",
+        "        current_price = self.prices[self._current_tick]\n",
+        "        last_price = self.prices[self._current_tick - 1]\n",
+        "        price_diff = current_price - last_price\n",
+        "\n",
+        "        # OPEN BUY - 1\n",
+        "        if action == Actions.Buy.value and self._position == 0:\n",
+        "            self._position = last_price\n",
+        "            # step_reward += price_diff\n",
+        "            self._last_trade_tick = self._current_tick - 1\n",
+        "            self._action_history[self._current_tick-1]=1\n",
+        "\n",
+        "        # CLOSE BUY - 2\n",
+        "        elif action == Actions.Sell.value and self._position > 0:\n",
+        "            self._position = 0\n",
+        "            profit = self.prices[self._current_tick-1] - self.prices[self._last_trade_tick] \n",
+        "            self._total_profit += profit\n",
+        "            self._action_history[self._current_tick-1]=2\n",
+        "            self._trade_history.append(profit)\n",
+        "            self._trade_tick_history.append((self._last_trade_tick, self._current_tick-1, self.prices[self._last_trade_tick], self.prices[self._current_tick-1], profit))\n",
+        "\n",
+        "        elif action == Actions.Buy.value and self._position > 0:\n",
+        "            self._action_history[self._current_tick-1]=5\n",
+        "\n",
+        "        # OPEN SELL - 3\n",
+        "        elif action == Actions.Sell.value and self._position == 0:\n",
+        "            self._position = -1 * last_price\n",
+        "            self._last_trade_tick = self._current_tick - 1\n",
+        "            self._action_history[self._current_tick-1]=3\n",
+        "\n",
+        "        # CLOSE SELL - 4\n",
+        "        elif action == Actions.Buy.value and self._position < 0:\n",
+        "            self._position = 0\n",
+        "            profit = -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]) \n",
+        "            self._total_profit += profit\n",
+        "            self._action_history[self._current_tick-1]=4\n",
+        "            self._trade_history.append(profit)\n",
+        "            self._trade_tick_history.append((self._last_trade_tick, self._current_tick-1, self.prices[self._last_trade_tick], self.prices[self._current_tick-1], profit))\n",
+        "\n",
+        "        elif action == Actions.Sell.value and self._position < 0:\n",
+        "            self._action_history[self._current_tick-1]=5\n",
+        "\n",
+        "        # DO NOTHING - 0\n",
+        "        elif action == Actions.Do_nothing.value and self._position > 0:\n",
+        "            self._action_history[self._current_tick-1]=0\n",
+        "        elif action == Actions.Do_nothing.value and self._position < 0:\n",
+        "            self._action_history[self._current_tick-1]=0\n",
+        "        elif action == Actions.Do_nothing.value and self._position == 0:\n",
+        "            self._action_history[self._current_tick-1]=0\n",
+        "\n",
+        "    \n",
+        "    def _calculate_reward(self, action):\n",
+        "        current_price = self.prices[self._current_tick]\n",
+        "        last_price = self.prices[self._current_tick - 1]\n",
+        "        price_diff = current_price - last_price\n",
+        "\n",
+        "        if not self.history:\n",
+        "            return 0\n",
+        "\n",
+        "        # simple strategy, reward when close the buy or sell\n",
+        "        # closed buy\n",
+        "        if self._position == 0 and self.history['position'][-1] > 0 :\n",
+        "            return self.prices[self._current_tick-1] - self.prices[self._last_trade_tick] \n",
+        "        \n",
+        "        # closed sell\n",
+        "        if self._position == 0 and self.history['position'][-1] < 0:\n",
+        "            return -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]) \n",
+        "\n",
+        "\n",
+        "        # # reward when opening a buy or sell (DOESN'T WORK)\n",
+        "        # # open buy\n",
+        "        # if self._position > 0 and self.history['position'][-1] == 0 :\n",
+        "        #     return self.prices[self._current_tick-1] - self.prices[self._last_trade_tick] \n",
+        "        \n",
+        "        # # open sell\n",
+        "        # if self._position < 0 and self.history['position'][-1] == 0:\n",
+        "        #     return -1 * (self.prices[self._current_tick-1] - self.prices[self._last_trade_tick]) \n",
+        "\n",
+        "        # # PRB\n",
+        "        # return price_diff * self._position\n",
+        "\n",
+        "\n",
+        "        return 0\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Training parameters\n",
+        "n_training_episodes = 20000  # Total training episodes\n",
+        "learning_rate = 0.2          # Learning rate\n",
+        "\n",
+        "# Environment parameters\n",
+        "max_steps = 20   # Max steps per episode\n",
+        "gamma = 0.95                 # Discounting rate\n",
+        "\n",
+        "# Exploration parameters\n",
+        "max_epsilon = 1.0             # Exploration probability at start\n",
+        "# max_epsilon = 1.0             # Exploration probability at start\n",
+        "min_epsilon = 0.05            # Minimum exploration probability \n",
+        "# min_epsilon = 0.05            # Minimum exploration probability \n",
+        "decay_rate = 0.0005            # Exponential decay rate for exploration prob"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "REhmfLkYNTiN",
+        "outputId": "cf676f6d-83df-43f5-89fe-3258e0041d9d"
+      },
+      "outputs": [],
+      "source": [
+        "# create env\n",
+        "env = CustTradingEnv(df=eth_train, max_steps=max_steps, random_start=True)\n",
+        "Qtable_trading = initialize_q_table()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "\n",
+        "# train \n",
+        "Qtable_trading, state_history = train(n_training_episodes, min_epsilon, max_epsilon, \n",
+        "                        decay_rate, env, max_steps, Qtable_trading, learning_rate, gamma )\n",
+        "\n",
+        "len(Qtable_trading.getall())\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Qtable_trading.getall()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "max_steps = 60 \n",
+        "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=True)\n",
+        "n_eval_episodes = 1000\n",
+        "\n",
+        "evaluate_agent(env_test, max_steps, n_eval_episodes, Qtable_trading)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15,6))\n",
+        "plt.cla()\n",
+        "env_test.render()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# trade sequentially over the whole test set\n",
+        "max_steps = len(eth_test)\n",
+        "env_test = CustTradingEnv(df=eth_test, max_steps=max_steps, random_start=False)\n",
+        "n_eval_episodes = 1\n",
+        "\n",
+        "evaluate_agent(env_test, max_steps, n_eval_episodes, Qtable_trading)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "plt.figure(figsize=(15,6))\n",
+        "plt.cla()\n",
+        "env_test.render()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# env_test._trade_tick_history\n",
+        "# Qtable_trading.getall()[:10]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3.8.13 ('rl2')",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.13"
+    },
+    "orig_nbformat": 4,
+    "vscode": {
+      "interpreter": {
+        "hash": "cd60ab8388a66026f336166410d6a8a46ddf65ece2e85ad2d46c8b98d87580d1"
+      }
+    },
+    "widgets": {
+      "application/vnd.jupyter.widget-state+json": {
+        "01a2dbcb714e40148b41c761fcf43147": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "20b0f38ec3234ff28a62a286cd57b933": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "PasswordModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "PasswordModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "PasswordView",
+            "continuous_update": true,
+            "description": "Token:",
+            "description_tooltip": null,
+            "disabled": false,
+            "layout": "IPY_MODEL_01a2dbcb714e40148b41c761fcf43147",
+            "placeholder": "",
+            "style": "IPY_MODEL_90c874e91b304ee1a7ef147767ac00ce",
+            "value": ""
+          }
+        },
+        "270cbb5d6e9c4b1e9e2f39c8b3b0c15f": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "VBoxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "VBoxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "VBoxView",
+            "box_style": "",
+            "children": [
+              "IPY_MODEL_a02224a43d8d4af3bd31d326540d25da",
+              "IPY_MODEL_20b0f38ec3234ff28a62a286cd57b933",
+              "IPY_MODEL_f6c845330d6743c0b35c2c7ad834de77",
+              "IPY_MODEL_f1675c09d16a4251b403f9c56255f168",
+              "IPY_MODEL_c1a82965ae26479a98e4fdbde1e64ec2"
+            ],
+            "layout": "IPY_MODEL_3fa248114ac24656ba74923936a94d2d"
+          }
+        },
+        "2dc5fa9aa3334dfcbdee9c238f2ef60b": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "3e753b0212644990b558c68853ff2041": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "3fa248114ac24656ba74923936a94d2d": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": "center",
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": "flex",
+            "flex": null,
+            "flex_flow": "column",
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": "50%"
+          }
+        },
+        "42d140b838b844819bc127afc1b7bc84": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "90c874e91b304ee1a7ef147767ac00ce": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "9d847f9a7d47458d8cd57d9b599e47c6": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "a02224a43d8d4af3bd31d326540d25da": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_caef095934ec47bbb8b64eab22049284",
+            "placeholder": "",
+            "style": "IPY_MODEL_2dc5fa9aa3334dfcbdee9c238f2ef60b",
+            "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
+          }
+        },
+        "a2cfb91cf66447d7899292854bd64a07": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "c1a82965ae26479a98e4fdbde1e64ec2": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "HTMLModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "HTMLModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "HTMLView",
+            "description": "",
+            "description_tooltip": null,
+            "layout": "IPY_MODEL_9d847f9a7d47458d8cd57d9b599e47c6",
+            "placeholder": "",
+            "style": "IPY_MODEL_42d140b838b844819bc127afc1b7bc84",
+            "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
+          }
+        },
+        "caef095934ec47bbb8b64eab22049284": {
+          "model_module": "@jupyter-widgets/base",
+          "model_module_version": "1.2.0",
+          "model_name": "LayoutModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/base",
+            "_model_module_version": "1.2.0",
+            "_model_name": "LayoutModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "LayoutView",
+            "align_content": null,
+            "align_items": null,
+            "align_self": null,
+            "border": null,
+            "bottom": null,
+            "display": null,
+            "flex": null,
+            "flex_flow": null,
+            "grid_area": null,
+            "grid_auto_columns": null,
+            "grid_auto_flow": null,
+            "grid_auto_rows": null,
+            "grid_column": null,
+            "grid_gap": null,
+            "grid_row": null,
+            "grid_template_areas": null,
+            "grid_template_columns": null,
+            "grid_template_rows": null,
+            "height": null,
+            "justify_content": null,
+            "justify_items": null,
+            "left": null,
+            "margin": null,
+            "max_height": null,
+            "max_width": null,
+            "min_height": null,
+            "min_width": null,
+            "object_fit": null,
+            "object_position": null,
+            "order": null,
+            "overflow": null,
+            "overflow_x": null,
+            "overflow_y": null,
+            "padding": null,
+            "right": null,
+            "top": null,
+            "visibility": null,
+            "width": null
+          }
+        },
+        "eaba3f1de4444aabadfea2a3dadb1d80": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "DescriptionStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "DescriptionStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "description_width": ""
+          }
+        },
+        "ee4a21bedc504171ad09d205d634b528": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ButtonStyleModel",
+          "state": {
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ButtonStyleModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/base",
+            "_view_module_version": "1.2.0",
+            "_view_name": "StyleView",
+            "button_color": null,
+            "font_weight": ""
+          }
+        },
+        "f1675c09d16a4251b403f9c56255f168": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "ButtonModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "ButtonModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "ButtonView",
+            "button_style": "",
+            "description": "Login",
+            "disabled": false,
+            "icon": "",
+            "layout": "IPY_MODEL_a2cfb91cf66447d7899292854bd64a07",
+            "style": "IPY_MODEL_ee4a21bedc504171ad09d205d634b528",
+            "tooltip": ""
+          }
+        },
+        "f6c845330d6743c0b35c2c7ad834de77": {
+          "model_module": "@jupyter-widgets/controls",
+          "model_module_version": "1.5.0",
+          "model_name": "CheckboxModel",
+          "state": {
+            "_dom_classes": [],
+            "_model_module": "@jupyter-widgets/controls",
+            "_model_module_version": "1.5.0",
+            "_model_name": "CheckboxModel",
+            "_view_count": null,
+            "_view_module": "@jupyter-widgets/controls",
+            "_view_module_version": "1.5.0",
+            "_view_name": "CheckboxView",
+            "description": "Add token as git credential?",
+            "description_tooltip": null,
+            "disabled": false,
+            "indent": true,
+            "layout": "IPY_MODEL_3e753b0212644990b558c68853ff2041",
+            "style": "IPY_MODEL_eaba3f1de4444aabadfea2a3dadb1d80",
+            "value": true
+          }
+        }
+      }
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}

fin_rl_qlearning_v1-7.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

todo_next.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ - Testar PPO com TI de hoje e ontem
2	+ - Testar log return como reward