{ "cells": [
 { "cell_type": "code", "execution_count": 23, "source": [
  "import pandas as pd\r\n",
  "import numpy as np\r\n",
  "import seaborn as sns\r\n",
  "import matplotlib.pyplot as plt\r\n",
  "import pickle as pk" ], "outputs": [], "metadata": {} },
 { "cell_type": "code", "execution_count": 24, "source": [
  "df=pd.read_csv('all_stocks_5yr.csv')\r\n" ], "outputs": [], "metadata": {} },
 { "cell_type": "code", "execution_count": 22, "source": [
  "df1=df['close']\r\n",
  "df1.iloc[0]" ], "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "14.75" ] }, "metadata": {}, "execution_count": 22 } ], "metadata": {} },
 { "cell_type": "code", "execution_count": 25, "source": [
  "#Creating Environment Matrix 2x2x3\r\n",
  "# axis 0: moving-average row from get_state, axis 1: trade flag (1=Cash, 0=Stock), axis 2: action\r\n",
  "env_rows=2\r\n",
  "env_cols=2\r\n",
  "n_action=3  # 0=buy, 1=sell, 2=hold\r\n",
  "\r\n",
  "q_table=np.zeros((env_rows,env_cols,n_action))\r\n",
  "np.random.seed(42)  # fixed seed so Restart & Run All reproduces the same exploration\r\n",
  "# The context manager closes the file even if pickling fails\r\n",
  "with open(\"pickl.pkl\",'wb') as fh:\r\n",
  "    pk.dump(q_table,fh)" ], "outputs": [], "metadata": {} },
 { "cell_type": "code", "execution_count": null, "source": [
  "# Sanity check: read back the table that was just written.\r\n",
  "# pickle.load executes arbitrary code, so only load files this notebook produced.\r\n",
  "with open(\"pickl.pkl\",'rb') as fh:\r\n",
  "    q_table_saved=pk.load(fh)\r\n",
  "q_table_saved" ], "outputs": [], "metadata": {} },
 { "cell_type": "code", "execution_count": 26, "source": [
  "#Defining Data Preprocessing Function\r\n",
  "\r\n",
  "def data_prep(data,name):\r\n",
  "    \"\"\"Return (train_df, test_df) for one ticker with moving-average features.\r\n",
  "\r\n",
  "    Keeps the rows of `data` whose 'Name' equals `name`, drops rows with\r\n",
  "    missing values and removes the 'high', 'low', 'volume' and 'Name' columns.\r\n",
  "    '5day_MA' and '1day_MA' are rolling means of 'close'; rows without a full\r\n",
  "    5-row window get 0. The first 80% of rows form the training frame, the\r\n",
  "    remainder (index reset) the test frame.\r\n",
  "    \"\"\"\r\n",
  "    df=(\r\n",
  "        data[data['Name']==name]\r\n",
  "        .dropna()\r\n",
  "        .drop(columns=['high','low','volume','Name'])\r\n",
  "        .reset_index(drop=True)\r\n",
  "    )\r\n",
  "    # Calculating 5 day and 1 day Moving Average for DF\r\n",
  "    # fillna(0) zeroes the warm-up rows without chained assignment, which\r\n",
  "    # triggers SettingWithCopyWarning and does nothing under copy-on-write.\r\n",
  "    df['5day_MA']=df['close'].rolling(5).mean().fillna(0)\r\n",
  "    df['1day_MA']=df['close'].rolling(1).mean()\r\n",
  "    #Splitting into train and Test data\r\n",
  "    split=int(len(df)*0.8)\r\n",
  "    train_df=df[:split]\r\n",
  "    test_df=df[split:].reset_index(drop=True)\r\n",
  "    return train_df,test_df\r\n",
  "\r\n",
  "# Get the state for datapoint by Moving Average\r\n",
  "def get_state(long_ma,short_ma,t):\r\n",
  "    \"\"\"Map the moving averages and the trade flag to a (row, col) Q-table index.\r\n",
  "\r\n",
  "    row is 0 when short_ma is below long_ma and 1 otherwise (ties included,\r\n",
  "    so a state is always returned). col is 1 (Cash) when t == 1 and\r\n",
  "    0 (Stock) otherwise.\r\n",
  "    \"\"\"\r\n",
  "    row=0 if long_ma > short_ma else 1\r\n",
  "    col=1 if t==1 else 0\r\n",
  "    return (row,col)\r\n",
  "\r\n",
  "\r\n",
  "#Checking if the user can trade or not\r\n",
  "def trade_t(num_of_stocks,port_value,current_price):\r\n",
  "    \"\"\"Return 1 when port_value exceeds current_price, otherwise 0.\r\n",
  "\r\n",
  "    num_of_stocks is kept for interface compatibility; the result does not\r\n",
  "    depend on it.\r\n",
  "    \"\"\"\r\n",
  "    return 1 if port_value > current_price else 0\r\n",
  "\r\n",
  "\r\n",
  "\r\n",
  "#Get next action by Epsilon greedy\r\n",
  "def next_act(state,qtable,epsilon,action=3):\r\n",
  "    \"\"\"Choose an action index with an epsilon-greedy policy.\r\n",
  "\r\n",
  "    With probability `epsilon` a uniform random index in [0, action) is\r\n",
  "    returned, otherwise the argmax of qtable[state]. `action` is the number\r\n",
  "    of available actions.\r\n",
  "    \"\"\"\r\n",
  "    if epsilon > np.random.rand():\r\n",
  "        return np.random.randint(action)\r\n",
  "    return np.argmax(qtable[state])\r\n",
  "\r\n",
  "\r\n",
  "\r\n",
  "# Immediate reward generator based on cumulative wealth\r\n",
  "def get_reward(state,action,current_close,past_close,buy_history):\r\n",
  "    \"\"\"Immediate reward for `action` (0=buy, 1=sell, 2=hold) taken in `state`.\r\n",
  "\r\n",
  "    Stock states (0,0)/(1,0): buy -1000, sell current_close-buy_history,\r\n",
  "    hold current_close-past_close.\r\n",
  "    Cash states (0,1)/(1,1): buy 0, sell -1000, hold current_close-past_close.\r\n",
  "    Returns None for any other state or action.\r\n",
  "    \"\"\"\r\n",
  "    daily_change=current_close-past_close\r\n",
  "    if state in ((0,0),(1,0)): #Stock position\r\n",
  "        rewards={0:-1000,1:current_close-buy_history,2:daily_change}\r\n",
  "    elif state in ((0,1),(1,1)): #Cash Position\r\n",
  "        rewards={0:0,1:-1000,2:daily_change}\r\n",
  "    else:\r\n",
  "        return None\r\n",
  "    return rewards.get(action)\r\n" ], "outputs": [], "metadata": {} },
 { "cell_type": "markdown", "source": [ "

Reading and preprocessing the Dataset" ], "metadata": {} },
 { "cell_type": "code", "execution_count": 27, "source": [
  "stocks=pd.read_csv('all_stocks_5yr.csv')\r\n",
  "stocks_train,stocks_test=data_prep(stocks,'AAPL')\r\n",
  "# Fail fast when the ticker is absent from the CSV instead of training on an empty frame\r\n",
  "assert len(stocks_train)>0, \"no rows found for ticker 'AAPL'\"" ], "outputs": [], "metadata": {} },
 { "cell_type": "markdown", "source": [ "

Training the Dataset" ], "metadata": {} },
 { "cell_type": "code", "execution_count": 28, "source": [
  "episodes=100\r\n",
  "initial_cash=1000\r\n",
  "port_value=initial_cash\r\n",
  "num_stocks=0\r\n",
  "epsilon=1 #Epsilon Greedy\r\n",
  "alpha=0.05 #Learning Rate\r\n",
  "gamma=0.15 #Discount Factor\r\n",
  "buy_history=0\r\n",
  "net_worth=[initial_cash] #Running balance, one entry per step\r\n",
  "np.random.seed(42) #Fixed seed so the training run is reproducible\r\n",
  "\r\n",
  "# Read each column once; per-step .iloc[dt][col] lookups build a Series for every access\r\n",
  "long_mas=stocks_train['5day_MA'].to_numpy()\r\n",
  "short_mas=stocks_train['1day_MA'].to_numpy()\r\n",
  "closes=stocks_train['close'].to_numpy()\r\n",
  "n_steps=len(stocks_train)\r\n",
  "\r\n",
  "for i in range(episodes): #Iteration for each episode\r\n",
  "    port_value=initial_cash\r\n",
  "    num_stocks=0\r\n",
  "    buy_history=0\r\n",
  "    net_worth=[initial_cash]\r\n",
  "\r\n",
  "    for dt in range(n_steps): #Iteration through each datapoint\r\n",
  "        close_price=closes[dt]\r\n",
  "        # The first row has no previous close, so its daily change is zero\r\n",
  "        past_close=closes[dt-1] if dt>0 else close_price\r\n",
  "\r\n",
  "        t=trade_t(num_stocks,net_worth[-1],close_price)\r\n",
  "        state=get_state(long_mas[dt],short_mas[dt],t)\r\n",
  "        action=next_act(state,q_table,epsilon)\r\n",
  "\r\n",
  "        if action==0: #Buy: pay for one share\r\n",
  "            num_stocks+=1\r\n",
  "            buy_history=close_price\r\n",
  "            net_worth.append(np.round(net_worth[-1]-close_price,1))\r\n",
  "        elif action==1: #Sell: receive the price of one share\r\n",
  "            # NOTE(review): nothing stops num_stocks from going negative - confirm shorting is intended\r\n",
  "            num_stocks-=1\r\n",
  "            net_worth.append(np.round(net_worth[-1]+close_price,1))\r\n",
  "        elif action==2: #Hold: no trade, so the balance carries over unchanged\r\n",
  "            net_worth.append(net_worth[-1])\r\n",
  "\r\n",
  "        r=get_reward(state,action,close_price,past_close,buy_history) #Getting Reward\r\n",
  "\r\n",
  "        # The last row has no successor state to bootstrap from, so the episode ends here\r\n",
  "        if dt+1>=n_steps:\r\n",
  "            break\r\n",
  "        # NOTE(review): the successor state reuses the pre-action trade flag t - confirm intended\r\n",
  "        next_state=get_state(long_mas[dt+1],short_mas[dt+1],t)\r\n",
  "\r\n",
  "        #Updating Q_table by Bellman's Equation\r\n",
  "        q_table[state][action]=(1.-alpha)*q_table[state][action]+alpha*(r+gamma*np.max(q_table[next_state]))\r\n",
  "\r\n",
  "    # Decay exploration once per episode, never stepping to 0.15 or below.\r\n",
  "    # NOTE(review): the saved source lost its indentation; per-episode decay is assumed - confirm\r\n",
  "    if (epsilon-0.01)>0.15:\r\n",
  "        epsilon-=0.01\r\n",
  "\r\n",
  "print(\"Training Complete\")" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Training Complete\n" ] } ], "metadata": {} },
 { "cell_type": "code", "execution_count": 38, "source": [
  "# Persist the trained Q-table; the context manager closes the file even if pickling fails\r\n",
  "with open('pickl.pkl','wb') as fh:\r\n",
  "    pk.dump(q_table,fh)" ], "outputs": [], "metadata": {} },
 { "cell_type": "markdown", "source": [ "

Tracking the Portfolio Value " ], "metadata": {} }, { "cell_type": "code", "execution_count": null, "source": [], "outputs": [], "metadata": {} }, { "cell_type": "code", "execution_count": null, "source": [], "outputs": [], "metadata": {} }, { "cell_type": "markdown", "source": [ "

Testing the Dataset" ], "metadata": {} }, { "cell_type": "code", "execution_count": 8, "source": [], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Test Complete\n" ] } ], "metadata": {} }, { "cell_type": "markdown", "source": [ "

Plotting the portfolio for the test Dataset " ], "metadata": {} }, { "cell_type": "code", "execution_count": null, "source": [], "outputs": [], "metadata": {} }, { "cell_type": "code", "execution_count": 10, "source": [ "num_stocks" ], "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "94" ] }, "metadata": {}, "execution_count": 10 } ], "metadata": {} }, { "cell_type": "code", "execution_count": null, "source": [], "outputs": [], "metadata": {} } ], "metadata": { "orig_nbformat": 4, "language_info": { "name": "python", "version": "3.9.7", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "kernelspec": { "name": "python3", "display_name": "Python 3.9.7 64-bit" }, "interpreter": { "hash": "60d8401257a87028599f7501811ce2c94d605f29d0573af229f453e115e13ba6" } }, "nbformat": 4, "nbformat_minor": 2 }