{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from utils.ipynb_helpers import read_data\n",
    "import numpy as np\n",
    "from scipy import stats\n",
    "import statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the 1-minute data set through the project helper. The cells below index\n",
    "# the frame as df[ticker, stat] and read df.columns.levels[0], i.e. the columns\n",
    "# are addressed as (ticker, stat) pairs.\n",
    "df = read_data(\"./data/stock/full_1min.csv\")\n",
    "\n",
    "# Series that most of the cells below look at.\n",
    "ticker = \"WTI\"\n",
    "stat = \"logpctchange\"\n",
    "df_t = df[ticker, stat]\n",
    "# Transformed variants; only referenced by the commented-out histogram experiments.\n",
    "df_t_log = np.log(df_t + 1)\n",
    "df_t_exp = np.exp(df_t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the whole selected series.\n",
    "# df_t[df.index > '2019'].plot()\n",
    "df_t.plot()\n",
    "# Trailing statement so the Axes repr is not echoed as the cell output.\n",
    "pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure out stats for each ticker\n",
    "# (rows with index < \"2023-01-01\"; assumes a datetime-like index -- TODO confirm)\n",
    "for t in df.columns.levels[0].to_list():\n",
    "    looking_at = df[t, stat][df.index < \"2023-01-01\"]\n",
    "    # Series.mean()/.std() are vectorised and skip NaN; .std() is the sample\n",
    "    # standard deviation (ddof=1).\n",
    "    mean = looking_at.mean()\n",
    "    sd = looking_at.std()\n",
    "    # The \"sd:\" field reports the standard deviation itself (it used to print |mean|).\n",
    "    print(f\"{t}:\\t\", \"mean:\", f\"{mean:.8f}\", \"sd:\", f\"{sd:.6f}\", \"var:\", f\"{np.square(sd):.8f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distribution of the selected ticker after 2020-01-01, standardised to z-scores.\n",
    "looking_at = df[ticker, stat][df.index > \"2020-01-01\"]\n",
    "print(\"variance\", looking_at.var())\n",
    "mean = looking_at.mean()\n",
    "sd = looking_at.std()\n",
    "print(\"mean:\", mean, \"sd:\", sd, \"var:\", np.square(sd))\n",
    "# 1.092799419696949e-05\n",
    "# looking_at.plot.hist(bins=500, figsize=(20, 10))\n",
    "# The title follows `ticker` so it cannot drift from the series actually plotted.\n",
    "((looking_at - mean) / sd).plot.hist(\n",
    "    bins=200,\n",
    "    figsize=(20, 10),\n",
    "    title=f\"Histogram of {ticker} Log Percent Change after 2020-01-01\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Largest value of the selected series. Series.max() skips NaN, whereas the\n",
    "# builtin max() gives a result that depends on where NaNs sit in the data.\n",
    "df_t.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw (unstandardised) distribution of the selected ticker before 2020-01-01.\n",
    "looking_at = df_t[df.index < \"2020-01-01\"]\n",
    "# print(looking_at.var())\n",
    "\n",
    "mean = looking_at.mean()\n",
    "sd = looking_at.std()\n",
    "print(mean, sd, np.square(sd))\n",
    "\n",
    "# The filter above keeps rows *before* the cut-off, so the title says \"before\".\n",
    "looking_at.plot.hist(\n",
    "    bins=2000,\n",
    "    figsize=(20, 10),\n",
    "    title=f\"Histogram of {ticker} Log Percent Change before 2020-01-01\",\n",
    ")\n",
    "# ((looking_at-mean)/sd).plot.hist(bins=2000, figsize=(20, 10))\n",
    "# plt.plot((looking_at-mean)/sd)#stats.norm.pdf(looking_at, mean, sd)\n",
    "\n",
    "# df_t_log.plot.hist(bins=500, figsize=(20, 10))\n",
    "# df_t_exp.plot.hist(bins=500, figsize=(20, 10))\n",
    "pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Outlier stuff\n",
    "\n",
    "# 1) Keep only observations whose |z-score| is below 3.\n",
    "# NOTE(review): scipy's zscore propagates NaN by default, so a single NaN in df_t\n",
    "# would make every z-score NaN and leave this filter empty -- confirm df_t is NaN-free.\n",
    "df_t_out = df_t[(np.abs(stats.zscore(df_t)) < 3)]\n",
    "print(len(df_t_out))\n",
    "df_t_out.plot.hist(bins=2000, figsize=(20, 10))\n",
    "\n",
    "# 2) Quantile band (0.5% .. 99.5%) for a single column.\n",
    "lower = df.XOM.pctchange.quantile(0.005)\n",
    "upper = df.XOM.pctchange.quantile(0.995)\n",
    "print(lower, upper)\n",
    "# # df.XOM.pctchange[df.XOM.pctchange.between(lower,upper)]\n",
    "# Rows outside the band. This is not the last expression of the cell, so Jupyter\n",
    "# does not display it.\n",
    "df.XOM.pctchange[~df.XOM.pctchange.between(lower, upper)]\n",
    "# # df.XOM.pctchange.clip(lower=lower, upper=upper)\n",
    "\n",
    "# 3) Same band computed per column and applied to the whole frame; the clipped\n",
    "# copy is only displayed, df itself is left untouched.\n",
    "lower = df.quantile(0.005)\n",
    "upper = df.quantile(0.995)\n",
    "df.clip(lower=lower, upper=upper, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Zoom in on the window strictly between start_date and end_date.\n",
    "start_date = \"2022-01-03\"\n",
    "end_date = \"2022-01-04\"\n",
    "f1 = df_t[df.index > start_date]\n",
    "f2 = f1[f1.index < end_date]\n",
    "print(f2)\n",
    "# f = plt.figure()\n",
    "# f.set_figwidth(60)\n",
    "# f.set_figheight(20)\n",
    "plt.figure(figsize=(24,4))\n",
    "# exp(cumsum(x)) over the window; presumably the cumulative growth factor relative\n",
    "# to the window start, assuming the stat column holds log returns -- TODO confirm.\n",
    "plt.plot(f2.index.to_numpy(), np.exp(np.cumsum(f2.to_numpy())))\n",
    "f2.head(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "vscode": {
   "interpreter": {
    "hash": "51980e48e269f7c05efac26b22569386591d7f1d45336266d53ed7fc3ab7efc6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}