{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All notebook imports live in this cell: standard library first, then third-party.\n",
    "import itertools\n",
    "import os\n",
    "import sys\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import statsmodels.api as sm\n",
    "from arch import arch_model\n",
    "from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error, r2_score\n",
    "from sklearn.model_selection import TimeSeriesSplit\n",
    "from statsmodels.tsa.arima.model import ARIMA\n",
    "from statsmodels.tsa.statespace.sarimax import SARIMAX"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Code of the industry to analyse; it is interpolated into the input file name.\n",
    "req_naics = 336111\n",
    "# Path is relative to the notebook's working directory.\n",
    "df = pd.read_csv(f\"../data/processed_data_{req_naics}.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]\n",
      "/opt/conda/envs/ai_env/bin/python\n"
     ]
    }
   ],
   "source": [
    "# Record the interpreter version and executable path behind the saved outputs.\n",
    "print(sys.version)\n",
    "print(sys.executable)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
| \n", " | year | \n", "naics | \n", "emp | \n", "pay | \n", "prode | \n", "prodh | \n", "prodw | \n", "vship | \n", "matcost | \n", "vadd | \n", "... | \n", "equip | \n", "plant | \n", "piship | \n", "pimat | \n", "piinv | \n", "pien | \n", "dtfp5 | \n", "tfp5 | \n", "dtfp4 | \n", "tfp4 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1958 | \n", "336111 | \n", "146.1 | \n", "868.4 | \n", "116.2 | \n", "229.7 | \n", "650.6 | \n", "5007.9 | \n", "3411.3 | \n", "1563.8 | \n", "... | \n", "3291.1 | \n", "11718.8 | \n", "0.314 | \n", "0.238 | \n", "0.180 | \n", "0.147 | \n", "NaN | \n", "0.553 | \n", "NaN | \n", "0.550 | \n", "
| 1 | \n", "1959 | \n", "336111 | \n", "160.6 | \n", "1072.6 | \n", "131.0 | \n", "283.2 | \n", "827.2 | \n", "6422.2 | \n", "4306.7 | \n", "2143.4 | \n", "... | \n", "3457.5 | \n", "11415.6 | \n", "0.322 | \n", "0.243 | \n", "0.184 | \n", "0.145 | \n", "0.052 | \n", "0.583 | \n", "0.053 | \n", "0.580 | \n", "
| 2 | \n", "1960 | \n", "336111 | \n", "176.1 | \n", "1183.7 | \n", "144.7 | \n", "302.8 | \n", "925.7 | \n", "7239.0 | \n", "4883.5 | \n", "2336.2 | \n", "... | \n", "3673.0 | \n", "11375.1 | \n", "0.318 | \n", "0.241 | \n", "0.190 | \n", "0.150 | \n", "0.030 | \n", "0.601 | \n", "0.030 | \n", "0.598 | \n", "
| 3 | \n", "1961 | \n", "336111 | \n", "152.4 | \n", "1035.7 | \n", "123.2 | \n", "252.9 | \n", "789.5 | \n", "6214.2 | \n", "4134.9 | \n", "2053.6 | \n", "... | \n", "3794.2 | \n", "11179.9 | \n", "0.317 | \n", "0.242 | \n", "0.190 | \n", "0.146 | \n", "-0.007 | \n", "0.596 | \n", "-0.009 | \n", "0.592 | \n", "
| 4 | \n", "1962 | \n", "336111 | \n", "168.2 | \n", "1223.6 | \n", "138.4 | \n", "297.1 | \n", "954.7 | \n", "7855.3 | \n", "5187.9 | \n", "2671.2 | \n", "... | \n", "3908.1 | \n", "11141.2 | \n", "0.315 | \n", "0.246 | \n", "0.193 | \n", "0.147 | \n", "0.077 | \n", "0.644 | \n", "0.078 | \n", "0.641 | \n", "
5 rows × 24 columns
\n", "