{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install pygam seaborn statsmodels --quiet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "from sklearn.datasets import fetch_california_housing\n",
    "from sklearn.model_selection import train_test_split\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.preprocessing import StandardScaler,OrdinalEncoder\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LinearRegression, LogisticRegression\n",
    "from sklearn.metrics import classification_report, recall_score, precision_score, accuracy_score\n",
    "from sklearn.metrics import mean_squared_error\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import zipfile\n",
    "from statsmodels.genmod.generalized_linear_model import GLM\n",
    "from statsmodels.genmod.families import Gamma\n",
    "from statsmodels.genmod.families.links import Log\n",
    "from statsmodels.tools import add_constant\n",
    "from pygam import LinearGAM, GammaGAM, s, f"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>CONTROL</th>\n",
       "      <th>AGE1</th>\n",
       "      <th>METRO3</th>\n",
       "      <th>REGION</th>\n",
       "      <th>LMED</th>\n",
       "      <th>FMR</th>\n",
       "      <th>L30</th>\n",
       "      <th>L50</th>\n",
       "      <th>L80</th>\n",
       "      <th>IPOV</th>\n",
       "      <th>...</th>\n",
       "      <th>FMTCOST06RELAMICAT</th>\n",
       "      <th>FMTCOST08RELAMICAT</th>\n",
       "      <th>FMTCOST12RELAMICAT</th>\n",
       "      <th>FMTCOSTMEDRELAMICAT</th>\n",
       "      <th>FMTINCRELAMICAT</th>\n",
       "      <th>FMTASSISTED</th>\n",
       "      <th>FMTBURDEN</th>\n",
       "      <th>FMTREGION</th>\n",
       "      <th>FMTSTATUS</th>\n",
       "      <th>HOUSE_AGE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'036000001146'</td>\n",
       "      <td>34</td>\n",
       "      <td>'2'</td>\n",
       "      <td>'4'</td>\n",
       "      <td>84200</td>\n",
       "      <td>2580</td>\n",
       "      <td>24950</td>\n",
       "      <td>41550</td>\n",
       "      <td>66500</td>\n",
       "      <td>17849</td>\n",
       "      <td>...</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'.'</td>\n",
       "      <td>'2 30% to 50%'</td>\n",
       "      <td>'West'</td>\n",
       "      <td>'-5'</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>'036000001147'</td>\n",
       "      <td>43</td>\n",
       "      <td>'2'</td>\n",
       "      <td>'4'</td>\n",
       "      <td>84200</td>\n",
       "      <td>2241</td>\n",
       "      <td>27700</td>\n",
       "      <td>46150</td>\n",
       "      <td>73850</td>\n",
       "      <td>22629</td>\n",
       "      <td>...</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'.'</td>\n",
       "      <td>'1 Less than 30%'</td>\n",
       "      <td>'West'</td>\n",
       "      <td>'-5'</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>'036000001149'</td>\n",
       "      <td>60</td>\n",
       "      <td>'2'</td>\n",
       "      <td>'4'</td>\n",
       "      <td>84200</td>\n",
       "      <td>2577</td>\n",
       "      <td>24950</td>\n",
       "      <td>41550</td>\n",
       "      <td>66500</td>\n",
       "      <td>17399</td>\n",
       "      <td>...</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'.'</td>\n",
       "      <td>'1 Less than 30%'</td>\n",
       "      <td>'West'</td>\n",
       "      <td>'-5'</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>'036000001150'</td>\n",
       "      <td>37</td>\n",
       "      <td>'2'</td>\n",
       "      <td>'4'</td>\n",
       "      <td>84200</td>\n",
       "      <td>2241</td>\n",
       "      <td>22200</td>\n",
       "      <td>36950</td>\n",
       "      <td>59100</td>\n",
       "      <td>14985</td>\n",
       "      <td>...</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'.'</td>\n",
       "      <td>'2 30% to 50%'</td>\n",
       "      <td>'West'</td>\n",
       "      <td>'-5'</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>'036000001151'</td>\n",
       "      <td>33</td>\n",
       "      <td>'2'</td>\n",
       "      <td>'4'</td>\n",
       "      <td>84200</td>\n",
       "      <td>2580</td>\n",
       "      <td>27700</td>\n",
       "      <td>46150</td>\n",
       "      <td>73850</td>\n",
       "      <td>22557</td>\n",
       "      <td>...</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'7 120% AMI +'</td>\n",
       "      <td>'.'</td>\n",
       "      <td>'2 30% to 50%'</td>\n",
       "      <td>'West'</td>\n",
       "      <td>'-5'</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 100 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          CONTROL  AGE1 METRO3 REGION   LMED   FMR    L30    L50    L80  \\\n",
       "0  '036000001146'    34    '2'    '4'  84200  2580  24950  41550  66500   \n",
       "1  '036000001147'    43    '2'    '4'  84200  2241  27700  46150  73850   \n",
       "2  '036000001149'    60    '2'    '4'  84200  2577  24950  41550  66500   \n",
       "3  '036000001150'    37    '2'    '4'  84200  2241  22200  36950  59100   \n",
       "4  '036000001151'    33    '2'    '4'  84200  2580  27700  46150  73850   \n",
       "\n",
       "    IPOV  ...  FMTCOST06RELAMICAT  FMTCOST08RELAMICAT FMTCOST12RELAMICAT  \\\n",
       "0  17849  ...      '7 120% AMI +'      '7 120% AMI +'     '7 120% AMI +'   \n",
       "1  22629  ...      '7 120% AMI +'      '7 120% AMI +'     '7 120% AMI +'   \n",
       "2  17399  ...      '7 120% AMI +'      '7 120% AMI +'     '7 120% AMI +'   \n",
       "3  14985  ...      '7 120% AMI +'      '7 120% AMI +'     '7 120% AMI +'   \n",
       "4  22557  ...      '7 120% AMI +'      '7 120% AMI +'     '7 120% AMI +'   \n",
       "\n",
       "   FMTCOSTMEDRELAMICAT FMTINCRELAMICAT  FMTASSISTED          FMTBURDEN  \\\n",
       "0       '7 120% AMI +'  '7 120% AMI +'          '.'     '2 30% to 50%'   \n",
       "1       '7 120% AMI +'  '7 120% AMI +'          '.'  '1 Less than 30%'   \n",
       "2       '7 120% AMI +'  '7 120% AMI +'          '.'  '1 Less than 30%'   \n",
       "3       '7 120% AMI +'  '7 120% AMI +'          '.'     '2 30% to 50%'   \n",
       "4       '7 120% AMI +'  '7 120% AMI +'          '.'     '2 30% to 50%'   \n",
       "\n",
       "   FMTREGION  FMTSTATUS  HOUSE_AGE  \n",
       "0     'West'       '-5'          9  \n",
       "1     'West'       '-5'          9  \n",
       "2     'West'       '-5'         10  \n",
       "3     'West'       '-5'         10  \n",
       "4     'West'       '-5'         16  \n",
       "\n",
       "[5 rows x 100 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_2011, df_2013 = pd.read_csv('thads2011.txt'),pd.read_csv('thads2013n.txt')\n",
    "df_2011['HOUSE_AGE'], df_2013['HOUSE_AGE'] = 2011 - df_2011['BUILT'], 2013 - df_2013['BUILT']\n",
    "df = pd.concat([df_2011, df_2013], ignore_index=True)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df[df['OWNRENT'] == \"'1'\"]\n",
    "df = df[df['FMTSTRUCTURETYPE'] == \"'1 Single Family'\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>METRO3</th>\n",
       "      <th>REGION</th>\n",
       "      <th>LMED</th>\n",
       "      <th>BEDRMS</th>\n",
       "      <th>HOUSE_AGE</th>\n",
       "      <th>VALUE</th>\n",
       "      <th>ROOMS</th>\n",
       "      <th>PER</th>\n",
       "      <th>UTILITY</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>'2'</td>\n",
       "      <td>'4'</td>\n",
       "      <td>84200</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>720000</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>300.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>'2'</td>\n",
       "      <td>'4'</td>\n",
       "      <td>84200</td>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>550000</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>256.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>'2'</td>\n",
       "      <td>'4'</td>\n",
       "      <td>84200</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "      <td>720000</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>233.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>'2'</td>\n",
       "      <td>'4'</td>\n",
       "      <td>84200</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>450000</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>152.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>'2'</td>\n",
       "      <td>'4'</td>\n",
       "      <td>84200</td>\n",
       "      <td>4</td>\n",
       "      <td>16</td>\n",
       "      <td>700000</td>\n",
       "      <td>9</td>\n",
       "      <td>4</td>\n",
       "      <td>656.166667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  METRO3 REGION   LMED  BEDRMS  HOUSE_AGE   VALUE  ROOMS  PER     UTILITY\n",
       "0    '2'    '4'  84200       4          9  720000      8    3  300.000000\n",
       "1    '2'    '4'  84200       3          9  550000      5    4  256.000000\n",
       "2    '2'    '4'  84200       5         10  720000     11    3  233.000000\n",
       "3    '2'    '4'  84200       3         10  450000      5    2  152.000000\n",
       "4    '2'    '4'  84200       4         16  700000      9    4  656.166667"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keep_columns = [\n",
    "    'METRO3',\n",
    "    'REGION',\n",
    "    'LMED',\n",
    "    'BEDRMS',\n",
    "    'HOUSE_AGE',\n",
    "    'VALUE',\n",
    "    'ROOMS',\n",
    "    'PER',\n",
    "    'UTILITY',\n",
    "]\n",
    "\n",
    "df = df[keep_columns]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>METRO3</th>\n",
       "      <th>REGION</th>\n",
       "      <th>LMED</th>\n",
       "      <th>BEDRMS</th>\n",
       "      <th>HOUSE_AGE</th>\n",
       "      <th>VALUE</th>\n",
       "      <th>ROOMS</th>\n",
       "      <th>PER</th>\n",
       "      <th>UTILITY</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>84200</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>720000</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>300.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>84200</td>\n",
       "      <td>3</td>\n",
       "      <td>9</td>\n",
       "      <td>550000</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>256.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>84200</td>\n",
       "      <td>5</td>\n",
       "      <td>10</td>\n",
       "      <td>720000</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>233.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>84200</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>450000</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>152.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>84200</td>\n",
       "      <td>4</td>\n",
       "      <td>16</td>\n",
       "      <td>700000</td>\n",
       "      <td>9</td>\n",
       "      <td>4</td>\n",
       "      <td>656.166667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   METRO3  REGION   LMED  BEDRMS  HOUSE_AGE   VALUE  ROOMS  PER     UTILITY\n",
       "0       2       4  84200       4          9  720000      8    3  300.000000\n",
       "1       2       4  84200       3          9  550000      5    4  256.000000\n",
       "2       2       4  84200       5         10  720000     11    3  233.000000\n",
       "3       2       4  84200       3         10  450000      5    2  152.000000\n",
       "4       2       4  84200       4         16  700000      9    4  656.166667"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['METRO3'] = df['METRO3'].str.replace(\"'\", \"\").astype(int)\n",
    "df['REGION'] = df['REGION'].str.replace(\"'\", \"\").astype(int)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "region_code = {\n",
    "    'Northeast': 1,\n",
    "    'Midwest': 2,\n",
    "    'South': 3,\n",
    "    'West': 4,\n",
    "}\n",
    "\n",
    "metro_code = {\n",
    "    'Central cities of metropolitan areas': 1,\n",
    "    'Inside metropolitan area, but not in central city': 2,\n",
    "    'Inside metropolitan area, but not in central city - rural': 3,\n",
    "    'Outside metropolitan areas, urbanized': 4,\n",
    "    'Outside metropolitan areas, rural': 5,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv('hud_dataset.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "X, y = df.drop(columns=['VALUE']), df['VALUE']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>METRO3</th>\n",
       "      <th>REGION</th>\n",
       "      <th>LMED</th>\n",
       "      <th>BEDRMS</th>\n",
       "      <th>HOUSE_AGE</th>\n",
       "      <th>ROOMS</th>\n",
       "      <th>PER</th>\n",
       "      <th>UTILITY</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>22170</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>67109</td>\n",
       "      <td>4</td>\n",
       "      <td>21</td>\n",
       "      <td>9</td>\n",
       "      <td>3</td>\n",
       "      <td>126.083333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17193</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>62800</td>\n",
       "      <td>3</td>\n",
       "      <td>41</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>201.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137803</th>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>84864</td>\n",
       "      <td>4</td>\n",
       "      <td>41</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>502.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39271</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>64200</td>\n",
       "      <td>3</td>\n",
       "      <td>61</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>301.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26364</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>61864</td>\n",
       "      <td>3</td>\n",
       "      <td>91</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>461.166667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        METRO3  REGION   LMED  BEDRMS  HOUSE_AGE  ROOMS  PER     UTILITY\n",
       "22170        2       2  67109       4         21      9    3  126.083333\n",
       "17193        2       2  62800       3         41      6    2  201.000000\n",
       "137803       4       1  84864       4         41      7    5  502.500000\n",
       "39271        2       1  64200       3         61      7    4  301.000000\n",
       "26364        3       2  61864       3         91      6    3  461.166667"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0% (0 of 11) |                         | Elapsed Time: 0:00:00 ETA:  --:--:--\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  9% (1 of 11) |##                       | Elapsed Time: 0:00:35 ETA:   0:05:51\n",
      " 18% (2 of 11) |####                     | Elapsed Time: 0:01:09 ETA:   0:05:08\n",
      " 27% (3 of 11) |######                   | Elapsed Time: 0:01:44 ETA:   0:04:38\n",
      " 36% (4 of 11) |#########                | Elapsed Time: 0:02:22 ETA:   0:04:25\n",
      " 45% (5 of 11) |###########              | Elapsed Time: 0:02:55 ETA:   0:03:22\n",
      " 54% (6 of 11) |#############            | Elapsed Time: 0:03:28 ETA:   0:02:43\n",
      " 63% (7 of 11) |###############          | Elapsed Time: 0:04:01 ETA:   0:02:12\n",
      " 72% (8 of 11) |##################       | Elapsed Time: 0:04:36 ETA:   0:01:43\n",
      " 81% (9 of 11) |####################     | Elapsed Time: 0:05:07 ETA:   0:01:02\n",
      " 90% (10 of 11) |#####################   | Elapsed Time: 0:05:40 ETA:   0:00:33\n",
      "100% (11 of 11) |########################| Elapsed Time: 0:06:14 Time:  0:06:14\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean Squared Error: 57308274833.9465\n",
      "Root Mean Squared Error: 239391.4678\n"
     ]
    }
   ],
   "source": [
    "gam_model = LinearGAM(s(0, dtype='categorical') \n",
    "                      + s(1, dtype='categorical') \n",
    "                      + s(2, n_splines=250) \n",
    "                      + s(3, dtype='categorical') \n",
    "                      + s(4, n_splines=250) \n",
    "                      + s(5, dtype='categorical') \n",
    "                      + s(6, dtype='categorical') \n",
    "                      + s(7, n_splines=250) \n",
    "                      )\n",
    "\n",
    "gam_model.gridsearch(np.array(X_train), y_train)\n",
    "gam_model.fit(np.array(X_train), y_train)\n",
    "gam_test_preds = gam_model.predict(X_test)\n",
    "\n",
    "mse = mean_squared_error(y_test, gam_test_preds)\n",
    "print(f\"Mean Squared Error: {mse:.4f}\")\n",
    "\n",
    "rmse = np.sqrt(mse)\n",
    "print(f\"Root Mean Squared Error: {rmse:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('gam_model_hud.pkl', 'wb') as file:\n",
    "    pickle.dump(gam_model, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = torch.FloatTensor(X_train.values)\n",
    "y_train = torch.FloatTensor(y_train.values).reshape(-1, 1)\n",
    "X_test = torch.FloatTensor(X_test.values)\n",
    "y_test = torch.FloatTensor(y_test.values).reshape(-1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "class HousePriceModel(nn.Module):\n",
    "    def __init__(self, input_size):\n",
    "        super(HousePriceModel, self).__init__()\n",
    "        self.model = nn.Sequential(\n",
    "            nn.Linear(input_size, 128),\n",
    "            nn.LeakyReLU(0.2,inplace=True),\n",
    "            nn.Linear(128, 64),\n",
    "            nn.LeakyReLU(0.2,inplace=True),\n",
    "            nn.Linear(64, 32),\n",
    "            nn.LeakyReLU(0.2,inplace=True),\n",
    "            nn.Linear(32, 1)\n",
    "        )\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.model(x)\n",
    "        return x\n",
    "\n",
    "model = HousePriceModel(X_train.shape[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "criterion = nn.MSELoss()\n",
    "optimizer = optim.Adam(model.parameters(), lr=0.0001)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [10/100], Loss: 461675626496.0000\n",
      "Epoch [20/100], Loss: 461912539136.0000\n",
      "Epoch [30/100], Loss: 465912332288.0000\n",
      "Epoch [40/100], Loss: 465710579712.0000\n",
      "Epoch [50/100], Loss: 465482645504.0000\n",
      "Epoch [60/100], Loss: 465170137088.0000\n",
      "Epoch [70/100], Loss: 464703356928.0000\n",
      "Epoch [80/100], Loss: 464297361408.0000\n",
      "Epoch [90/100], Loss: 463879012352.0000\n",
      "Epoch [100/100], Loss: 463475539968.0000\n"
     ]
    }
   ],
   "source": [
    "num_epochs = 100\n",
    "batch_size = 32\n",
    "\n",
    "for epoch in range(num_epochs):\n",
    "    for i in range(0, len(X_train), batch_size):\n",
    "        batch_X = X_train[i:i+batch_size]\n",
    "        batch_y = y_train[i:i+batch_size]\n",
    "        \n",
    "        outputs = model(batch_X)\n",
    "        loss = criterion(outputs, batch_y)\n",
    "        \n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "    \n",
    "    if (epoch + 1) % 10 == 0:\n",
    "        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean Squared Error: 77202161664.0000\n",
      "Root Mean Squared Error: 277852.7812\n"
     ]
    }
   ],
   "source": [
    "model.eval()\n",
    "with torch.no_grad():\n",
    "    y_pred = model(X_test)\n",
    "    mse = criterion(y_pred, y_test)\n",
    "    rmse = torch.sqrt(mse)\n",
    "    print(f'Mean Squared Error: {mse.item():.4f}')\n",
    "    print(f'Root Mean Squared Error: {rmse.item():.4f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.save(model.state_dict(), 'dnn_model_hud.pth')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MedInc</th>\n",
       "      <th>HouseAge</th>\n",
       "      <th>AveRooms</th>\n",
       "      <th>AveBedrms</th>\n",
       "      <th>Population</th>\n",
       "      <th>AveOccup</th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8.3252</td>\n",
       "      <td>41.0</td>\n",
       "      <td>6.984127</td>\n",
       "      <td>1.023810</td>\n",
       "      <td>322.0</td>\n",
       "      <td>2.555556</td>\n",
       "      <td>37.88</td>\n",
       "      <td>-122.23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8.3014</td>\n",
       "      <td>21.0</td>\n",
       "      <td>6.238137</td>\n",
       "      <td>0.971880</td>\n",
       "      <td>2401.0</td>\n",
       "      <td>2.109842</td>\n",
       "      <td>37.86</td>\n",
       "      <td>-122.22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7.2574</td>\n",
       "      <td>52.0</td>\n",
       "      <td>8.288136</td>\n",
       "      <td>1.073446</td>\n",
       "      <td>496.0</td>\n",
       "      <td>2.802260</td>\n",
       "      <td>37.85</td>\n",
       "      <td>-122.24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5.6431</td>\n",
       "      <td>52.0</td>\n",
       "      <td>5.817352</td>\n",
       "      <td>1.073059</td>\n",
       "      <td>558.0</td>\n",
       "      <td>2.547945</td>\n",
       "      <td>37.85</td>\n",
       "      <td>-122.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3.8462</td>\n",
       "      <td>52.0</td>\n",
       "      <td>6.281853</td>\n",
       "      <td>1.081081</td>\n",
       "      <td>565.0</td>\n",
       "      <td>2.181467</td>\n",
       "      <td>37.85</td>\n",
       "      <td>-122.25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \\\n",
       "0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   \n",
       "1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   \n",
       "2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   \n",
       "3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   \n",
       "4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   \n",
       "\n",
       "   Longitude  \n",
       "0    -122.23  \n",
       "1    -122.22  \n",
       "2    -122.24  \n",
       "3    -122.25  \n",
       "4    -122.25  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "housing = fetch_california_housing()\n",
    "\n",
    "X, y = pd.DataFrame(housing.data), pd.DataFrame(housing.target)\n",
    "X.columns = housing.feature_names\n",
    "X.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0% (0 of 11) |                         | Elapsed Time: 0:00:00 ETA:  --:--:--\n",
      "  9% (1 of 11) |##                       | Elapsed Time: 0:00:36 ETA:   0:06:09\n",
      " 18% (2 of 11) |####                     | Elapsed Time: 0:01:13 ETA:   0:05:24\n",
      " 27% (3 of 11) |######                   | Elapsed Time: 0:01:48 ETA:   0:04:46\n",
      " 36% (4 of 11) |#########                | Elapsed Time: 0:02:25 ETA:   0:04:15\n",
      " 45% (5 of 11) |###########              | Elapsed Time: 0:03:00 ETA:   0:03:30\n",
      " 54% (6 of 11) |#############            | Elapsed Time: 0:03:35 ETA:   0:02:55\n",
      " 63% (7 of 11) |###############          | Elapsed Time: 0:04:13 ETA:   0:02:30\n",
      " 72% (8 of 11) |##################       | Elapsed Time: 0:04:49 ETA:   0:01:48\n",
      " 81% (9 of 11) |####################     | Elapsed Time: 0:05:29 ETA:   0:01:20\n",
      " 90% (10 of 11) |#####################   | Elapsed Time: 0:06:09 ETA:   0:00:40\n",
      "100% (11 of 11) |########################| Elapsed Time: 0:06:47 Time:  0:06:47\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean Squared Error: 0.3081\n",
      "Root Mean Squared Error: 0.5550\n"
     ]
    }
   ],
   "source": [
    "gam_model = LinearGAM(s(0, n_splines=250) \n",
    "                      + s(1, n_splines=250) \n",
    "                      + s(2, n_splines=250) \n",
    "                      + s(3, n_splines=250) \n",
    "                      + s(4, n_splines=250) \n",
    "                      + s(5, n_splines=250) \n",
    "                      + s(6, n_splines=250) \n",
    "                      + s(7, n_splines=250) \n",
    "                      )\n",
    "gam_model.gridsearch(np.array(X_train), y_train)\n",
    "gam_model.fit(np.array(X_train), y_train)\n",
    "gam_test_preds = gam_model.predict(X_test)\n",
    "\n",
    "mse = mean_squared_error(y_test, gam_test_preds)\n",
    "print(f\"Mean Squared Error: {mse:.4f}\")\n",
    "\n",
    "rmse = np.sqrt(mse)\n",
    "print(f\"Root Mean Squared Error: {rmse:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('gam_model_california.pkl', 'wb') as file:\n",
    "    pickle.dump(gam_model, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = torch.FloatTensor(X_train.values)\n",
    "y_train = torch.FloatTensor(y_train.values).reshape(-1, 1)\n",
    "X_test = torch.FloatTensor(X_test.values)\n",
    "y_test = torch.FloatTensor(y_test.values).reshape(-1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "class HousePriceModel(nn.Module):\n",
    "    def __init__(self, input_size):\n",
    "        super(HousePriceModel, self).__init__()\n",
    "        self.model = nn.Sequential(\n",
    "            nn.Linear(input_size, 128),\n",
    "            nn.LeakyReLU(0.2,inplace=True),\n",
    "            nn.Linear(128, 64),\n",
    "            nn.LeakyReLU(0.2,inplace=True),\n",
    "            nn.Linear(64, 32),\n",
    "            nn.LeakyReLU(0.2,inplace=True),\n",
    "            nn.Linear(32, 1)\n",
    "        )\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.model(x)\n",
    "        return x\n",
    "\n",
    "model = HousePriceModel(X_train.shape[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "criterion = nn.MSELoss()\n",
    "optimizer = optim.Adam(model.parameters(), lr=0.0001)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [10/100], Loss: 1.0281\n",
      "Epoch [20/100], Loss: 0.7823\n",
      "Epoch [30/100], Loss: 0.6930\n",
      "Epoch [40/100], Loss: 0.7010\n",
      "Epoch [50/100], Loss: 0.6770\n",
      "Epoch [60/100], Loss: 0.6713\n",
      "Epoch [70/100], Loss: 0.6740\n",
      "Epoch [80/100], Loss: 0.7874\n",
      "Epoch [90/100], Loss: 0.7111\n",
      "Epoch [100/100], Loss: 0.6300\n"
     ]
    }
   ],
   "source": [
    "num_epochs = 100\n",
    "batch_size = 32\n",
    "\n",
    "for epoch in range(num_epochs):\n",
    "    for i in range(0, len(X_train), batch_size):\n",
    "        batch_X = X_train[i:i+batch_size]\n",
    "        batch_y = y_train[i:i+batch_size]\n",
    "        \n",
    "        outputs = model(batch_X)\n",
    "        loss = criterion(outputs, batch_y)\n",
    "        \n",
    "        optimizer.zero_grad()\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "    \n",
    "    if (epoch + 1) % 10 == 0:\n",
    "        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean Squared Error: 0.9678\n",
      "Root Mean Squared Error: 0.9838\n"
     ]
    }
   ],
   "source": [
    "model.eval()\n",
    "with torch.no_grad():\n",
    "    y_pred = model(X_test)\n",
    "    mse = criterion(y_pred, y_test)\n",
    "    rmse = torch.sqrt(mse)\n",
    "    print(f'Mean Squared Error: {mse.item():.4f}')\n",
    "    print(f'Root Mean Squared Error: {rmse.item():.4f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.save(model.state_dict(), 'dnn_model_california.pth')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Latitude</th>\n",
       "      <th>Longitude</th>\n",
       "      <th>County</th>\n",
       "      <th>City</th>\n",
       "      <th>Incorportation_date</th>\n",
       "      <th>pop_april_1980</th>\n",
       "      <th>pop_april_1990</th>\n",
       "      <th>pop_april_2000</th>\n",
       "      <th>pop_april_2010</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>34.582769</td>\n",
       "      <td>-117.409214</td>\n",
       "      <td>San Bernardino</td>\n",
       "      <td>Adelanto</td>\n",
       "      <td>1970</td>\n",
       "      <td>2164.0</td>\n",
       "      <td>8517</td>\n",
       "      <td>18130</td>\n",
       "      <td>31765</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>34.153339</td>\n",
       "      <td>-118.761675</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>Agoura Hills</td>\n",
       "      <td>1982</td>\n",
       "      <td>20390.0</td>\n",
       "      <td>20390</td>\n",
       "      <td>20537</td>\n",
       "      <td>20330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37.765206</td>\n",
       "      <td>-122.241636</td>\n",
       "      <td>Alameda</td>\n",
       "      <td>Alameda</td>\n",
       "      <td>1854</td>\n",
       "      <td>63852.0</td>\n",
       "      <td>76459</td>\n",
       "      <td>72259</td>\n",
       "      <td>73812</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>37.886869</td>\n",
       "      <td>-122.297747</td>\n",
       "      <td>Alameda</td>\n",
       "      <td>Albany</td>\n",
       "      <td>1908</td>\n",
       "      <td>15130.0</td>\n",
       "      <td>16327</td>\n",
       "      <td>16444</td>\n",
       "      <td>18539</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>34.095286</td>\n",
       "      <td>-118.127014</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>Alhambra</td>\n",
       "      <td>1903</td>\n",
       "      <td>64767.0</td>\n",
       "      <td>82106</td>\n",
       "      <td>85804</td>\n",
       "      <td>83089</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Latitude   Longitude          County          City Incorportation_date  \\\n",
       "0  34.582769 -117.409214  San Bernardino      Adelanto                1970   \n",
       "1  34.153339 -118.761675     Los Angeles  Agoura Hills                1982   \n",
       "2  37.765206 -122.241636         Alameda       Alameda                1854   \n",
       "3  37.886869 -122.297747         Alameda        Albany                1908   \n",
       "4  34.095286 -118.127014     Los Angeles      Alhambra                1903   \n",
       "\n",
       "   pop_april_1980  pop_april_1990  pop_april_2000  pop_april_2010  \n",
       "0          2164.0            8517           18130           31765  \n",
       "1         20390.0           20390           20537           20330  \n",
       "2         63852.0           76459           72259           73812  \n",
       "3         15130.0           16327           16444           18539  \n",
       "4         64767.0           82106           85804           83089  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('california_cities.csv')\n",
    "df = df.iloc[:, 1:]\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.groupby(['County']).agg({\n",
    "    'Latitude': 'mean',\n",
    "    'Longitude': 'mean',\n",
    "    'pop_april_1990': 'sum',\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv('california_counties.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cnn_module",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}