diff --git "a/Week 2/week_2.ipynb" "b/Week 2/week_2.ipynb" new file mode 100644--- /dev/null +++ "b/Week 2/week_2.ipynb" @@ -0,0 +1,4288 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "id": "GilDNZtwwrDP" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "source": [ + "!wget https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uH4-4LA8wyfP", + "outputId": "e31d3897-222c-4903-ba3f-9c2c0ca51a27" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2025-10-06 18:27:03-- https://raw.githubusercontent.com/alexeygrigorev/datasets/master/car_fuel_efficiency.csv\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 874188 (854K) [text/plain]\n", + "Saving to: ‘car_fuel_efficiency.csv’\n", + "\n", + "car_fuel_efficiency 100%[===================>] 853.70K --.-KB/s in 0.1s \n", + "\n", + "2025-10-06 18:27:03 (5.84 MB/s) - ‘car_fuel_efficiency.csv’ saved [874188/874188]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "df = pd.read_csv(\"car_fuel_efficiency.csv\")\n", + "df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "CU_t600wxryA", + "outputId": "dff96ae5-7012-484d-e310-154f67766136" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " engine_displacement num_cylinders horsepower vehicle_weight \\\n", + "0 170 3.0 159.0 3413.433759 \n", + "1 130 5.0 97.0 3149.664934 \n", + "2 170 NaN 78.0 3079.038997 \n", + "3 220 4.0 NaN 2542.392402 \n", + "4 210 1.0 140.0 3460.870990 \n", + "\n", + " acceleration model_year origin fuel_type drivetrain num_doors \\\n", + "0 17.7 2003 Europe Gasoline All-wheel drive 0.0 \n", + "1 17.8 2007 USA Gasoline Front-wheel drive 0.0 \n", + "2 15.1 2018 Europe Gasoline Front-wheel drive 0.0 \n", + "3 20.2 2009 USA Diesel All-wheel drive 2.0 \n", + "4 14.4 2009 Europe Gasoline All-wheel drive 2.0 \n", + "\n", + " fuel_efficiency_mpg \n", + "0 13.231729 \n", + "1 13.688217 \n", + "2 14.246341 \n", + "3 16.912736 \n", + "4 12.488369 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
engine_displacementnum_cylindershorsepowervehicle_weightaccelerationmodel_yearoriginfuel_typedrivetrainnum_doorsfuel_efficiency_mpg
01703.0159.03413.43375917.72003EuropeGasolineAll-wheel drive0.013.231729
11305.097.03149.66493417.82007USAGasolineFront-wheel drive0.013.688217
2170NaN78.03079.03899715.12018EuropeGasolineFront-wheel drive0.014.246341
32204.0NaN2542.39240220.22009USADieselAll-wheel drive2.016.912736
42101.0140.03460.87099014.42009EuropeGasolineAll-wheel drive2.012.488369
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 9704,\n \"fields\": [\n {\n \"column\": \"engine_displacement\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 49,\n \"min\": 10,\n \"max\": 380,\n \"num_unique_values\": 36,\n \"samples\": [\n 30,\n 260,\n 90\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_cylinders\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.9993225843926064,\n \"min\": 0.0,\n \"max\": 13.0,\n \"num_unique_values\": 14,\n \"samples\": [\n 9.0,\n 10.0,\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"horsepower\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29.879555200339446,\n \"min\": 37.0,\n \"max\": 271.0,\n \"num_unique_values\": 192,\n \"samples\": [\n 128.0,\n 82.0,\n 167.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vehicle_weight\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 497.8948600311838,\n \"min\": 952.6817606436496,\n \"max\": 4739.077089392099,\n \"num_unique_values\": 9704,\n \"samples\": [\n 2535.8875912388694,\n 2741.1704843893167,\n 2471.8802372627765\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"acceleration\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.510339293937879,\n \"min\": 6.0,\n \"max\": 24.3,\n \"num_unique_values\": 162,\n \"samples\": [\n 7.3,\n 20.3,\n 19.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 2000,\n \"max\": 2023,\n \"num_unique_values\": 24,\n \"samples\": [\n 2005,\n 2002,\n 2003\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"origin\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 
3,\n \"samples\": [\n \"Europe\",\n \"USA\",\n \"Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fuel_type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Diesel\",\n \"Gasoline\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"drivetrain\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Front-wheel drive\",\n \"All-wheel drive\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_doors\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.048162111427726,\n \"min\": -4.0,\n \"max\": 4.0,\n \"num_unique_values\": 9,\n \"samples\": [\n -4.0,\n 2.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fuel_efficiency_mpg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.5564677028974288,\n \"min\": 6.200970533392815,\n \"max\": 25.96722204888372,\n \"num_unique_values\": 9704,\n \"samples\": [\n 16.642943419221385,\n 16.298377150953442\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 5 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df.info()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LkOyXBAEyIX_", + "outputId": "0d48c4d9-833e-4165-cce9-ad5a0b31d5a3" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "RangeIndex: 9704 entries, 0 to 9703\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 engine_displacement 9704 non-null int64 \n", + " 1 num_cylinders 9222 non-null float64\n", + " 2 horsepower 8996 non-null float64\n", + " 3 vehicle_weight 9704 non-null float64\n", + " 4 acceleration 8774 non-null float64\n", + " 5 model_year 9704 non-null 
int64 \n", + " 6 origin 9704 non-null object \n", + " 7 fuel_type 9704 non-null object \n", + " 8 drivetrain 9704 non-null object \n", + " 9 num_doors 9202 non-null float64\n", + " 10 fuel_efficiency_mpg 9704 non-null float64\n", + "dtypes: float64(6), int64(2), object(3)\n", + "memory usage: 834.1+ KB\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "sns.histplot(df[\"fuel_efficiency_mpg\"])\n", + "plt.xlabel(\"Fuel Efficiency\")\n", + "plt.ylabel(\"Frequency\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 467 + }, + "id": "U-0cuSfpyVsN", + "outputId": "f9726efb-d17b-4771-c523-108ba391bf90" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Text(0, 0.5, 'Frequency')" + ] + }, + "metadata": {}, + "execution_count": 35 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGwCAYAAABPSaTdAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAANZ5JREFUeJzt3XtYVXXe///XFgEFOSTKKRWpPJGnsknpYKUkKON4uq3MEtPq1sFGJc3xO6amcw9lZU3dpM19KdTdlJNza402WYqHZhQ7UJZaNyOG7opjlCA6HIT1+6Of+54tbJDtZu/N8vm4rnVdrvX5rLXenxbky3W0GIZhCAAAwKQ6eLoAAACAtkTYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAAptbR0wV4g4aGBhUWFiooKEgWi8XT5QAAgItgGIZOnz6t6Ohodejg+PwNYUdSYWGhevbs6ekyAACAE7755hv16NHDYTthR1JQUJCkn/5jBQcHe7gaAABwMSorK9WzZ0/b3+OOEHYk26Wr4OBgwg4AAO1MS7egcIMyAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNY+GnZUrV8pisdhN/fv3t7VXV1crNTVVYWFh6tKli6ZMmaKSkhK7bVitViUnJysgIEDh4eFavHixzp075+6hAAAAL+Xxb2Nde+212rVrl22+Y8f/K2nhwoV65513tHnzZoWEhGjevHmaPHmy9u/fL0mqr69XcnKyIiMjdeDAARUVFWnGjBny9fXV7373O7ePBQAAeB+Ph52OHTsqMjKy0fKKigpt2LBBr7/+ukaNGiVJyszM1IABA3Tw4EGNGDFC77//vr788kvt2rVLERERGjp0qFavXq0lS5Zo5cqV8vPzc/dwAACAl/H4PTvHjh1TdHS0rrrqKk2fPl1Wq1WSlJubq7q6OiUkJNj69u/fX7169VJOTo4kKScnR4MGDVJERIStT2JioiorK3X06FGH+6ypqVFlZaXdBAAAzMmjYWf48OHKysrSjh07tG7dOhUUFOjWW2/V6dOnVVxcLD8/P4WGhtqtExERoeLiYklScXGxXdA5336+zZH09HSFhITYpp49e7p2YAAAwGt49DLW2LFjbX8ePHiwhg8frpiYGL355pvq3Llzm+136dKlSktLs81XVlYSeAAvkDR+korKyptsi+oeph3btrq5IgBm4PF7dv5VaGio+vbtq/z8fN15552qra3VqVOn7M7ulJSU2O7xiYyM1EcffWS3jfNPazV1H9B5/v7+8vf3d/0AAFySorJyDZj1VJNtX21c4uZqAJiFx+/Z+VdVVVU6fvy4oqKiNGzYMPn6+io7O9vWnpeXJ6vVqvj4eElSfHy8Dh8+rNLSUlufnTt3Kjg4WHFxcW6vHwAAeB+PntlZtGiRxo8fr5iYGBUWFmrFihXy8fHRtGnTFBISotmzZystLU1du3ZVcHCwHnnkEcXHx2vEiBGSpDFjxiguLk7333+/1qxZo+LiYi1btkypqamcuQEAAJI8HHa+/fZbTZs2TeXl5erevbtuueUWHTx4UN27d5ckPffcc+rQoYOmTJmimpoaJSYm6qWXXrKt7+Pjo+3bt2vu3LmKj49XYGCgUlJStGrVKk8NCQAAeBmLYRiGp4vwtMrKSoWEhKiiokLBwcGeLge4bA0ZMbLZe3Y+P/iBmysC4M0u9u9vr7pnBwAAwNUIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNS86kOgAMyvuS+bn7BaNcDN9QAwP8IOALdq7svm+cvucnM1AC4HhB0ALtXcmRuJszcA3I+wA8ClmjtzI3H2BoD7cYMyAAAwNcIOAAAwNS5jAWg1nqgC0J4QdgC0Gk9UAWhPCDsA2oUTBV9ryIiRTbZFdQ/Tjm1b3
VwRgPaCsAOgXThnWByeTfpq4xI3VwOgPeEGZQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGqEHQAAYGodPV0AAFyqEwVfa8iIkQ7bo7qHace2rW6sCIA3IewAaPfOGRYNmPWUw/avNi5xYzUAvA2XsQAAgKlxZge4TCWNn6SisnKH7Wa69NPcZS4zjRNA0wg7wGWqqKz8srn009xlLjONE0DTuIwFAABMjbADAABMjbADAABMjbADAABMjbADAABMjaexADSpuce1T1itGuDmegDAWYQdAE1q7nHt/GV3ubkaAHAel7EAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpEXYAAICpeU3YefLJJ2WxWLRgwQLbsurqaqWmpiosLExdunTRlClTVFJSYree1WpVcnKyAgICFB4ersWLF+vcuXNurh4AAHgrrwg7H3/8sV5++WUNHjzYbvnChQu1bds2bd68Wfv27VNhYaEmT55sa6+vr1dycrJqa2t14MABvfLKK8rKytLy5cvdPQQAAOClPB52qqqqNH36dP3Xf/2XrrjiCtvyiooKbdiwQWvXrtWoUaM0bNgwZWZm6sCBAzp48KAk6f3339eXX36p1157TUOHDtXYsWO1evVqZWRkqLa21lNDAgAAXsTjYSc1NVXJyclKSEiwW56bm6u6ujq75f3791evXr2Uk5MjScrJydGgQYMUERFh65OYmKjKykodPXrU4T5rampUWVlpNwG4PJ0o+FpDRoxsckoaP8nT5QFwgY6e3PmmTZv06aef6uOPP27UVlxcLD8/P4WGhtotj4iIUHFxsa3Pvwad8+3n2xxJT0/XE088cYnVAzCDc4ZFA2Y91WTbVxuXuLkaAG3BY2d2vvnmG82fP19//OMf1alTJ7fue+nSpaqoqLBN33zzjVv3DwAA3MdjYSc3N1elpaW6/vrr1bFjR3Xs2FH79u3TCy+8oI4dOyoiIkK1tbU6deqU3XolJSWKjIyUJEVGRjZ6Ouv8/Pk+TfH391dwcLDdBAAAzMljYWf06NE6fPiwDh06ZJtuuOEGTZ8+3fZnX19fZWdn29bJy8uT1WpVfHy8JCk+Pl6HDx9WaWmprc/OnTsVHBysuLg4t48JAAB4H4/dsxMUFKSBAwfaLQsMDFRYWJht+ezZs5WWlqauXbsqODhYjzzyiOLj4zVixAhJ0pgxYxQXF6f7779fa9asUXFxsZYtW6bU1FT5+/u7fUwAAMD7ePQG5ZY899xz6tChg6ZMmaKamholJibqpZdesrX7+Pho+/btmjt3ruLj4xUYGKiUlBStWrXKg1UDAABv4lVhZ+/evXbznTp1UkZGhjIyMhyuExMTo7/+9a9tXBkAAGivPP6eHQAAgLZE2AEAAKZG2AEAAKZG2AEAAKZG2AEAAKZG2AEAAKbmVY+eA3CtpPGTVFRW3mTbCatVA9xcDwB4AmEHMLGisnKHX/TOX3aXm6sBAM/gMhYAADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1XioIAA6cKPhaQ0aMbLItqnuYdmzb6uaKADiDsAMADpwzLA7fQP3VxiVurgaAs7iMBQAATI2wAwAATI2wAwAATI2wAwAATI2wAwAATI2wAwAATI2wAwAATI2wAwAATI2XCgLtXNL4SSoqK2+y7YTVqgFurgcAvA1hB2jnisrKHb7lN3/ZXW6uBgC8D5exAACAqRF2AACAqRF2AACAqRF2AACAqRF2AACAqRF2AACAqRF2AACAqRF2AACAqfFSQQBwwomCrzVkxEiH7VHdw7Rj21Y3VgTAEcIOADjhnGFx+OZqSfpq4
xI3VgOgOVzGAgAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApkbYAQAApsYblIF2IGn8JBWVlTfZdsJq1QA31wMA7QlhB2gHisrKHX6aIH/ZXW6uBgDaFy5jAQAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAU+M9OwDQBk4UfK0hI0Y22RbVPUw7tm11c0XA5YuwAwBt4JxhcfgiyK82LnFzNcDlzanLWF9//bWr6wAAAGgTToWda665RnfccYdee+01VVdXO73zdevWafDgwQoODlZwcLDi4+P17rvv2tqrq6uVmpqqsLAwdenSRVOmTFFJSYndNqxWq5KTkxUQEKDw8HAtXrxY586dc7omAABgLk6FnU8//VSDBw9WWlqaIiMj9e///u/66KOPWr2dHj166Mknn1Rubq4++eQTjRo1ShMmTNDRo0clSQsXLtS2bdu0efNm7du3T4WFhZo8ebJt/fr6eiUnJ6u2tlYHDhzQK6+8oqysLC1fvtyZYQEAABNyKuwMHTpUv//971VYWKiNGzeqqKhIt9xyiwYOHKi1a9eqrKzsorYzfvx4jRs3Tn369FHfvn31H//xH+rSpYsOHjyoiooKbdiwQWvXrtWoUaM0bNgwZWZm6sCBAzp48KAk6f3339eXX36p1157TUOHDtXYsWO1evVqZWRkqLa21pmhAQAAk7mkR887duyoyZMna/PmzXrqqaeUn5+vRYsWqWfPnpoxY4aKioouelv19fXatGmTzpw5o/j4eOXm5qqurk4JCQm2Pv3791evXr2Uk5MjScrJydGgQYMUERFh65OYmKjKykrb2aGm1NTUqLKy0m4CAADmdElh55NPPtEvf/lLRUVFae3atVq0aJGOHz+unTt3qrCwUBMmTGhxG4cPH1aXLl3k7++vOXPmaOvWrYqLi1NxcbH8/PwUGhpq1z8iIkLFxcWSpOLiYrugc779fJsj6enpCgkJsU09e/Zs5cgBAEB74dSj52vXrlVmZqby8vI0btw4vfrqqxo3bpw6dPgpO8XGxiorK0u9e/ducVv9+vXToUOHVFFRoT//+c9KSUnRvn37nCnroi1dulRpaWm2+crKSgIPAAAm5VTYWbdunWbNmqWZM2cqKiqqyT7h4eHasGFDi9vy8/PTNddcI0kaNmyYPv74Y/3+97/X3XffrdraWp06dcru7E5JSYkiIyMlSZGRkY1ujD7/tNb5Pk3x9/eXv79/i7UBAID2z6nLWMeOHdPSpUsdBh3ppxCTkpLS6m03NDSopqZGw4YNk6+vr7Kzs21teXl5slqtio+PlyTFx8fr8OHDKi0ttfXZuXOngoODFRcX1+p9AwAA83HqzE5mZqa6dOmiqVOn2i3fvHmzzp49e9EhZ+nSpRo7dqx69eql06dP6/XXX9fevXv13nvvKSQkRLNnz1ZaWpq6du2q4OBgPfLII4qPj9eIESMkSWPGjFFcXJzuv/9+rVmzRsXFxVq2bJlSU1M5c4N2JWn8JBWVlTtsP2G1aoAb6wEAM3Eq7KSnp+vll19utDw8PFwPP/zwRYed0tJS21NbISEhGjx4sN577z3deeedkqTnnntOHTp00JQpU1RTU6PExES99NJLtvV9fHy0fft2zZ07V/Hx8QoMDFRKSopWrVrlzLAAjykqK3f4aQFJyl92lxurAQBzcSrsWK1WxcbGNloeExMjq9V60dtp6Z6eTp06KSMjQxkZGQ77xMTE6K9//etF7xMAAFxenLpnJzw8XF988UWj5Z9//rnCwsIuuSgAAABXcSrsTJs2Tb/61a+0Z88e1dfXq76+Xrt379b8+fN1zz33uLpGAAAApzl1GWv16tU6ceKERo8erY4df9pEQ0ODZsyYod/97ncuLRAAAOBSOBV2/Pz89Kc//UmrV6/W559/rs6dO2vQoEGKiYlxdX0AAACXxKmwc17fvn3Vt29fV9UCAADgc
k6Fnfr6emVlZSk7O1ulpaVqaGiwa9+9e7dLigMAALhUToWd+fPnKysrS8nJyRo4cKAsFour6wIAAHAJp8LOpk2b9Oabb2rcuHGurgcAAMClnHr0/F8/3gkAAODNnAo7jz76qH7/+9/LMAxX1wMAAOBSTl3G+vvf/649e/bo3Xff1bXXXitfX1+79i1btrikOAAAgEvlVNgJDQ3VpEmTXF0LAACAyzkVdjIzM11dBwAAQJtw6p4dSTp37px27dqll19+WadPn5YkFRYWqqqqymXFAQAAXCqnzuycPHlSSUlJslqtqqmp0Z133qmgoCA99dRTqqmp0fr1611dJwAAgFOcOrMzf/583XDDDfrxxx/VuXNn2/JJkyYpOzvbZcUBAABcKqfO7Pztb3/TgQMH5OfnZ7e8d+/e+u6771xSGAAAgCs4dWanoaFB9fX1jZZ/++23CgoKuuSiAAAAXMWpMztjxozR888/rz/84Q+SJIvFoqqqKq1YsYJPSABAC04UfK0hI0Y22RbVPUw7tm11c0WAuTkVdp599lklJiYqLi5O1dXVuvfee3Xs2DF169ZNb7zxhqtrBABTOWdYNGDWU022fbVxiZurAczPqbDTo0cPff7559q0aZO++OILVVVVafbs2Zo+fbrdDcsAAACe5lTYkaSOHTvqvvvuc2UtgKkljZ+korLyJttOWK0a4OZ6AOBy4VTYefXVV5ttnzFjhlPFAGZWVFbu8NJF/rK73FwNAFw+nAo78+fPt5uvq6vT2bNn5efnp4CAAMIOAADwGk49ev7jjz/aTVVVVcrLy9Mtt9zCDcoAAMCrOP1trAv16dNHTz75ZKOzPgAAAJ7ksrAj/XTTcmFhoSs3CQAAcEmcumfnL3/5i928YRgqKirSf/7nf+rmm292SWEAAACu4FTYmThxot28xWJR9+7dNWrUKD377LOuqAsAAMAlnAo7DQ0Nrq4DAACgTbj0nh0AAABv49SZnbS0tIvuu3btWmd2AQAA4BJOhZ3PPvtMn332merq6tSvXz9J0j/+8Q/5+Pjo+uuvt/WzWCyuqRIAAMBJToWd8ePHKygoSK+88oquuOIKST+9aPCBBx7QrbfeqkcffdSlRQIAADjLqXt2nn32WaWnp9uCjiRdccUV+u1vf8vTWAAAwKs4FXYqKytVVlbWaHlZWZlOnz59yUUBAAC4ilNhZ9KkSXrggQe0ZcsWffvtt/r222/1P//zP5o9e7YmT57s6hoBAACc5tQ9O+vXr9eiRYt07733qq6u7qcNdeyo2bNn6+mnn3ZpgQAAAJfCqbATEBCgl156SU8//bSOHz8uSbr66qsVGBjo0uIAAAAu1SW9VLCoqEhFRUXq06ePAgMDZRiGq+oCAABwCafCTnl5uUaPHq2+fftq3LhxKioqkiTNnj2bx84BAIBXcSrsLFy4UL6+vrJarQoICLAtv/vuu7Vjxw6XFQcAAHCpnLpn5/3339d7772nHj162C3v06ePTp486ZLCAAAAXMGpMztnzpyxO6Nz3g8//CB/f/9LLgoAAMBVnAo7t956q1599VXbvMViUUNDg9asWaM77rjDZcUBAABcKqcuY61Zs0ajR4/WJ598otraWj322GM6evSofvjhB+3fv9/VNQIAADjNqTM7AwcO1D/+8Q/dcsstmjBhgs6cOaPJkyfrs88+09VXX+3qGgEAAJzW6jM7dXV1SkpK0vr16/Wb3/ymLWoCAABwmVaf2fH19dUXX3zRFrUAAAC4nFOXse677z5t2LDB1bUAAAC4nFM3KJ87d04bN27Url27NGzYsEbfxFq7dq1LigMAALhUrQo7X3/9tXr37q0jR47o+uuvlyT94x//sOtjsVhcVx0AAMAlalXY6dOnj4qKirRnzx5JP30e4oUXXlBERESbFAcAAHCpWnXPzoVfNX/33Xd15swZlxYEAADgSk7ds3PeheEHuNwljZ+korLyJttOWK0a4OZ6AACtDDsWi6XRPTnco
wP8n6Kycg2Y9VSTbfnL7nJzNQAAqZVhxzAMzZw50/axz+rqas2ZM6fR01hbtmxxXYUAAACXoFVhJyUlxW7+vvvuc2kxAAAArtaqsJOZmdlWdQAAALQJp96gDAAA0F54NOykp6frZz/7mYKCghQeHq6JEycqLy/Prk91dbVSU1MVFhamLl26aMqUKSopKbHrY7ValZycrICAAIWHh2vx4sU6d+6cO4cCAAC8lEfDzr59+5SamqqDBw9q586dqqur05gxY+ze3bNw4UJt27ZNmzdv1r59+1RYWKjJkyfb2uvr65WcnKza2lodOHBAr7zyirKysrR8+XJPDAkAAHiZS3rPzqXasWOH3XxWVpbCw8OVm5urkSNHqqKiQhs2bNDrr7+uUaNGSfrpvqEBAwbo4MGDGjFihN5//319+eWX2rVrlyIiIjR06FCtXr1aS5Ys0cqVK+Xn5+eJoQEAAC/hVffsVFRUSJK6du0qScrNzVVdXZ0SEhJsffr3769evXopJydHkpSTk6NBgwbZfbIiMTFRlZWVOnr0aJP7qampUWVlpd0EAADMyWvCTkNDgxYsWKCbb75ZAwcOlCQVFxfLz89PoaGhdn0jIiJUXFxs63Pht7nOz5/vc6H09HSFhITYpp49e7p4NAAAwFt4TdhJTU3VkSNHtGnTpjbf19KlS1VRUWGbvvnmmzbfJwAA8AyP3rNz3rx587R9+3Z98MEH6tGjh215ZGSkamtrderUKbuzOyUlJYqMjLT1+eijj+y2d/5prfN9LuTv7297CzQAADA3j57ZMQxD8+bN09atW7V7927FxsbatQ8bNky+vr7Kzs62LcvLy5PValV8fLwkKT4+XocPH1Zpaamtz86dOxUcHKy4uDj3DAQAAHgtj57ZSU1N1euvv663335bQUFBtntsQkJC1LlzZ4WEhGj27NlKS0tT165dFRwcrEceeUTx8fEaMWKEJGnMmDGKi4vT/fffrzVr1qi4uFjLli1TamoqZ28AAIBnw866deskSbfffrvd8szMTM2cOVOS9Nxzz6lDhw6aMmWKampqlJiYqJdeesnW18fHR9u3b9fcuXMVHx+vwMBApaSkaNWqVe4aBgC4TdL4SSoqK2+yLap7mHZs2+rmigDv59GwYxhGi306deqkjIwMZWRkOOwTExOjv/71r64sDQC8UlFZuQbMeqrJtq82LnFzNUD74BU3KAMALt2Jgq81ZMTIJts464PLGWEHAEzinGHhrA/QBMIOAHiR5s7OSNIJq1UD3FgPYAaEHQDwIs2dnZGk/GV3ubEawBy85g3KAAAAbYGwAwAATI3LWEArNfeeE+6nAADvQ9gBWqm595xwPwUAeB8uYwEAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFMj7AAAAFPj21jABZr70KfExz4BoL0h7AAXaO5DnxIf+wSA9obLWAAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQ6eroAAEDbO1HwtYaMGNlkW1T3MO3YttXNFQHuQ9gBgMvAOcOiAbOearLtq41L3FwN4F5cxgIAAKZG2AEAAKZG2AEAAKZG2AEAAKbGDcq4LCWNn6SisvIm205YrRrg5noAAG2HsIPLUlFZucMnU/KX3eXmagAAbYnLWAAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQ8GnY++OADjR8/XtHR0bJYLHrrrbfs2g3D0PLlyxUVFaXOnTsrISFBx44ds+vzww8/aPr06QoODlZoaKhmz56tqqoqN44CAAB4M4+GnTNnz
mjIkCHKyMhosn3NmjV64YUXtH79en344YcKDAxUYmKiqqurbX2mT5+uo0ePaufOndq+fbs++OADPfzww+4aAgAA8HIe/er52LFjNXbs2CbbDMPQ888/r2XLlmnChAmSpFdffVURERF66623dM899+irr77Sjh079PHHH+uGG26QJL344osaN26cnnnmGUVHRze57ZqaGtXU1NjmKysrXTwyAGg/ThR8rSEjRjpsj+oeph3btrqxIsC1PBp2mlNQUKDi4mIlJCTYloWEhGj48OHKycnRPffco5ycHIWGhtqCjiQlJCSoQ4cO+vDDDzVp0qQmt52enq4nnniizccAAO3BOcOiAbOectj+1cYlbqwGcD2vDTvFxcWSpIiICLvlERERtrbi4mKFh4fbtXfs2FFdu3a19WnK0qVLlZaWZpuvrKxUz549XVU6vETS+EkqKitvsu2E1aoBbq4HAOAZXht22pK/v7/8/f09XQbaWFFZucN/reYvu8vN1QAAPMVrHz2PjIyUJJWUlNgtLykpsbVFRkaqtLTUrv3cuXP64YcfbH0AAMDlzWvP7MTGxioyMlLZ2dkaOnSopJ8uN3344YeaO3euJCk+Pl6nTp1Sbm6uhg0bJknavXu3GhoaNHz4cE+VDjdp7jKVxKUqAMBPPBp2qqqqlJ+fb5svKCjQoUOH1LVrV/Xq1UsLFizQb3/7W/Xp00exsbF6/PHHFR0drYkTJ0qSBgwYoKSkJD300ENav3696urqNG/ePN1zzz0On8SCeTR3mUriUhUA4CceDTuffPKJ7rjjDtv8+ZuGU1JSlJWVpccee0xnzpzRww8/rFOnTumWW27Rjh071KlTJ9s6f/zjHzVv3jyNHj1aHTp00JQpU/TCCy+4fSwAAMA7eTTs3H777TIMw2G7xWLRqlWrtGrVKod9unbtqtdff70tygMAACbgtTcoAwAAuAJhBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmJrXvkEZAOAdThR8rSEjRjbZFtU9TDu2bXVzRUDrEHYAAM06Z1gcvq38q41L3FwN0HpcxgIAAKZG2AEAAKZG2AEAAKZG2AEAAKZG2AEAAKZG2AEAAKZG2AEAAKZG2AEAAKZG2AEAAKZG2AEAAKZG2AEAAKZG2AEAAKbGh0ABAE7ji+hoDwg7AACn8UV0tAeEHXi1pPGTVFRW3mTbCatVA9xcDwCg/SHswKsVlZU7/Fdj/rK73FwNAKA9IuwAANoE9/PAWxB2AABtgvt54C149BwAAJgaYQcAAJgaYQcAAJgaYQcAAJgaNyjD43iXDgCgLRF24HG8SwcA0JYIO3ALzt4AADyFsAO34OwNAMBTuEEZAACYGmEHAACYGmEHAACYGmEHAACYGjcowyWae9pK4okrAIDnEHbgEs09bSXxxBUAwHMIOwAAtztR8LWGjBjpsD2qe5h2bNvqxopgZoQdAIDbnTMszZ4N/mrjEjdWA7Mj7AAAvE5zZ34464PWIuwAALxOc2d+OOuD1uLRcwAAYGqEHQAAYGqEHQAAYGqEHQAAYGrcoIyL1txbknlDMgDAWxF2cNGae0syb0gGAHgrLmMBAABT48wOAKBd4YWDaC3CDgCgXeGFg2gtwo4JNXcjscS/fAAAlxfCjgk1dyOxJL37+FROAQMwpUv5mnpz/1Dk/43tG2HnMsQpYABm1dLX1Jv7x94Jq1VjV77RZBv/b2zfCDuw09y/iniXDoD2rrkwxCs0zIuwAzv8jwAAYDaEnXaKtxkDgPs0d9a76LtvFHVlT4frcr+P55km7GRkZOjpp59WcXGxhgwZohdffFE33nijp8u6JC0FGkfXljkDAwCu1dJZ7+buE+J+H88zRdj505/+pLS0NK1fv17Dhw/X888/r8TEROXl5Sk8PNzT5TnU0iPiBBoAAC6dKcLO2rVr9dBDD+mBBx6QJK1fv17vvPOONm7cqF//+tcers6xlh4RJ9AAQPvHG589r92HndraW
uXm5mrp0qW2ZR06dFBCQoJycnKaXKempkY1NTW2+YqKCklSZWWly+ubfNe9Kv7+hybbrN9+q2v+ecbhukZDg+octDvb1lbbvVz22VbbvVz22VbbvVz22VbbvVz22VbbbWmfdfWGrpm2vMm2nb+doYE33NRkW3HRd4qMurLJtshuXbXlzdcd7rM5zf295I3bbc75v7cNw2i+o9HOfffdd4Yk48CBA3bLFy9ebNx4441NrrNixQpDEhMTExMTE5MJpm+++abZrNDuz+w4Y+nSpUpLS7PNNzQ06IcfflBYWJgsFosHK3ONyspK9ezZU998842Cg4M9XU6bYIzmwBjNgTGaQ3sco2EYOn36tKKjo5vt1+7DTrdu3eTj46OSkhK75SUlJYqMjGxyHX9/f/n7+9stCw0NbasSPSY4OLjd/MA6izGaA2M0B8ZoDu1tjCEhIS326eCGOtqUn5+fhg0bpuzsbNuyhoYGZWdnKz4+3oOVAQAAb9Duz+xIUlpamlJSUnTDDTfoxhtv1PPPP68zZ87Yns4CAACXL1OEnbvvvltlZWVavny5iouLNXToUO3YsUMRERGeLs0j/P39tWLFikaX6syEMZoDYzQHxmgOZh6jxTBael4LAACg/Wr39+wAAAA0h7ADAABMjbADAABMjbADAABMjbDTzvTu3VsWi6XRlJqa2mT/rKysRn07derk5qqb98EHH2j8+PGKjo6WxWLRW2+9ZdduGIaWL1+uqKgode7cWQkJCTp27FiL283IyFDv3r3VqVMnDR8+XB999FEbjaBlzY2xrq5OS5Ys0aBBgxQYGKjo6GjNmDFDhYWFzW5z5cqVjY5t//7923gkjrV0HGfOnNmo3qSkpBa3216Oo6QmfzctFouefvpph9v0tuOYnp6un/3sZwoKClJ4eLgmTpyovLw8uz7V1dVKTU1VWFiYunTpoilTpjR6seuFnP09bgstjfGHH37QI488on79+qlz587q1auXfvWrX9m+o+iIsz/jbeFijuPtt9/eqN45c+Y0u11vOo6tQdhpZz7++GMVFRXZpp07d0qSpk6d6nCd4OBgu3VOnjzprnIvypkzZzRkyBBlZGQ02b5mzRq98MILWr9+vT788EMFBgYqMTFR1dXVDrf5pz/9SWlpaVqxYoU+/fRTDRkyRImJiSotLW2rYTSruTGePXtWn376qR5//HF9+umn2rJli/Ly8vSLX/yixe1ee+21dsf273//e1uUf1FaOo6SlJSUZFfvG2+80ew229NxlGQ3tqKiIm3cuFEWi0VTpkxpdrvedBz37dun1NRUHTx4UDt37lRdXZ3GjBmjM2f+70OXCxcu1LZt27R582bt27dPhYWFmjx5crPbdeb3uK20NMbCwkIVFhbqmWee0ZEjR5SVlaUdO3Zo9uzZLW67tT/jbeVijqMkPfTQQ3b1rlmzptntetNxbBUXfIsTHjR//nzj6quvNhoaGppsz8zMNEJCQtxb1CWQZGzdutU239DQYERGRhpPP/20bdmpU6cMf39/44033nC4nRtvvNFITU21zdfX1xvR0dFGenp6m9TdGheOsSkfffSRIck4efKkwz4rVqwwhgwZ4triXKSpMaakpBgTJkxo1Xba+3GcMGGCMWrUqGb7ePNxNAzDKC0tNSQZ+/btMwzjp98/X19fY/PmzbY+X331lSHJyMnJaXIbzv4eu8uFY2zKm2++afj5+Rl1dXUO+zjzM+4uTY3xtttuM+bPn3/R2/D249gczuy0Y7W1tXrttdc0a9asZj9gWlVVpZiYGPXs2VMTJkzQ0aNH3VjlpSkoKFBxcbESEhJsy0JCQjR8+HDl5OQ0uU5tba1yc3Pt1unQoYMSEhIcruNtKioqZLFYWvxm27FjxxQdHa2rrrpK06dPl9VqdU+BTtq7d6/Cw8PVr18/zZ07V+Xl5Q77tvfjWFJSonfeeeeizgZ483E8f+mma9eukqTc3FzV1dXZHZf+/furV69eDo+LM
7/H7nThGB31CQ4OVseOzb+LtzU/4+7kaIx//OMf1a1bNw0cOFBLly7V2bNnHW7D249jc0zxBuXL1VtvvaVTp05p5syZDvv069dPGzdu1ODBg1VRUaFnnnlGN910k44ePaoePXq4r1gnFRcXS1Kjt2FHRETY2i70/fffq76+vsl1/vd//7dtCnWh6upqLVmyRNOmTWv2Y3zDhw9XVlaW+vXrp6KiIj3xxBO69dZbdeTIEQUFBbmx4ouTlJSkyZMnKzY2VsePH9f/+3//T2PHjlVOTo58fHwa9W/vx/GVV15RUFBQi5d3vPk4NjQ0aMGCBbr55ps1cOBAST/9Tvr5+TUK4s39Tjrze+wuTY3xQt9//71Wr16thx9+uNlttfZn3F0cjfHee+9VTEyMoqOj9cUXX2jJkiXKy8vTli1bmtyONx/HlhB22rENGzZo7NixzX7aPj4+3u6DqDfddJMGDBigl19+WatXr3ZHmWiFuro63XXXXTIMQ+vWrWu279ixY21/Hjx4sIYPH66YmBi9+eabF3U2wd3uuece258HDRqkwYMH6+qrr9bevXs1evRoD1bWNjZu3Kjp06e3+ECANx/H1NRUHTlyxKP3ELW1lsZYWVmp5ORkxcXFaeXKlc1uy1t/xh2N8V/D26BBgxQVFaXRo0fr+PHjuvrqq91dZpviMlY7dfLkSe3atUsPPvhgq9bz9fXVddddp/z8/DaqzLUiIyMlqdGTHiUlJba2C3Xr1k0+Pj6tWscbnA86J0+e1M6dO5s9q9OU0NBQ9e3bt90c26uuukrdunVzWG97PY6S9Le//U15eXmt/v2UvOc4zps3T9u3b9eePXvszgJHRkaqtrZWp06dsuvf3HFx5vfYHRyN8bzTp08rKSlJQUFB2rp1q3x9fVu1/ZZ+xt2hpTH+q+HDh0uSw3q99TheDMJOO5WZmanw8HAlJye3ar36+nodPnxYUVFRbVSZa8XGxioyMlLZ2dm2ZZWVlfrwww/tzlj9Kz8/Pw0bNsxunYaGBmVnZztcx9POB51jx45p165dCgsLa/U2qqqqdPz48XZzbL/99luVl5c7rLc9HsfzNmzYoGHDhmnIkCGtXtfTx9EwDM2bN09bt27V7t27FRsba9c+bNgw+fr62h2XvLw8Wa1Wh8fFmd/jttTSGM/XN2bMGPn5+ekvf/mLU6/saOlnvC1dzBgvdOjQIUlyWK+3HcdW8ez90XBGfX290atXL2PJkiWN2u6//37j17/+tW3+iSeeMN577z3j+PHjRm5urnHPPfcYnTp1Mo4ePerOkpt1+vRp47PPPjM+++wzQ5Kxdu1a47PPPrM9ifTkk08aoaGhxttvv2188cUXxoQJE4zY2Fjjn//8p20bo0aNMl588UXb/KZNmwx/f38jKyvL+PLLL42HH37YCA0NNYqLi90+PsNofoy1tbXGL37xC6NHjx7GoUOHjKKiIttUU1Nj28aFY3z00UeNvXv3GgUFBcb+/fuNhIQEo1u3bkZpaaknhtjsGE+fPm0sWrTIyMnJMQoKCoxdu3YZ119/vdGnTx+jurrato32fBzPq6ioMAICAox169Y1uQ1vP45z5841QkJCjL1799r9LJ49e9bWZ86cOUavXr2M3bt3G5988okRHx9vxMfH222nX79+xpYtW2zzF/N77C4tjbGiosIYPny4MWjQICM/P9+uz7lz55oc48X+jHvLGPPz841Vq1YZn3zyiVFQUGC8/fbbxlVXXWWMHDnSbjvefBxbg7DTDr333nuGJCMvL69R22233WakpKTY5hcsWGD06tXL8PPzMyIiIoxx48YZn376qRurbdmePXsMSY2m8+NoaGgwHn/8cSMiIsLw9/c3Ro8e3WjsMTExxooVK+yWvfjii7ax33jjjcbBgwfdNKLGmhtjQUFBk22SjD179ti2ceEY7777biMqKsrw8/MzrrzySuPuu+828vPz3T+4/19zYzx79qwxZswYo3v37oavr68RE
xNjPPTQQ41CS3s+jue9/PLLRufOnY1Tp041uQ1vP46OfhYzMzNtff75z38av/zlL40rrrjCCAgIMCZNmmQUFRU12s6/rnMxv8fu0tIYHR1nSUZBQYHdds6vc7E/4+7S0hitVqsxcuRIo2vXroa/v79xzTXXGIsXLzYqKioabcdbj2NrWAzDMFx1lggAAMDbcM8OAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAI/au3evLBZLow9Lttb+/fs1aNAg+fr6auLEiU0ua+2+br/9di1YsOCS6gLgeYQdAM2aOXOmLBZLo8mdX3Lu3bt3kzU8+eSTtj5paWkaOnSoCgoKlJWV1eSym266SUVFRQoJCbmo/W7ZskWrV69uiyEBcKOOni4AgPdLSkpSZmam3bLu3bu7tYZVq1bpoYceslsWFBRk+/Px48c1Z84c9ejRo9llkZGRF73Prl27XkLFALwFZ3YAtMjf31+RkZF2k4+Pj2bOnGm7ZHTeggULdPvtt9vmGxoalJ6ertjYWHXu3FlDhgzRn//851bXEBQU1KiGwMBAnThxQhaLReXl5Zo1a5YsFouysrKaXNbUZaz9+/fr9ttvV0BAgK644golJibqxx9/lNT4MlZNTY0WLVqkK6+8UoGBgRo+fLj27t1ra8/KylJoaKjee+89DRgwQF26dFFSUpKKiorsxrJx40Zde+218vf3V1RUlObNmydJmjVrln7+85/b9a2rq1N4eLg2bNjQ6v9mAH5C2AHQptLT0/Xqq69q/fr1Onr0qBYuXKj77rtP+/btc8n2e/bsqaKiIgUHB+v5559XUVGRpk6d2mjZ3Xff3WjdQ4cOafTo0YqLi1NOTo7+/ve/a/z48aqvr29yX/PmzVNOTo42bdqkL774QlOnTlVSUpKOHTtm63P27Fk988wz+u///m998MEHslqtWrRoka193bp1Sk1N1cMPP6zDhw/rL3/5i6655hpJ0oMPPqgdO3bYhaPt27fr7NmzTdYP4CJ5+rPrALxbSkqK4ePjYwQGBtqmf/u3f7O1TZgwwa7//Pnzjdtuu80wDMOorq42AgICjAMHDtj1mT17tjFt2jTDMAxjz549hiTjxx9/dFhDTEyM4efnZ1dDYGCg8cEHH9j6hISEGJmZmXbrXbjswn1NmzbNuPnmmx3u97bbbjPmz59vGIZhnDx50vDx8TG+++47uz6jR482li5dahiGYWRmZhqSjPz8fFt7RkaGERERYZuPjo42fvOb3zjcZ1xcnPHUU0/Z5sePH2/MnDnTYX8ALeOeHQAtuuOOO7Ru3TrbfGBg4EWtl5+fr7Nnz+rOO++0W15bW6vrrruuVTUsXrxYM2fOtFt25ZVXtmobFzp06JCmTp16UX0PHz6s+vp69e3b1255TU2NwsLCbPMBAQG6+uqrbfNRUVEqLS2VJJWWlqqwsFCjR492uJ8HH3xQf/jDH/TYY4+ppKRE7777rnbv3t2aYQG4AGEHQIsCAwNtl1r+VYcOHWQYht2yuro625+rqqokSe+8806jYOLv79+qGrp169ZkDZeic+fOF923qqpKPj4+ys3NlY+Pj11bly5dbH/29fW1a7NYLLb/RhezvxkzZujXv/61cnJydODAAcXGxurWW2+96DoBNEbYAeC07t2768iRI3bLDh06ZPsLPy4uTv7+/rJarbrttts8UWKzBg8erOzsbD3xxBMt9r3uuutUX1+v0tJSp8NHUFCQevfurezsbN1xxx1N9gkLC9PEiROVmZmpnJwcPfDAA07tC8D/IewAcNqoUaP09NNP69VXX1V8fLxee+01HTlyxHaJKigoSIsWLdLChQvV0NCgW265RRUVFdq/f7+Cg4OVkpJy0fs6ffq0iouL7ZYFBAQoODjY6fqXLl2qQYMG6Ze//KXmzJkjPz8/7dmzR1OnTlW3bt3s+vbt21fTp0/XjBkz9Oyzz+q6665TWVmZs
rOzNXjwYCUnJ1/UPleuXKk5c+YoPDxcY8eO1enTp7V//3498sgjtj4PPvigfv7zn6u+vr5V/40ANI2nsQA4LTExUY8//rgee+wx/exnP9Pp06c1Y8YMuz6rV6/W448/rvT0dA0YMEBJSUl65513FBsb26p9LV++XFFRUXbTY489dkn19+3bV++//74+//xz3XjjjYqPj9fbb7+tjh2b/ndgZmamZsyYoUcffVT9+vXTxIkT9fHHH6tXr14Xvc+UlBQ9//zzeumll3Tttdfq5z//ud3TXJKUkJCgqKgoJSYmKjo6+pLGCECyGBdecAcAeFRVVZWuvPJKZWZmavLkyZ4uB2j3uIwFAF6ioaFB33//vZ599lmFhobqF7/4hadLAkyBsAMAXsJqtSo2NlY9evRQVlaWw8tpAFqHy1gAAMDUuEEZAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACY2v8HdV+bFv1EOIgAAAAASUVORK5CYII=\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "# check for outliers\n", + "sns.boxplot(df[\"fuel_efficiency_mpg\"]);" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 411 + }, + "id": "GwqW9iX0zBtO", + "outputId": "1fb6f248-a0db-4cdf-80a8-0fe432ab2130" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkAAAAGKCAYAAADkAf55AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAMktJREFUeJzt3XtclHXe//H3gDKYyigqpwIFWyUPKJtlppmlq1JZHu4slzbR0k0xLbbDuqGkaVR3B9eNdG1NLbVzWpnZFh7I9VSaN+UvSQxFw6HMYAQTjJnfHz2cdeIgDgMDc72ej8f1WK7v9b2+85l778fO2+v6XtfX5HA4HAIAADAQP28XAAAA0NAIQAAAwHAIQAAAwHAIQAAAwHAIQAAAwHAIQAAAwHAIQAAAwHAIQAAAwHCaebuAxshut6ugoECtW7eWyWTydjkAAKAWHA6HTp48qYiICPn51XyNhwBUhYKCAkVGRnq7DAAA4IYjR47okksuqbEPAagKrVu3lvTr/wGDgoK8XA0AAKgNm82myMhI5+94TQhAVTh72ysoKIgABABAE1Ob6StMggYAAIZDAAIAAIZDAAIAAIZDAAIAAIZDAAIAAIZDAAIAAIZDAAIAAIZDAAIAAIbDixABGEZFRYWys7N14sQJBQcHKy4uTv7+/t4uC4AXEIAAGEJWVpZeeOEFWa1WZ1tYWJimTp2qgQMHerEyAN7ALTAAPi8rK0tpaWmKiYlRRkaG1q9fr4yMDMXExCgtLU1ZWVneLhFAAzM5HA6Ht4tobGw2mywWi4qLi1kLDGjiKioqlJiYqJiYGM2bN09+fv/9d5/dbldqaqry8vK0cuVKbocBTdyF/H5zBQiAT8vOzpbValViYqJL+JEkPz8/JSYm6tixY8rOzvZShQC8gQAEwKedOHFCkhQdHV3l8bPtZ/sBMAYCEACfFhwcLEnKy8ur8vjZ9rP9ABgDAQiAT4uLi1NYWJhWrVolu93ucsxut2vVqlUKDw9XXFyclyoE4A0EIAA+zd/fX1OnTtX27duVmpqqffv26dSpU9q3b59SU1O1fft2TZkyhQnQgMHwFFgVeAoM8D1VvQcoPDxcU6ZM4T1AgI+4kN9vAlAVCECAb+JN0IBvu5Dfb94EDcAw/P39FR8f7+0yADQCzAECAACGQwACAACGQwACAACGQwACAACGQwACAACGw1NgAAyDx+ABnOXVK0Dp6em64oor1Lp1a4WEhGjkyJHKyclx6TNo0CCZTCaX7Z577qlxXIfDodmzZys8PFwtWrTQkCFDdODAgfr8KgAauaysLCUmJur+++/XY489pvvvv1+JiYnKysrydmkAvMCrAWjLli1KTk7Wjh079PHHH+vMmTMaOnSoSktLXfpNmjRJx44dc25PPfVUjeM+9dRTWrhwoRYvXqydO3eqZcuWGjZsmE6fPl2fXwdAI5WVlaW0tDTFxMQoIyND69evV0ZGhmJiYpSWlkYIAgyoUb0J+ocfflBISIi2bNnifDX9oEGD1Lt3by1YsKBWYzgcDkVEROgvf/mLHnjgAUlScXGxQkNDtXz5ct1+++3nHYM3QQO+o6KiQomJiYqJidG8efPk5/fff/fZ7XalpqYqLy9PK1eu5HYY0MRdyO93o5oEXVxcLEkKDg52aV+1apXat2+vHj16aObMmTp16lS1Y+Tl5clqtWrIkCHONovFor59+2r79u1VnlNWViabzeayAfAN2dnZslqtSkxMdAk/kuTn56fExEQdO3ZM2dnZXqoQgDc0mknQdrtd9913n/r3768ePXo42//4xz+qY8eOioiIUHZ2th5++GHl5OTonXfeqXKcswsdhoaGurSHhoa6LIJ4rvT0dM2ZM8dD3wRAY3LixAlJUnR0dJXHz7af7QfAGBpNAEpOTtZXX32lrVu3urRPnjzZ+XfPnj0VHh6uwYMH6+DBg+rcubNHPnvmzJlKSUlx7ttsNkVGRnpkb
ADedfaKcl5enrp3717peF5enks/AMbQKG6BTZs2TevWrdOmTZt0ySWX1Ni3b9++kqTc3Nwqj4eFhUmSCgsLXdoLCwudx37LbDYrKCjIZQPgG+Li4hQWFqZVq1bJbre7HLPb7Vq1apXCw8MVFxfnpQoBeINXA5DD4dC0adO0Zs0abdy4sdpL1Ofau3evJCk8PLzK49HR0QoLC1NmZqazzWazaefOnerXr59H6gbQdPj7+2vq1Knavn27UlNTtW/fPp06dUr79u1Tamqqtm/frilTpjABGjAYrz4FNnXqVK1evVrvvvuuunbt6my3WCxq0aKFDh48qNWrV+uGG25Qu3btlJ2drfvvv1+XXHKJtmzZ4uwfGxur9PR0jRo1SpL05JNP6oknntCKFSsUHR2tWbNmKTs7W//v//0/BQYGnrcungIDfE9WVpZeeOEFl7mA4eHhmjJlivOpUwBN24X8fnt1DtCiRYsk/fqo+7mWLVumpKQkBQQE6JNPPtGCBQtUWlqqyMhIjRkzRqmpqS79c3JynE+QSdJDDz2k0tJSTZ48WUVFRRowYIA2bNhQq/ADwDcNHDhQ/fv3503QACQ1svcANRZcAQIAoOlpsu8BAgAAaAgEIAAAYDgEIAAAYDgEIAAAYDgEIAAAYDgEIAAAYDgEIAAAYDgEIAAAYDgEIAAAYDheXQoDABpSRUUFS2EAkEQAAmAQVS2GGhYWpqlTp7IYKmBA3AID4POysrKUlpammJgYZWRkaP369crIyFBMTIzS0tKUlZXl7RIBNDACEACfVlFRoRdeeEH9+vXTnDlzVF5eru3bt6u8vFxz5sxRv379tGjRIlVUVHi7VAANiFtgAHxadna2rFarRowYoT/96U+VboHddNNN2rZtm7KzsxUfH+/FSgE0JAIQAJ924sQJSdKLL76oq6++WrNmzVJ0dLTy8vK0atUq/etf/3LpB8AYuAUGwKe1adNGktSzZ0/NmzdP3bt310UXXaTu3btr3rx56tmzp0s/AMbAFSAAhlFRUaH/+7//cz4G36NHD2+XBMBLCEAAfFpRUZEk6csvv9RNN92ksrIy5zGz2ezcP9sPgDFwCwyATwsODvZoPwC+gQAEwKd1795d/v7+atmypYKCglyOBQUFqWXLlvL391f37t29VCEAb+AWGACftm/fPlVUVKi0tFTNmzfX2LFjFRERoYKCAv373/9WaWmpsx+PwQPGQQAC4NOOHz8u6dd3/nz//fd64403nMf8/PwUFhYmq9Xq7AfAGLgFBsCnnZ3cbLVa1bx5c5djzZs3d74YkUnQgLFwBQiATzt33k+vXr0UGRmp8vJyBQQE6MiRI9q1a1elfgB8HwEIgE8798rOrl27nIGnpn4AfB+3wAD4tJMnT3q0HwDfQAAC4NNqu8o7q8EDxkIAAuDTbDab828/P9f/yTt3/9x+AHwfAQiAT9u3b5/z72bNXKc9nrt/bj8Avo8ABMCnlZSUOP8uLy93OXbu/rn9APg+AhAAnxYZGenRfgB8g1cDUHp6uq644gq1bt1aISEhGjlypHJycpzHT5w4oXvvvVddu3ZVixYtFBUVpenTp6u4uLjGcZOSkmQymVy24cOH1/fXAdAI9enTx6P9APgGrwagLVu2KDk5WTt27NDHH3+sM2fOaOjQoc61eQoKClRQUKCnn35aX331lZYvX64NGzborrvuOu/Yw4cP17Fjx5zbq6++Wt9fB0AjVNtbW9wCA4zFqy9C3LBhg8v+8uXLFRISot27d2vgwIHq0aOH3n77befxzp07a/78+brjjjv0yy+/VJrQeC6z2aywsLB6qx1A07B//36P9gPgGxrVHKCzt7aCg4Nr7BMUFFRj+JGkzZs3KyQkRF27dtWUKVP0448/Vtu3rKxMNpvNZQPgG44ePerRfgB8Q6MJQHa7Xffdd5/69++vHj16VNnn+PHjeuyxxzR58uQaxxo+fLhefvllZWZm6sknn9SWLVuUkJBQ7YvO0
tPTZbFYnBuTIQHfUVZW5tF+AHyDyeFwOLxdhCRNmTJFH374obZu3apLLrmk0nGbzaY//OEPCg4O1nvvvVdpVeeafPvtt+rcubM++eQTDR48uNLxsrIyl//xs9lsioyMdF5tAtB0jR8/XocPHz5vv44dO2rFihUNUBGA+mKz2WSxWGr1+90orgBNmzZN69at06ZNm6oMPydPntTw4cPVunVrrVmz5oLCjyTFxMSoffv2ys3NrfK42WxWUFCQywbAN8TGxnq0HwDf4NUA5HA4NG3aNK1Zs0YbN25UdHR0pT42m01Dhw5VQECA3nvvPQUGBl7w5xw9elQ//vijwsPDPVE2gCbEarV6tB8A3+DVAJScnKyVK1dq9erVat26taxWq6xWq37++WdJ/w0/paWlWrp0qWw2m7PPufN5YmNjtWbNGkm/Psr64IMPaseOHTp06JAyMzN1yy236NJLL9WwYcO88j0BeM8333zj0X4AfINXH4NftGiRJGnQoEEu7cuWLVNSUpL27NmjnTt3SpIuvfRSlz55eXnq1KmTJCknJ8f5BJm/v7+ys7O1YsUKFRUVKSIiQkOHDtVjjz0ms9lcv18IQKPzyy+/eLQfAN/g1QB0vvnXgwYNOm+f347TokULffTRR3WuDYBvMJvNOnPmTK36ATCORjEJGgDqy8UXX+zRfgB8AwEIgE+zWCwe7QfANxCAAPi0/Px8j/YD4Bu8OgcIMIrTp0/zA+slp06dqnU/ngTzjqioKLdecQLUBQEIaAD5+fnnXcIF3mWz2fjvyEuWLFmiLl26eLsMGAwBCGgAUVFRWrJkibfLMKTTp09r+vTpkqRWrVrpkksu0f79+xUbG6ujR4+qpKREkrRw4UKuQnhJVFSUt0uAARGAgAYQGBjIv3C9KDY2Vvv371dJSYn2798vSc7/PHs8Li7OW+UB8AImQQPweYsXL652ra/Y2FgtXry4gSsC4G0EIACGsHjxYq1bt069evWSJPXq1Uvr1q0j/AAGRQACYBitWrVScnKypF/XImzVqpWXKwLgLQQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOAQgAABgOF4NQOnp6briiivUunVrhYSEaOTIkcrJyXHpc/r0aSUnJ6tdu3Zq1aqVxowZo8LCwhrHdTgcmj17tsLDw9WiRQsNGTJEBw4cqM+vAgAAmhCvBqAtW7YoOTlZO3bs0Mcff6wzZ85o6NChKi0tdfa5//779f777+vNN9/Uli1bVFBQoNGjR9c47lNPPaWFCxdq8eLF2rlzp1q2bKlhw4bp9OnT9f2VAABAE9DMmx++YcMGl/3ly5crJCREu3fv1sCBA1VcXKylS5dq9erVuv766yVJy5Yt02WXXaYdO3boqquuqjSmw+HQggULlJqaqltuuUWS9PLLLys0NFRr167V7bffXv9fDAAANGqNag5QcXGxJCk4OFiStHv3bp05c0ZDhgxx9omNjVVUVJS2b99e5Rh5eXmyWq0u51gsFvXt27fac8rKymSz2Vw2AADgu9y6AlRdQDCZTDKbzQoICLjgMe12u+677z71799fPXr0kCRZrVYFBASoTZs2Ln1DQ0NltVqrHOdse2hoaK3PSU9P15w5cy64ZgAA0DS5dQWoTZs2atu2baWtTZs2atGihTp27Ki0tDTZ7fZaj5mcnKyvvvpKr732mjsl1cnMmTNVXFzs3I4cOdLgNQAAgIbj1hWg5cuX65FHHlFSUpKuvPJKSdKuXbu0YsUKpaam6ocfftDTTz8ts9msv/3tb+cdb9q0aVq3bp2ysrJ0ySWXONvDwsJUXl6uoqIil6tAhYWFCgsLq3Kss+2FhYUKDw93Oad3795VnmM2m2U2m
89bJwAA8A1uBaAVK1bomWee0dixY51tI0aMUM+ePfXPf/5TmZmZioqK0vz582sMQA6HQ/fee6/WrFmjzZs3Kzo62uX45ZdfrubNmyszM1NjxoyRJOXk5Cg/P1/9+vWrcszo6GiFhYUpMzPTGXhsNpt27typKVOmuPN1AQCAj3HrFti2bdsUHx9fqT0+Pt450XjAgAHKz8+vcZzk5GStXLlSq1evVuvWrWW1WmW1WvXzzz9L+nXy8l133aWUlBRt2rRJu3fv1oQJE9SvXz+XJ8BiY2O1Zs0aSb/OQ7rvvvs0b948vffee/ryyy915513KiIiQiNHjnTn6wIAAB/j1hWgyMhILV26VE888YRL+9KlSxUZGSlJ+vHHH9W2bdsax1m0aJEkadCgQS7ty5YtU1JSkiTpueeek5+fn8aMGaOysjINGzZML7zwgkv/nJwc5xNkkvTQQw+ptLRUkydPVlFRkQYMGKANGzYoMDDQna8LAAB8jFsB6Omnn9att96qDz/8UFdccYUk6fPPP9f+/fv11ltvSZI+++wz3XbbbTWO43A4zvtZgYGBysjIUEZGRq3HMZlMmjt3rubOnXve8QEAgPG4FYBuvvlm7d+/X//85z/1zTffSJISEhK0du1aderUSZKYbwMAABott98EHR0dXekWGAAAQFPgdgD66aeftHTpUn399deSpG7dumnChAnOtzgDAAA0Vm49BZaVlaVOnTpp4cKF+umnn/TTTz9p4cKFio6OVlZWlqdrBAAA8Ci3rgAlJyfrtttu06JFi+Tv7y9Jqqio0NSpU5WcnKwvv/zSo0UCAAB4kltXgHJzc/WXv/zFGX4kyd/fXykpKcrNzfVYcQAAAPXBrQD0+9//3jn351xff/21evXqVeeiAAAA6pNbt8CmT5+uGTNmKDc31/lG5h07digjI0NPPPGEsrOznX3j4uI8UykAAICHuBWAxo0bJ+nXNy5XdcxkMsnhcMhkMqmioqJuFQIAAHiYWwEoLy/P03UAAAA0GLcCUMeOHT1dBwAAQINx+0WIBQUF2rp1q77//nvZ7XaXY9OnT69zYQAAAPXFrQC0fPly/fnPf1ZAQIDatWsnk8nkPGYymQhAAACgUXMrAM2aNUuzZ8/WzJkz5efn1pP0AAAAXuNWejl16pRuv/12wg8AAGiS3Eowd911l958801P1wIAANAg3LoFlp6erptuukkbNmxQz5491bx5c5fjzz77rEeKAwAAqA9uB6CPPvpIXbt2laRKk6ABAAAaM7cC0DPPPKOXXnpJSUlJHi4HAACg/rk1B8hsNqt///6ergUAAKBBuBWAZsyYoX/84x+ergUAAKBBuHULbNeuXdq4caPWrVun7t27V5oE/c4773ikOAAAgPrgVgBq06aNRo8e7elaAAAAGoRbAWjZsmW16vef//xHffr0kdlsdudjAAAA6kW9vso5ISFB3333XX1+BAAAwAWr1wDkcDjqc3gAAAC3sJgXAAAwHAIQAAAwHAIQAAAwnHoNQKwLBgAAGiMmQQMAAMNxKwClpaXp8OHD5+138uRJxcTEVHs8KytLI0aMUEREhEwmk9auXety3GQyVbn97//+b7VjPvroo5X6x8bG1vq7AQAA3+dWAHr33XfVuXNnDR48WKtXr1ZZWZlbH15aWqpevXopIyOjyuPHjh1z2V566SWZTCaNGTOmxnG7d+/uct7WrVvdqg8AAPgmt94EvXfvXn3xxRdatmyZZsyYoeTkZN1+++2aOHGirrjiilqPk5CQoISEhGqPh4WFuey/++67uu6662q8qiRJzZo1q3QuAADAWW7PAYqPj9fChQtVUFCgpUuX6ujRo+rfv7/i4uL097//XcXFxZ6sU4WFhfrggw901113nbfvgQMHFBERoZiYGCUmJio/P7/G/mVlZbLZbC4bAADwXXWeBO1wOHTmzBmVl5fL4XCobdu2ev755xUZGanXX3/dEzVKklasWKHWrVufdxHWvn37a
vny5dqwYYMWLVqkvLw8XXPNNTp58mS156Snp8tisTi3yMhIj9UNAAAaH7cD0O7duzVt2jSFh4fr/vvvV3x8vL7++mtt2bJFBw4c0Pz58zV9+nSPFfrSSy8pMTFRgYGBNfZLSEjQrbfeqri4OA0bNkzr169XUVGR3njjjWrPmTlzpoqLi53bkSNHPFY3AABofNyaA9SzZ0/t379fQ4cO1dKlSzVixAj5+/u79Bk3bpxmzJjhkSI//fRT5eTkuHVFqU2bNurSpYtyc3Or7WM2m1mxHgAAA3HrCtDYsWN16NAhffDBBxo5cmSl8CNJ7du3l91ur3OBkrR06VJdfvnl6tWr1wWfW1JSooMHDyo8PNwjtQAAgKbPrQA0a9YsXXzxxXX+8JKSEu3du1d79+6VJOXl5Wnv3r0uk5ZtNpvefPNN3X333VWOMXjwYD3//PPO/QceeEBbtmzRoUOHtG3bNo0aNUr+/v4aN25cnesFAAC+wa0ANGbMGD355JOV2p966indeuuttR7n888/V3x8vOLj4yVJKSkpio+P1+zZs519XnvtNTkcjmoDzMGDB3X8+HHn/tGjRzVu3Dh17dpVY8eOVbt27bRjxw516NCh1nUBAADfZnK4sV5Fhw4dtHHjRvXs2dOl/csvv9SQIUNUWFjosQK9wWazyWKxqLi4WEFBQd4uB4AHffPNN5o8ebKWLFmiLl26eLscAB50Ib/fbl0BKikpUUBAQKX25s2b8w4dAADQ6LkVgHr27FnlE1mvvfaaunXrVueiAAAA6pNbj8HPmjVLo0eP1sGDB3X99ddLkjIzM/Xqq6/qzTff9GiBAAAAnuZWABoxYoTWrl2rxx9/XG+99ZZatGihuLg4ffLJJ7r22ms9XSMAAIBHuRWAJOnGG2/UjTfe6MlaAAAAGoTbAUiSysvL9f3331d64WFUVFSdigIAAKhPbgWgAwcOaOLEidq2bZtLu8PhkMlkUkVFhUeKQ90VFhaquLjY22UAjcbhw4dd/hPAf1ksFoWGhnq7jAbhVgBKSkpSs2bNtG7dOoWHh8tkMnm6LnhAYWGh7vjTnTpTXubtUoBGZ/78+d4uAWh0mgeYtfKVlw0RgtwKQHv37tXu3bsVGxvr6XrgQcXFxTpTXqafY66VPdDi7XIAAI2Y3+li6dstKi4uJgBVp1u3bi7LT6BxswdaZG/Z3ttlAADQaLj1IsQnn3xSDz30kDZv3qwff/xRNpvNZQMAAGjM3LoCNGTIEEm/rsR+LiZBAwCApsCtALRp0yZP1wEAANBg3ApAvO0ZAAA0ZW7NAZKkTz/9VHfccYeuvvpqfffdd5KkV155RVu3bvVYcQAAAPXBrQD09ttva9iwYWrRooX27NmjsrJf3zNTXFysxx9/3KMFAgAAeJpbAWjevHlavHixXnzxRTVv3tzZ3r9/f+3Zs8djxQEAANQHtwJQTk6OBg4cWKndYrGoqKiorjUBAADUK7cCUFhYmHJzcyu1b926VTExMXUuCgAAoD65FYAmTZqkGTNmaOfOnTKZTCooKNCqVav0wAMPaMqUKZ6uEQAAwKPcegz+r3/9q+x2uwYPHqxTp05p4MCBMpvNeuCBB3Tvvfd6ukYAAACPcisAmUwmPfLII3rwwQeVm5urkpISdevWTa1atfJ0fQAAAB7nVgA6KyAgQN26dfNULQAAAA2i1gFo9OjRWr58uYKCgjR69Oga+77zzjt1LgwAAKC+1DoAWSwWmUwm598AAABNVa0D0LJly6r8GwAAoKlx6zH4vLw8HThwoFL7gQMHdOjQobrWBAAAUK/cCkBJSUnatm1bpfadO3cqKSmprjUBAADUK7cC0BdffKH+/ftXar/qqqu0d+/eutYEAABQr9wKQCaTSSdPnqzUXlxcrIqKijoXBQAAUJ/cCkADBw5Uenq6S9ipqKhQenq6BgwY4LHiAAAA6oNbL0J88sknNXDgQHXt2lXXXHONJOnTTz+VzWbTxo0bPVogAACAp
7l1Bahbt27Kzs7W2LFj9f333+vkyZO68847tX//fvXo0aPW42RlZWnEiBGKiIiQyWTS2rVrXY4nJSXJZDK5bMOHDz/vuBkZGerUqZMCAwPVt29f7dq160K/IgAA8GFuL4URERGhxx9/vE4fXlpaql69emnixInVvl16+PDhLu8dMpvNNY75+uuvKyUlRYsXL1bfvn21YMECDRs2TDk5OQoJCalTvQAAwDfUOgBlZ2erR48e8vPzU3Z2do194+LiajVmQkKCEhISauxjNpsVFhZW2zL17LPPatKkSZowYYIkafHixfrggw/00ksv6a9//WutxwEAAL6r1gGod+/eslqtCgkJUe/evWUymeRwOCr1M5lMHn0SbPPmzQoJCVHbtm11/fXXa968eWrXrl2VfcvLy7V7927NnDnT2ebn56chQ4Zo+/bt1X5GWVmZysrKnPs2m81j9TcGfj8XebsEAEAjZ7TfiloHoLy8PHXo0MH5d0MYPny4Ro8erejoaB08eFB/+9vflJCQoO3bt8vf379S/+PHj6uiokKhoaEu7aGhodq/f3+1n5Oenq45c+Z4vP7GokVelrdLAACgUal1ABo1apQyMzPVtm1brVixQg888IAuuuii+qxNt99+u/Pvnj17Ki4uTp07d9bmzZs1ePBgj33OzJkzlZKS4ty32WyKjIz02Pje9nP0QNlbtPF2GQCARszv5yJD/YO51gHo66+/Vmlpqdq2bas5c+bonnvuqfcA9FsxMTFq3769cnNzqwxA7du3l7+/vwoLC13aCwsLa5xHZDabzzu5uimzt2gje8v23i4DAIBG44LmAE2YMEEDBgyQw+HQ008/rVatWlXZd/bs2R4r8FxHjx7Vjz/+qPDw8CqPBwQE6PLLL1dmZqZGjhwpSbLb7crMzNS0adPqpSYAAND01DoALV++XGlpaVq3bp1MJpM+/PBDNWtW+XSTyVTrAFRSUqLc3Fznfl5envbu3avg4GAFBwdrzpw5GjNmjMLCwnTw4EE99NBDuvTSSzVs2DDnOYMHD9aoUaOcASclJUXjx49Xnz59dOWVV2rBggUqLS11PhUGAABQ6wDUtWtXvfbaa5J+fbIqMzOzzu/V+fzzz3Xdddc598/Owxk/frwWLVqk7OxsrVixQkVFRYqIiNDQoUP12GOPudyuOnjwoI4fP+7cv+222/TDDz9o9uzZslqt6t27tzZs2FBpYjQAADCuWgeg3//+985J0GlpadXe/roQgwYNqvJR+rM++uij845x6NChSm3Tpk3jlhcAAKhWrZfCODsJWpLmzp2rkpKSeisKAACgPjWpSdAAAACe4NVJ0AAAAN7g1UnQAAAA3uDWavB2u93TdQAAADSYWk+C/q1XXnlF/fv3V0REhA4fPixJeu655/Tuu+96rDgAAID64FYAWrRokVJSUnTDDTeoqKjIufp727ZttWDBAk/WBwAA4HFuBaB//OMfevHFF/XII4+4rMrep08fffnllx4rDgAAoD64FYDy8vIUHx9fqd1sNjvfFQQAANBYuRWAoqOjtXfv3krtGzZs0GWXXVbXmgAAAOqVW0+BpaSkKDk5WadPn5bD4dCuXbv06quvKj09Xf/61788XSMAAIBHuRWA7r77brVo0UKpqak6deqU/vjHPyoiIkJ///vfdfvtt3u6RgAAAI9yKwBJUmJiohITE3Xq1CmVlJRU+VLE//znP+rTp4/L6u0AAADe5vZ7gM666KKLqn0jdEJCgr777ru6fgQAAIBH1TkA1cThcNTn8AAAAG6p1wAEAADQGBGAAACA4RCAAACA4dRrADKZTPU5PAAAgFuYBA0AAAzH7fcA1cbJkyfrc3gAAAC31DoAxcfH1/qW1p49e9wuCJ7nd7rY2yUAABo5o/1W1DoAjRw5sh7LQH2wWCxqHmCWvt3i7VIAAE1A8wCzLBaLt8toECYHE3UqsdlsslgsKi4uVlBQkLfLqZPCwkIVFxsr1QM1OXz4sObPn
69HHnlEHTt29HY5QKNisVgUGhrq7TLcdiG/327PASoqKtJbb72lgwcP6sEHH1RwcLD27Nmj0NBQXXzxxe4OCw8LDQ1t0v/PDNSXjh07qkuXLt4uA4CXuBWAsrOzNWTIEFksFh06dEiTJk1ScHCw3nnnHeXn5+vll1/2dJ0AAAAe49Zj8CkpKUpKStKBAwcUGBjobL/hhhuUlZXlseIAAADqg1sB6LPPPtOf//znSu0XX3yxrFZrnYsCAACoT24FILPZLJvNVqn9m2++UYcOHepcFAAAQH1yKwDdfPPNmjt3rs6cOSPp1yUv8vPz9fDDD2vMmDEeLRAAAMDT3ApAzzzzjEpKShQSEqKff/5Z1157rS699FK1bt1a8+fP93SNAAAAHuVWALJYLPr444/1/vvva+HChZo2bZrWr1+vLVu2qGXLlrUeJysrSyNGjFBERIRMJpPWrl3rPHbmzBk9/PDD6tmzp1q2bKmIiAjdeeedKigoqHHMRx99VCaTyWWLjY1152sCAAAfVae1wAYMGKABAwa4fX5paal69eqliRMnavTo0S7HTp06pT179mjWrFnq1auXfvrpJ82YMUM333yzPv/88xrH7d69uz755BPnfrNm9brkGQAAaGLcSgZz586t8fjs2bNrNU5CQoISEhKqPHb2KtO5nn/+eV155ZXKz89XVFRUteM2a9ZMYWFhtaoBAAAYj1sBaM2aNS77Z86cUV5enpo1a6bOnTvXOgBdqOLiYplMJrVp06bGfgcOHFBERIQCAwPVr18/paen1xiYysrKVFZW5tyv6gk3AADgO9wKQF988UWlNpvNpqSkJI0aNarORVXl9OnTevjhhzVu3Lga1/fo27evli9frq5du+rYsWOaM2eOrrnmGn311Vdq3bp1leekp6drzpw59VI3AABofNyaBF2VoKAgzZkzR7NmzfLUkE5nzpzR2LFj5XA4tGjRohr7JiQk6NZbb1VcXJyGDRum9evXq6ioSG+88Ua158ycOVPFxcXO7ciRI57+CgAAoBHx6OzgswHCk86Gn8OHD2vjxo0XvDp7mzZt1KVLF+Xm5lbbx2w2y2w217VUAADQRLgVgBYuXOiy73A4dOzYMb3yyivVTmp2x9nwc+DAAW3atEnt2rW74DFKSkp08OBB/elPf/JYXQAAoGmrdQDKzs5Wjx495Ofnp+eee87lmJ+fnzp06KDx48dr5syZtf7wkpISlyszeXl52rt3r4KDgxUeHq7/+Z//0Z49e7Ru3TpVVFQ41xkLDg5WQECAJGnw4MEaNWqUpk2bJkl64IEHNGLECHXs2FEFBQVKS0uTv7+/xo0bV+u6AACAb6t1AIqPj9exY8cUEhIi6dcFUdu3b1+nD//888913XXXOfdTUlIkSePHj9ejjz6q9957T5LUu3dvl/M2bdqkQYMGSZIOHjyo48ePO48dPXpU48aN048//qgOHTpowIAB2rFjB2uUAQAAp1oHoDZt2igvL08hISHKz8+Xw+Go84cPGjSoxnFq8xmHDh1y2X/ttdfqWhYAAPBxtQ5AY8aM0bXXXqvw8HBJUp8+feTv719l32+//dYz1QEAANSDWgegJUuWaPTo0crNzdX06dM1adKkat+rAwAA0Jhd0FNgw4cPlyTt3r1bM2bMIAABAIAmya3H4JctW+bpOgAAABqMx94EDQAA0FQQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOEQgAAAgOF4NQBlZWVpxIgRioiIkMlk0tq1a12OOxwOzZ49W+Hh4WrRooWGDBmiAwcOnHfcjIwMderUSYGBgerbt6927dpVT98AA
AA0RV4NQKWlperVq5cyMjKqPP7UU09p4cKFWrx4sXbu3KmWLVtq2LBhOn36dLVjvv7660pJSVFaWpr27NmjXr16adiwYfr+++/r62sAAIAmxqsBKCEhQfPmzdOoUaMqHXM4HFqwYIFSU1N1yy23KC4uTi+//LIKCgoqXSk617PPPqtJkyZpwoQJ6tatmxYvXqyLLrpIL730Uj1+EwAA0JQ02jlAeXl5slqtGjJkiLPNYrGob9++2r59e5XnlJeXa/fu3S7n+Pn5aciQIdWeI0llZWWy2WwuGwAA8F2NNgBZrVZJUmhoqEt7aGio89hvHT9+XBUVFRd0jiSlp6fLYrE4t8jIyDpWDwAAGrNGG4Aa0syZM1VcXOzcjhw54u2SAABAPWq0ASgsLEySVFhY6NJeWFjoPPZb7du3l7+//wWdI0lms1lBQUEuGwAA8F2NNgBFR0crLCxMmZmZzjabzaadO3eqX79+VZ4TEBCgyy+/3OUcu92uzMzMas8BAADG08ybH15SUqLc3Fznfl5envbu3avg4GBFRUXpvvvu07x58/S73/1O0dHRmjVrliIiIjRy5EjnOYMHD9aoUaM0bdo0SVJKSorGjx+vPn366Morr9SCBQtUWlqqCRMmNPTXAwAAjZRXA9Dnn3+u6667zrmfkpIiSRo/fryWL1+uhx56SKWlpZo8ebKKioo0YMAAbdiwQYGBgc5zDh48qOPHjzv3b7vtNv3www+aPXu2rFarevfurQ0bNlSaGA0AAIzL5HA4HN4uorGx2WyyWCwqLi5mPhDgY7755htNnjxZS5YsUZcuXbxdDgAPupDf70Y7BwgAAKC+EIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAAIDhEIAAGEZFRYVycnIkSTk5OaqoqPByRQC8xeRwOBzeLqKxsdlsslgsKi4uVlBQkLfLAeABWVlZysjIUGFhobMtNDRUycnJGjhwoBcrA+ApF/L7zRUgAD4vKytLs2fPdgk/klRYWKjZs2crKyvLS5UB8BauAFWBK0DwtNOnTys/P9/bZRiS3W7X9OnTVV5eXm2fgIAALVy4UH5+/JvQG6KiohQYGOjtMuADLuT3u1kD1QQYWn5+viZPnuztMlCN8vJy3XPPPd4uw7CWLFmiLl26eLsMGAwBCGgAUVFRWrJkibfLMKRnn31W+/fvP2+/2NhYpaSkNEBF+K2oqChvlwADIgABDSAwMJB/4XrJDz/8UOt+/HcEGAc3vAH4tNo+6s4j8YCxEIAAAIDhEIAA+DR/f3+P9gPgGwhAAHxaQECAR/sB8A2NPgB16tRJJpOp0pacnFxl/+XLl1fqy/slAOP65ZdfPNoPgG9o9E+BffbZZy6TE7/66iv94Q9/0K233lrtOUFBQc71fiTJZDLVa40AGq+ioiKP9gPgGxp9AOrQoYPL/hNPPKHOnTvr2muvrfYck8mksLCw+i4NAAA0UY3+Fti5ysvLtXLlSk2cOLHGqzolJSXq2LGjIiMjdcstt2jfvn01jltWViabzeayAfANzAECUJUmFYDWrl2roqIiJSUlVduna9eueumll/Tuu+9q5cqVstvtuvrqq3X06NFqz0lPT5fFYnFukZGR9VA9AG+46KKLPNoPgG9oUouhDhs2TAEBAXr//fdrfc6ZM2d02WWXady4cXrssceq7FNWVqaysjLnvs1mU2RkJIuhAj7gzjvvrNVCtFFRUXr55ZcboCIA9cUnF0M9fPiwPvnkE73zzjsXdF7z5s0VHx+v3NzcavuYzWaZzea6lgigEeIWGICqNJlbYMuWLVNISIhuvPHGCzqvoqJCX375pcLDw+upMgCNGQEIQFWaRACy2+1atmyZxo8fr2bNXC9a3XnnnZo5c6Zzf
+7cufr3v/+tb7/9Vnv27NEdd9yhw4cP6+67727osgE0ArV9DxjvCwOMpUncAvvkk0+Un5+viRMnVjqWn58vP7//5riffvpJkyZNktVqVdu2bXX55Zdr27Zt6tatW0OWDKCR+N3vfqc9e/bUqh8A42gSAWjo0KGqbq725s2bXfafe+45Pffccw1QFYCmIDg42KP9APiGJnELDADcRQACUBUCEACfRgACUBUCEACfdu47vjzRD4BvIAAB8Glvv/22R/sB8A0EIAA+zWq1Ov/+7RqC5+6f2w+A72sST4EBgLvOXeNr/fr12r9/v06cOKHg4GDFxsYqISGhUj8Avo8rQAB8WqtWrZx/z5kzRwEBAerXr58CAgI0Z86cKvsB8H1cAQLg0869srNjxw7t2LHjvP0A+D6uAAHwaXFxcR7tB8A3EIAA+LRRo0Y5l8v57YKnZ/f9/Pw0atSoBq8NgPcQgAD4tICAAI0dO1aS9Msvv7gcO7s/duxYVoMHDIYABMDn3XPPPerfv7/sdrtLu91uV//+/XXPPfd4qTIA3sIkaAA+LysrS9u2bdNVV12liy++WGVlZTKbzfruu++0bds2ZWVlaeDAgd4uE0ADIgAB8GkVFRV64YUX1K9fP82bN885H0j69QpQamqqFi1apP79+8vf39+LlQJoSNwCA+DTsrOzZbValZiY6BJ+pF8nPycmJurYsWPKzs72UoUAvIEABMCnnThxQpIUHR1d5fGz7Wf7ATAGAhAAnxYcHCxJysvLq/L42faz/QAYAwEIgE+Li4tTWFiYVq1aVeVTYKtWrVJ4eDgvQgQMhgAEwKf5+/tr6tSp2r59u1JTU7Vv3z6dOnVK+/btU2pqqrZv364pU6YwARowGJPD4XB4u4jGxmazyWKxqLi4WEFBQd4uB4AHZGVl6YUXXpDVanW2hYeHa8qUKTwCD/iIC/n9JgBVgQAE+KaKigplZ2frxIkTCg4OVlxcHFd+AB9yIb/fvAcIgGH4+/srPj7e22UAaASYAwQAAAyHAAQAAAyHAAQAAAyHAAQAAAyHAAQAAAyHAAQAAAyHAAQAAAyHAAQAAAyHAAQAAAyHN0FX4ezqIDabzcuVAACA2jr7u12bVb4IQFU4efKkJCkyMtLLlQAAgAt18uRJWSyWGvuwGGoV7Ha7CgoK1Lp1a5lMJm+XA8CDbDabIiMjdeTIERY7BnyMw+HQyZMnFRERIT+/mmf5EIAAGMqFrBYNwHcxCRoAABgOAQgAABgOAQiAoZjNZqWlpclsNnu7FABexBwgAABgOFwBAgAAhkMAAgAAhkMAAgAAhkMAAgAAhkMAAgAAhkMAAgAAhkMAAgAAhkMAAgAAhvP/AeF/xXsCqZLEAAAAAElFTkSuQmCC\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "# check for missing values\n", + "df.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 429 + }, + "id": "LK1-Dwe7zjlX", + "outputId": "6930ecd0-1f60-4f82-b29c-8c2f9cee89df" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "engine_displacement 0\n", + "num_cylinders 482\n", + "horsepower 708\n", + "vehicle_weight 0\n", + "acceleration 930\n", + "model_year 0\n", + "origin 0\n", + "fuel_type 0\n", + 
"drivetrain 0\n", + "num_doors 502\n", + "fuel_efficiency_mpg 0\n", + "dtype: int64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
engine_displacement0
num_cylinders482
horsepower708
vehicle_weight0
acceleration930
model_year0
origin0
fuel_type0
drivetrain0
num_doors502
fuel_efficiency_mpg0
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df[\"horsepower\"].head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "id": "Stn5QpQcz1aF", + "outputId": "8b6c9ff2-9e2a-4e8d-f7c3-e672bdad7e51" + }, + "execution_count": 17, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 159.0\n", + "1 97.0\n", + "2 78.0\n", + "3 NaN\n", + "4 140.0\n", + "Name: horsepower, dtype: float64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
horsepower
0159.0
197.0
278.0
3NaN
4140.0
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# median of horsepower column\n", + "df[\"horsepower\"].median()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ROkSz3LS0IBI", + "outputId": "cca41f00-3731-44b9-caa4-c9f3e462e7b5" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "149.0" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# filter for the cols we'll only be using\n", + "\"\"\"\n", + "'engine_displacement',\n", + "'horsepower',\n", + "'vehicle_weight',\n", + "'model_year',\n", + "'fuel_efficiency_mpg'\n", + "\"\"\"\n", + "df.columns" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "eDVKFZEU0Zuh", + "outputId": "164921f1-655c-42f3-babd-d0626d5dbd7a" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Index(['engine_displacement', 'num_cylinders', 'horsepower', 'vehicle_weight',\n", + " 'acceleration', 'model_year', 'origin', 'fuel_type', 'drivetrain',\n", + " 'num_doors', 'fuel_efficiency_mpg'],\n", + " dtype='object')" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "code", + "source": [ + "cols_to_keep=['engine_displacement',\n", + " 'horsepower',\n", + " 'vehicle_weight',\n", + " 'model_year',\n", + " 'fuel_efficiency_mpg']\n", + "\n", + "df2 = df[cols_to_keep]\n", + "df2.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "NYdpYfMT0skt", + "outputId": "a74b8eac-c039-42d1-94aa-14b4cf421b7e" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " engine_displacement horsepower vehicle_weight model_year \\\n", + "0 170 159.0 3413.433759 2003 \n", + "1 130 97.0 3149.664934 
2007 \n", + "2 170 78.0 3079.038997 2018 \n", + "3 220 NaN 2542.392402 2009 \n", + "4 210 140.0 3460.870990 2009 \n", + "\n", + " fuel_efficiency_mpg \n", + "0 13.231729 \n", + "1 13.688217 \n", + "2 14.246341 \n", + "3 16.912736 \n", + "4 12.488369 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
engine_displacementhorsepowervehicle_weightmodel_yearfuel_efficiency_mpg
0170159.03413.433759200313.231729
113097.03149.664934200713.688217
217078.03079.038997201814.246341
3220NaN2542.392402200916.912736
4210140.03460.870990200912.488369
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df2", + "summary": "{\n \"name\": \"df2\",\n \"rows\": 9704,\n \"fields\": [\n {\n \"column\": \"engine_displacement\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 49,\n \"min\": 10,\n \"max\": 380,\n \"num_unique_values\": 36,\n \"samples\": [\n 30,\n 260,\n 90\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"horsepower\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29.879555200339446,\n \"min\": 37.0,\n \"max\": 271.0,\n \"num_unique_values\": 192,\n \"samples\": [\n 128.0,\n 82.0,\n 167.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vehicle_weight\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 497.8948600311838,\n \"min\": 952.6817606436496,\n \"max\": 4739.077089392099,\n \"num_unique_values\": 9704,\n \"samples\": [\n 2535.8875912388694,\n 2741.1704843893167,\n 2471.8802372627765\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 2000,\n \"max\": 2023,\n \"num_unique_values\": 24,\n \"samples\": [\n 2005,\n 2002,\n 2003\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fuel_efficiency_mpg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.5564677028974288,\n \"min\": 6.200970533392815,\n \"max\": 25.96722204888372,\n \"num_unique_values\": 9704,\n \"samples\": [\n 16.642943419221385,\n 16.298377150953442,\n 18.59182197290521\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "code", + "source": [ + "shuffled_df = df2.sample(frac=1, random_state=42).reset_index(drop=True)\n", + "shuffled_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + 
"height": 206 + }, + "id": "4_T8CSxf1D4i", + "outputId": "d1b3e668-489c-487d-ee88-a9696b02aa68" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " engine_displacement horsepower vehicle_weight model_year \\\n", + "0 220 144.0 2535.887591 2009 \n", + "1 160 141.0 2741.170484 2019 \n", + "2 230 155.0 2471.880237 2017 \n", + "3 150 206.0 3748.164469 2015 \n", + "4 300 111.0 2135.716359 2006 \n", + "\n", + " fuel_efficiency_mpg \n", + "0 16.642943 \n", + "1 16.298377 \n", + "2 18.591822 \n", + "3 11.818843 \n", + "4 19.402209 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
engine_displacementhorsepowervehicle_weightmodel_yearfuel_efficiency_mpg
0220144.02535.887591200916.642943
1160141.02741.170484201916.298377
2230155.02471.880237201718.591822
3150206.03748.164469201511.818843
4300111.02135.716359200619.402209
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "shuffled_df", + "summary": "{\n \"name\": \"shuffled_df\",\n \"rows\": 9704,\n \"fields\": [\n {\n \"column\": \"engine_displacement\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 49,\n \"min\": 10,\n \"max\": 380,\n \"num_unique_values\": 36,\n \"samples\": [\n 10,\n 200,\n 100\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"horsepower\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29.879555200339414,\n \"min\": 37.0,\n \"max\": 271.0,\n \"num_unique_values\": 192,\n \"samples\": [\n 101.0,\n 232.0,\n 142.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vehicle_weight\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 497.89486003118515,\n \"min\": 952.6817606436496,\n \"max\": 4739.077089392099,\n \"num_unique_values\": 9704,\n \"samples\": [\n 2952.280077222956,\n 3527.0988925098245,\n 2697.391829652872\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 2000,\n \"max\": 2023,\n \"num_unique_values\": 24,\n \"samples\": [\n 2020,\n 2022,\n 2009\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fuel_efficiency_mpg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.556467702897423,\n \"min\": 6.200970533392815,\n \"max\": 25.96722204888372,\n \"num_unique_values\": 9704,\n \"samples\": [\n 14.748942858764156,\n 11.437054433578467,\n 15.54922028907195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "code", + "source": [ + "shuffled_df.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_Wj67MfT3UTt", + "outputId": 
"b8872cb9-b965-4626-fe3a-6444d376367a" + }, + "execution_count": 44, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(9704, 5)" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# col with missing vals\n", + "shuffled_df.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "id": "KEc7ajG99Wk9", + "outputId": "b0509ed5-c100-4e4b-8150-eb2dbf8fb8f6" + }, + "execution_count": 45, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "engine_displacement 0\n", + "horsepower 708\n", + "vehicle_weight 0\n", + "model_year 0\n", + "fuel_efficiency_mpg 0\n", + "dtype: int64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
engine_displacement0
horsepower708
vehicle_weight0
model_year0
fuel_efficiency_mpg0
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ + "X = shuffled_df.drop(columns='fuel_efficiency_mpg')\n", + "y = shuffled_df[\"fuel_efficiency_mpg\"]\n", + "print(f\"Shape of X: {X.shape}\\nShape of y: {y.shape}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gUmPB4ne3GFr", + "outputId": "0df65a32-87d8-45f7-f292-a4ad0a73d652" + }, + "execution_count": 100, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Shape of X: (9704, 4)\n", + "Shape of y: (9704,)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "shuffled_df.dtypes" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "id": "_dOdZaxmCwDs", + "outputId": "a6ba7bfe-fdcf-41b6-e110-8253d4369de7" + }, + "execution_count": 101, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "engine_displacement int64\n", + "horsepower float64\n", + "vehicle_weight float64\n", + "model_year int64\n", + "fuel_efficiency_mpg float64\n", + "dtype: object" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
engine_displacementint64
horsepowerfloat64
vehicle_weightfloat64
model_yearint64
fuel_efficiency_mpgfloat64
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 101 + } + ] + }, + { + "cell_type": "code", + "source": [ + "X.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 209 + }, + "id": "B4ek_9ZxCcAN", + "outputId": "67dfc425-3543-455a-8e4d-0b1484543d0e" + }, + "execution_count": 102, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "engine_displacement 0\n", + "horsepower 708\n", + "vehicle_weight 0\n", + "model_year 0\n", + "dtype: int64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
engine_displacement0
horsepower708
vehicle_weight0
model_year0
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 102 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# fill the missing vals in horsepower with 0 then mean of horsepower col from training data\n", + "\"\"\"\n", + "\n", + "X[\"horsepower\"] = X[\"horsepower\"].fillna()\n", + "X.isnull().sum()\n", + "\"\"\"" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + }, + "id": "OYb8n8H_C6ta", + "outputId": "2f35abd7-8369-433a-fbd6-205811461e21" + }, + "execution_count": 94, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'\\n\\nX[\"horsepower\"] = X[\"horsepower\"].fillna()\\nX.isnull().sum()\\n'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 94 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# train, val, test split\n", + "np.random.seed(42)\n", + "X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4)\n", + "X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5)" + ], + "metadata": { + "id": "9HFd6E6Z3SxL" + }, + "execution_count": 104, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(X_train.shape)\n", + "print(y_train.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "K0EETjBu3G2X", + "outputId": "564bdffa-f523-476e-f375-68e11bcad3c4" + }, + "execution_count": 105, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(5822, 4)\n", + "(5822,)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(X_test.shape)\n", + "print(y_test.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3h-4RONL2IkJ", + "outputId": "5d16a248-3f46-4d28-8bb4-c73c28324ddd" + }, + "execution_count": 106, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(1941, 4)\n", + "(1941,)\n" + ] + } + ] + 
}, + { + "cell_type": "code", + "source": [ + "print(y_val.shape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QDbJgNKPEbGf", + "outputId": "d3fa356c-2580-4f11-917b-b3d270aa8259" + }, + "execution_count": 107, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(1941,)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "X_train.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 209 + }, + "id": "8EnMEuMSIVuM", + "outputId": "841faaba-1d7d-4265-f533-c4a2394c906e" + }, + "execution_count": 108, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "engine_displacement 0\n", + "horsepower 420\n", + "vehicle_weight 0\n", + "model_year 0\n", + "dtype: int64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
engine_displacement0
horsepower420
vehicle_weight0
model_year0
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 108 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# impute with the mean of X_train\n", + "mean_val = X_train[\"horsepower\"].mean()\n", + "X_train[\"horsepower\"] = X_train[\"horsepower\"].fillna(mean_val)\n", + "X_val[\"horsepower\"] = X_val[\"horsepower\"].fillna(mean_val)" + ], + "metadata": { + "id": "tlR_SQLSINUN" + }, + "execution_count": 110, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "X_train.isnull().sum() == X_val.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 209 + }, + "id": "UHHaj_TlIzwc", + "outputId": "078cddfb-2d9f-478a-c114-8a7097e2bcd9" + }, + "execution_count": 113, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "engine_displacement True\n", + "horsepower True\n", + "vehicle_weight True\n", + "model_year True\n", + "dtype: bool" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
engine_displacementTrue
horsepowerTrue
vehicle_weightTrue
model_yearTrue
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 113 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Linear model from scratch" + ], + "metadata": { + "id": "uz9lbiK7-QHg" + } + }, + { + "cell_type": "code", + "source": [ + "x1 = [1, 148, 200, 333]\n", + "x2 = [1, 132, 201, 333]\n", + "x10 = [1, 2, 222, 333]\n", + "\n", + "w = [0.01, 0.04, 0.002, 0.1]\n", + "\n", + "X = [x1, x2, x10]\n", + "X = np.array(X)\n", + "X" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2Zqj2AsU3Ega", + "outputId": "cb1085b6-a08c-4dfd-d185-61fb53b1fdc4" + }, + "execution_count": 48, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[ 1, 148, 200, 333],\n", + " [ 1, 132, 201, 333],\n", + " [ 1, 2, 222, 333]])" + ] + }, + "metadata": {}, + "execution_count": 48 + } + ] + }, + { + "cell_type": "code", + "source": [ + "def linear_reg(X):\n", + " return X.dot(w)" + ], + "metadata": { + "id": "iKLe4Dvm-n1z" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "linear_reg(X)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yfAo4hvT-6Cb", + "outputId": "2dc62913-26d5-4d23-d8ad-09667d8877bf" + }, + "execution_count": 51, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([39.63 , 38.992, 33.834])" + ] + }, + "metadata": {}, + "execution_count": 51 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Use the sklearn linear reg\n", + "from sklearn.linear_model import LinearRegression\n", + "\n", + "model = LinearRegression()\n", + "model.fit(X_train, y_train)\n", + "y_pred_val = model.predict(X_val)" + ], + "metadata": { + "id": "KQLg2EU4_IDb" + }, + "execution_count": 114, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# plot the predictions\n", + "sns.histplot(y_pred_val, color='red')\n", + "sns.histplot(y_val, color='blue');" + ], + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/", + "height": 450 + }, + "id": "nDKY3cEREUl4", + "outputId": "0365a17b-f410-4d0d-9694-c759a94df4c1" + }, + "execution_count": 115, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGxCAYAAACEFXd4AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAQzNJREFUeJzt3X98U9Xh//F30l9pMG1pK7SdlFY+WuMPQFQ63FR+TUDHRNkPHN1wKrgNcAPdWDcrP3SDzQ2dDmE/FNwDGZt7qHOTsYeAoFNAQRlfNOtH+DRWR4EFLC1N0ybN/f6BjaRN+ou2SW9fz8cjD7jnnnNzck3rm3vPPcdiGIYhAAAAk7LGugMAAAA9ibADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMLTHWHYgHwWBQhw8flsPhkMViiXV3AABABxiGodraWuXl5clqjX79hrAj6fDhwxoyZEisuwEAALrggw8+0HnnnRd1P2FHksPhkHT6ZKWlpcW4NwAAoCNqamo0ZMiQ0P/HoyHsSKFbV2lpaYQdAAD6mPaGoDBAGQAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmBphBwAAmFpMw84rr7yiqVOnKi8vTxaLRc8//3zYfovFEvH10EMPheoUFBS02r9ixYpe/iQAACBexTTs1NXVacSIEVq1alXE/VVVVWGvJ598UhaLRdOnTw+rt2zZsrB68+fP743uAwCAPiCmC4FOmTJFU6ZMibo/JycnbPsvf/mLxo0bp/PPPz+s3OFwtKoLAAAg9aExO0ePHtWLL76oO+64o9W+FStWKCsrS5dffrkeeughBQKBGPQQAADEo5he2emMp556Sg6HQ7fccktY+d13361Ro0YpMzNTr7/+ukpLS1VVVaWVK1dGPVZDQ4MaGhpC2zU1NT3WbwDmU1lZKY/H06W22dnZys/P7+YeAWhLnwk7Tz75pGbOnCmbzRZWvnDhwtDfhw8fruTkZN11111avny5UlJSIh5r+fLlWrp0aY/2F4A5VVZWqqjIKZ/P26X2Nptd5eUuAg/Qi/pE2Hn11VdVXl6uP/7xj+3WLS4uViAQkNvtVlFRUcQ6paWlYSGppqZGQ4YM6bb+AjAvj8cjn88rp3O97HZnp9p6vS65XCXyeDyEHaAX9Ymw88QTT+iKK67QiBEj2q27b98+Wa1WDRo0KGqdlJSUqFd9AKAj7HanHI5Rse4GgA6Iadg5deqUDh48GNquqKjQvn37lJmZGfpXT01NjZ555hn94he/aNV+586d2r17t8aNGyeHw6GdO3dqwYIFKikp0cCBA3vtcwCIT10dW8O4GsBcYhp29uzZo3HjxoW2m28tzZo1S+vWrZMkbdy4UYZh6NZbb23VPiUlRRs3btSSJUvU0NCgwsJCLViwIOwWFYD+qbKyUs6iInl9vk63tdtscpWXE3gAk4hp2Bk7dqwMw2izzpw5czRnzpyI+0aNGqVdu3b1RNcA9HEej0den0/rnU457fYOt3N5vSpxuRhXA5hInxizAwBd5bTbNcrhiHU3AMRQn5lUEAAAoCsIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNQIOwAAwNR49BwAuonP55Pf74+63+utkyS5XK6wcmZsBnoWYQcAuoHP59Mbb+xWMBhso9b/SpJKSkrCSpmxGehZhB0A6AZ+v1/BYFBOu112a0LEOt6mAXLVK2xWZ2ZsBnoeYQcAupHdmiBHYuSw0zxMklmdgd7FAGUAAGBqhB0AAGBqhB0AAGBqhB0AAGBqDFAG0G9V+nzytJgXx+X1nv6zxVw4of1RygHEL8IOgH6p0udT0Rt75AsGIu5vORdOS42NDT3RLQA9gLADoF/y+P3yBQNy2h+U3VoYKvc2BeWqr5PTebHs9gGt2h0/vklud5kCgcghCUD8IewA6
Nfs1kI5Ep1nlDRJqpXdPlKOCHPheL3cxgL6GgYoAwAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAU2NSQQCIwOuti1heX18f+rO2trbd+gBij7ADAGdoNIKS2lrws0KS5HZXyO1OarXXMIKSEnqodwC6grADAGcIGIYkqchm0zmJrcPMcX+q3A1Sgc2mrMRPlpM4EfCrwueT8XF7APGDsAMAEditVjkSW1+h8TZZJEmploSw/d5gU6/1DUDnMEAZAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYWkwnFXzllVf00EMPae/evaqqqtJzzz2nadOmhfbfdttteuqpp8LaTJo0SZs3bw5tnzhxQvPnz9df//pXWa1WTZ8+Xb/85S91zjnn9NbHAIBOcXm9rf6+adOmNpaokDIyMpSbm9uqPDs7W/n5+d3fScBEYhp26urqNGLECN1+++265ZZbItaZPHmy1q5dG9pOSUkJ2z9z5kxVVVXppZdekt/v1ze+8Q3NmTNHGzZs6NG+A0BnNQY9kqwqiRBqysrK2mltlRRsVWq32eQqLyfwAG2IadiZMmWKpkyZ0madlJQU5eTkRNzncrm0efNmvfnmm7ryyislSY899phuuOEG/fznP1deXl639xkAuipg1EoKqiBlmbKShkmSjgf8cvt8GpqcIntC5F/J9cEKuRvu13qnU067PVTu8npV4nLJ4/EQdoA2xP3aWNu3b9egQYM0cOBAjR8/Xg8++KCysrIkSTt37lRGRkYo6EjSxIkTZbVatXv3bt18882x6jYARJVqLZQj0SlJ8gYbJXmVmWhXenJyxPq1AavcDZLTbtcohyNiHQDRxXXYmTx5sm655RYVFhbq0KFD+uEPf6gpU6Zo586dSkhI0JEjRzRo0KCwNomJicrMzNSRI0eiHrehoUENDQ2h7Zqamh77DAAAILbiOuzMmDEj9PfLLrtMw4cP17Bhw7R9+3ZNmDChy8ddvny5li5d2h1dBAAAca5PPXp+/vnnKzs7WwcPHpQk5eTk6NixY2F1AoGATpw4EXWcjySVlpbq5MmTodcHH3zQo/0GAACx06fCzocffqjjx4+HHr8cM2aMqqurtXfv3lCdbdu2KRgMqri4OOpxUlJSlJaWFvYCAADmFNPbWKdOnQpdpZGkiooK7du3T5mZmcrMzNTSpUs1ffp05eTk6NChQ/r+97+v//mf/9GkSZMkSU6nU5MnT9bs2bO1Zs0a+f1+zZs3TzNmzOBJLKCfqKyslMfjaVXePGfNmXPahO2PUg7AfGIadvbs2aNx48aFthcuXChJmjVrllavXq39+/frqaeeUnV1tfLy8nT99dfrgQceCJtr5+mnn9a8efM0YcKE0KSCjz76aK9/FgC9r7KyUkVFTvl80YNLpDltztRoGN3dLQBxJqZhZ+zYsTLa+EXzj3/8o91jZGZmMoEg0E95PB75fF45netltzvD9nm9dXK53pUzdYDsCa3v2B8PvCa373EFgoQdwOzi+mksAOgIu90ph2NUi9JaSQHZExxyJCa0auNtquiVvgGIvT41QBkAAKCzCDsAAMDUCDsAAMDUCDsAAMDUGKAMIC5Emy+nLa52HisHAImwAyAOdGS+nLY0Nja0XwlAv0XYARBzbc2X05bjxzfJ7S5TIBDowd7Fj5azPjdvt3eFKzs7W/n5+T3WLyDeEXYAxI3I8+VE5/X2j9tYjUGPJGvU2aBLSkrabG+z2VVe7iLwoN8i7ABAnAsYtZKCKkhZpqykYaFyb1NQrvo6OZ0Xy24fELGt1+uSy1Uij8dD2EG/RdgBgD4i1VooR+KZt/maJNXKbh8ph8MRq24BcY9HzwEAgKkRdgAAgKlxGwtAr2hrHp3mp4m83jqdXsAzXFJSkmw2W092D4CJEXYA9LjKyko5i4rk9fnarOdyvSup9WPkVqtVo0cXE3gAd
AlhB0CP83g88vp8Wu90ymm3t9rv8npV4nLJmTpA9oTwgbbeYJNcXq/8fj9hB0CXEHYA9Bqn3a5RbTw1ZE+wypGYEF7YP+YLBNCDGKAMAABMjbADAABMjbADAABMjbADAABMjQHKANDHnZ6fqO19mzZtClsdPSMjQ7m5uW0el9XSYRaEHQDooxqNoCSFhZjWdkmyqqysrNPHZ7V0mAVhBwD6qIBhSJKKbDadk5gUsc5xf5PcDUHlJS9ResLpFdPrjSa5fT5WS0e/QdgBgD7Obo0wP9HHvE0WSVJ6wjANTrlEklQbaJKb1dLRjxB2APQJkcal1NfXh/6sra1ttz6A/omwAyCutT0upUKS5HZXyO2OfBvHMIKSIl/1ANA/EHYAxLW2xqUc96fK3SAV2GzKSgy/HXMi4FeFzyfj4/YA+i/CDoA+IdK4lObxKKmWhNb7gk291jcA8Y1JBQEAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKnFNOy88sormjp1qvLy8mSxWPT888+H9vn9fi1atEiXXXaZBgwYoLy8PH3961/X4cOHw45RUFAgi8US9lqxYkUvfxIAABCvYhp26urqNGLECK1atarVPq/Xq7feektlZWV666239Oyzz6q8vFxf+MIXWtVdtmyZqqqqQq/58+f3RvcBAEAfENNVz6dMmaIpU6ZE3Jeenq6XXnoprOxXv/qVRo8ercrKSuXn54fKHQ6HcnJyerSvAACgb+pTY3ZOnjwpi8WijIyMsPIVK1YoKytLl19+uR566CEFAoE2j9PQ0KCampqwFwAAMKeYXtnpDJ/Pp0WLFunWW29VWlpaqPzuu+/WqFGjlJmZqddff12lpaWqqqrSypUrox5r+fLlWrp0aW90GwAAxFifCDt+v19f/vKXZRiGVq9eHbZv4cKFob8PHz5cycnJuuuuu7R8+XKlpKREPF5paWlYu5qaGg0ZMqRnOg/0I5WVlfJ4PK3KXS7X6T+93ojtopUDQHeI+7DTHHTef/99bdu2LeyqTiTFxcUKBAJyu90qKiqKWCclJSVqEALQNZWVlSoqcsrnix5cSj4OPdE0GkZ3dwsA4jvsNAed9957Ty+//LKysrLabbNv3z5ZrVYNGjSoF3oIoJnH45HP55XTuV52uzNsn9dbJ5frXTlTB8ie0Hqo4PHAa3L7HlcgSNgB0P1iGnZOnTqlgwcPhrYrKiq0b98+ZWZmKjc3V1/84hf11ltv6W9/+5uampp05MgRSVJmZqaSk5O1c+dO7d69W+PGjZPD4dDOnTu1YMEClZSUaODAgbH6WEC/Zrc75XCMalFaKykge4JDjsSEVm28TRW90jcA/VNMw86ePXs0bty40HbzOJpZs2ZpyZIleuGFFyRJI0eODGv38ssva+zYsUpJSdHGjRu1ZMkSNTQ0qLCwUAsWLAgbjwMAAPq3mIadsWPHymjjHn1b+yRp1KhR2rVrV3d3CwAAmEifmmcHAACgswg7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1Ag7AADA1OJ6bSwA8ae9lc293jqdXh7iE6fLACA2CDsAOqyyslLOoiJ5fb6odVyudyUFIu4zjKCk1mtjAUBPIuwA6DCPxyOvz6f1TqecdnvYPpfXqxKX6+OVzR1h+04E/Krw+dpdAgYAegJhB0CnOe12jXI4Iu6zJ1hbrWzuDTb1RrcAICIGKAMAAFMj7AAAAFPjNhbQT0V7qkqSqqqqVF1d3aq8oqJC0unxOS1FKgOAeEDYAfqh9p+qskoKRm1f8vFj5pE0MggZQJwh7AD9UEeeqipIWaZUa2HYvpNNAR1ubFCRzaZzEpPC9h0PvCa373EFgoQdAPGFsAP0Y209VZWVNEyORGd4YWOjDjd6Zbfa5UhMDtvlbaroqW4CwFlhgDIAADA1wg4AADA1wg4AADA1wg4AA
DA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1losAgH7K661rd5+rxaKv2dnZys/P79F+Ad2NsAMA/UyjcXpF+5ZBJtz/SpJKSkrCSu02m1zl5QQe9CmEHQDoZwLG6ZXpI61e38zbNECuemm90ymn3S5Jcnm9KnG55PF4CDvoUwg7ANBP2a1WORITouw9PaTTabdrlMPRe50CegADlAEAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKnFNOy88sormjp1qvLy8mSxWPT888+H7TcMQ/fff79yc3OVmpqqiRMn6r333gurc+LECc2cOVNpaWnKyMjQHXfcoVOnTvXipwAAAPEspmGnrq5OI0aM0KpVqyLu/9nPfqZHH31Ua9as0e7duzVgwABNmjRJPp8vVGfmzJl655139NJLL+lvf/ubXnnlFc2ZM6e3PgIAAIhzXQo7559/vo4fP96qvLq6Wueff36HjzNlyhQ9+OCDuvnmm1vtMwxDjzzyiO677z7ddNNNGj58uH7/+9/r8OHDoStALpdLmzdv1u9+9zsVFxfrs5/9rB577DFt3LhRhw8f7spHAwAAJtOlsON2u9XU1NSqvKGhQf/5z3/OulOSVFFRoSNHjmjixImhsvT0dBUXF2vnzp2SpJ07dyojI0NXXnllqM7EiRNltVq1e/fuqMduaGhQTU1N2AsAAJhTp9bGeuGFF0J//8c//qH09PTQdlNTk7Zu3aqCgoJu6diRI0ckSYMHDw4rHzx4cGjfkSNHNGjQoLD9iYmJyszMDNWJZPny5Vq6dGm39BMAAMS3ToWdadOmSZIsFotmzZoVti8pKUkFBQX6xS9+0W2d6ymlpaVauHBhaLumpkZDhgyJYY8AAEBP6VTYCQaDkqTCwkK9+eabys7O7pFOSVJOTo4k6ejRo8rNzQ2VHz16VCNHjgzVOXbsWFi7QCCgEydOhNpHkpKSopSUlO7vNAAAiDtdGrNTUVHRo0FHOh2ocnJytHXr1lBZTU2Ndu/erTFjxkiSxowZo+rqau3duzdUZ9u2bQoGgyouLu7R/gEAgL6hU1d2zrR161Zt3bpVx44dC13xafbkk0926BinTp3SwYMHQ9sVFRXat2+fMjMzlZ+fr+9+97t68MEHdcEFF6iwsFBlZWXKy8sL3U5zOp2aPHmyZs+erTVr1sjv92vevHmaMWOG8vLyuvrRAACAiXQp7CxdulTLli3TlVdeqdzcXFksli69+Z49ezRu3LjQdvM4mlmzZmndunX6/ve/r7q6Os2ZM0fV1dX67Gc/q82bN8tms4XaPP3005o3b54mTJggq9Wq6dOn69FHH+1SfwAAgPl0KeysWbNG69at09e+9rWzevOxY8fKMIyo+y0Wi5YtW6Zly5ZFrZOZmakNGzacVT8AAIB5dSnsNDY26uqrr+7uvgDoRpWVlfJ4PBH3uVyu0396va33RSgDgL6sS2Hnzjvv1IYNG1RWVtbd/QHQDSorK1VU5JTP13ZwKfk49ETS2MZVVwDoS7oUdnw+n37zm99oy5YtGj58uJKSksL2r1y5sls6B6BrPB6PfD6vnM71studrfZ7vXVyud6VM3WA7AnhD2UeD7wmt+9xBYKEHQDm0KWws3///tBcNwcOHAjb19XBygC6n93ulMMxKsKeWkkB2RMcciQmhO3xNlX0St8AoLd0Key8/PLL3d0PAACAHtHleXYAAP2Tq42xXtFkZ2crPz+/B3oDtK9LYWfcuHFt3q7atm1blzsEAIhPVY2NskoqKSnpdFu7zSZXeTmBBzHRpbDTPF6nmd/v1759+3TgwIFWC4QCAMyhOhBQUNJvCwo0Kiurw+1cXq9KXC55PB7CDmKiS2Hn4Ycfjli+ZMkSnTp16qw6BACIb0WpqRrlcMS6G0CHdeuYnZKSEo0ePVo///nPu/OwAIAYOXOSyYr6eklSeX29BtTWttkuOylJ+Wcs7QPEU
reGnZ07d4atWwUA6Jsagx5J1ogTT852uyW3u832NmuiykdfSeBBXOhS2LnlllvCtg3DUFVVlfbs2cOsygBgAgGjVlJQBSnLlJU0TJJ0POCX2+dTkc2mcxKTorb1Bivk8t4nj99P2EFc6FLYSU9PD9u2Wq0qKirSsmXLdP3113dLxwAAsZdqLZQj8fQs3N5goySv7Fa7HInJ0RsFeqdvQEd1KeysXbu2u/sBAADQI85qzM7evXtDk0tdcskluvzyy7ulUwAAAN2lS2Hn2LFjmjFjhrZv366MjAxJUnV1tcaNG6eNGzfq3HPP7c4+AgAAdJm1/SqtzZ8/X7W1tXrnnXd04sQJnThxQgcOHFBNTY3uvvvu7u4jAABAl3Xpys7mzZu1ZcsWOZ3OUNnFF1+sVatWMUAZAADElS5d2QkGg0pKav3YYVJSkoLB4Fl3CgAAoLt0KeyMHz9e3/nOd3T48OFQ2X/+8x8tWLBAEyZM6LbOAQAAnK0uhZ1f/epXqqmpUUFBgYYNG6Zhw4apsLBQNTU1euyxx7q7jwAAAF3WpTE7Q4YM0VtvvaUtW7bo3//+tyTJ6XRq4sSJ3do5AACAs9WpKzvbtm3TxRdfrJqaGlksFn3uc5/T/PnzNX/+fF111VW65JJL9Oqrr/ZUXwEAADqtU2HnkUce0ezZs5WWltZqX3p6uu666y6tXLmy2zoHAABwtjoVdv71r39p8uTJUfdff/312rt371l3CgAAoLt0KuwcPXo04iPnzRITE/Xf//73rDsFAADQXToVdj71qU/pwIEDUffv379fubm5Z90pAACA7tKpsHPDDTeorKxMPp+v1b76+notXrxYn//857utcwAAAGerU4+e33fffXr22Wd14YUXat68eSoqKpIk/fvf/9aqVavU1NSkH/3oRz3SUQAAgK7oVNgZPHiwXn/9dX3rW99SaWmpDMOQJFksFk2aNEmrVq3S4MGDe6SjAAAAXdHpSQWHDh2qTZs26aOPPtLBgwdlGIYuuOACDRw4sCf6BwAAcFa6NIOyJA0cOFBXXXVVd/YFAACg23VpbSwAAIC+grADAABMjbADAABMjbADAABMjbADAABMjbADAABMjbADAABMLe7DTkFBgSwWS6vX3LlzJUljx45tte+b3/xmjHsNAADiRZcnFewtb775ppqamkLbBw4c0Oc+9zl96UtfCpXNnj1by5YtC23b7fZe7SMAAIhfcR92zj333LDtFStWaNiwYbruuutCZXa7XTk5Ob3dNQAA0AfE/W2sMzU2Nmr9+vW6/fbbZbFYQuVPP/20srOzdemll6q0tFRerzeGvQQAAPEk7q/snOn5559XdXW1brvttlDZV7/6VQ0dOlR5eXnav3+/Fi1apPLycj377LNRj9PQ0KCGhobQdk1NTU92GwAAxFCfCjtPPPGEpkyZory8vFDZnDlzQn+/7LLLlJubqwkTJujQoUMaNmxYxOMsX75cS5cu7fH+AgCA2Oszt7Hef/99bdmyRXfeeWeb9YqLiyVJBw8ejFqntLRUJ0+eDL0++OCDbu0rAACIH33mys7atWs1aNAg3XjjjW3W27dvnyQpNzc3ap2UlBSlpKR0Z/cAAECc6hNhJxgMau3atZo1a5YSEz/p8qFDh7RhwwbdcMMNysrK0v79+7VgwQJde+21Gj58eAx7DAAA4kWfCDtbtmxRZWWlbr/99rDy5ORkbdmyRY888ojq6uo0ZMgQTZ8+Xffdd1+MegoAAOJNnwg7119/vQzDaFU+ZMgQ7dixIwY9AgAAfUWfGaAMAADQFYQdAABgaoQdAABgan1izA7Qn1VWVsrj8XSqjcvl6qHeAEDfQ9gB4lhlZaWKipzy+bq23ltjY0P7lQDA5Ag7QBzzeDzy+bxyOtfLbnd2uN3x45vkdpcpEAj0YO8AoG8g7AB9gN3ulMMxqsP1vV5uYwFAMwYoAwAAUyPsAAAAU+M2FtDH+Xw++f3+sLL6+vrQn7W1ta3ae
L11vdI3AIgHhB2gD/P5fHrjjd0KBoMt9lRIktzuCrndSVHbG0ZQUkLPdRAA4gBhB+jD/H6/gsGgnHa77NZPQstxf6rcDVKBzaasREerdicCflX4fBHXnAMAsyHsACZgtybIkfhJ2PE2WSRJqZbw8tD+YFOv9Q0AYo0BygAAwNQIOwAAwNQIOwAAwNQIOwAAwNQYoAwA6BUuV/vLmFRVVam6ujq0nZGRodzc3A4dPzs7W/n5+V3tHkyMsAMA6FFVjY2ySiopKelAbauklvNGdYzNZld5uYvAg1YIOwCAHlUdCCgo6bcFBRqVlRW1nsvrVYnLpYKUZUq1FqreaJLb55PTebHs9gFtvofX65LLVSKPx0PYQSuEHQBAryhKTdUoR+tJLlvKShomR6JTtYEmuVUru32kHB1oB0RD2AEA9AiX1ytJqvh4rbby+noNiLBWW8v6QHcj7AAAulVj0CPJqpIWA5Jnu92S291+e5YxQTcj7AAAulXAqJUUVEHKMmUlDdPxgF9un09FNpvOSYy+MO3xwGty+x5XIEjYQfci7AAAekSqtVCORKe8wUZJXtmtdjkSk6PW9zZV9F7n0K8wqSAAADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1wg4AADA1losA4kBlZaU8Hk+rctfHCyl6vXWSWq8WfbocANAWwg4QY5WVlXIWFcnr80Wt43K9KykQdb9hBCUldH/nAMAECDtAjHk8Hnl9Pq13OuW028P2ubxelbhccqYOkD3B0artiYBfFT6fDINVomFeHbmC2Vyn+WqoJGVnZys/P7/H+oW+g7ADxAmn3a5RjtaBRpLsCVY5EltfufEGm3q6W0DMNBpBSeEBJrr/lSSVlJSESuw2m1zl5QQeEHYAAPEp8PEVyyKbTeckJrVZ19s0QK56ha6QNl8V9Xg8hB3E99NYS5YskcViCXtddNFFof0+n09z585VVlaWzjnnHE2fPl1Hjx6NYY8BAN3Nbj19ZbOtlz3h9P/Omq+QtrwljP4trsOOJF1yySWqqqoKvf75z3+G9i1YsEB//etf9cwzz2jHjh06fPiwbrnllhj2FgAAxJu4v42VmJionJycVuUnT57UE088oQ0bNmj8+PGSpLVr18rpdGrXrl369Kc/3dtdBQAAcSjur+y89957ysvL0/nnn6+ZM2eqsrJSkrR37175/X5NnDgxVPeiiy5Sfn6+du7c2eYxGxoaVFNTE/YCAADmFNdhp7i4WOvWrdPmzZu1evVqVVRU6JprrlFtba2OHDmi5ORkZWRkhLUZPHiwjhw50uZxly9frvT09NBryJAhPfgpAABALMX1bawpU6aE/j58+HAVFxdr6NCh+tOf/qTU1NQuH7e0tFQLFy4MbdfU1BB4AAAwqbi+stNSRkaGLrzwQh08eFA5OTlqbGxUdXV1WJ2jR49GHONzppSUFKWlpYW9AACAOcX1lZ2WTp06pUOHDulrX/uarrjiCiUlJWnr1q2aPn26JKm8vFyVlZUaM2ZMjHuK/iraGldt6diEaQCArorrsHPvvfdq6tSpGjp0qA4fPqzFixcrISFBt956q9LT03XHHXdo4cKFyszMVFpamubPn68xY8bwJBZioiNrXLWlobGxm3sEAJDiPOx8+OGHuvXWW3X8+HGde+65+uxnP6tdu3bp3HPPlSQ9/PDDslqtmj59uhoaGjRp0iQ9/vjjMe41+qu21rhqy6bjx1XmdisQiL7QJwCg6+I67GzcuLHN/TabTatWrdKqVat6qUdA+9pa4yoSl9fbg70BAPSpAcoAAACdRdgBAACmFte3sQCzqPT55PH7I+6rqK+XJJXX12tAbW3YPm5xAcDZI+wAPazS51PRG3vkC7Y9AHm22y253RH3NRpG93cMAPoJwg7Qwzx+v3zBgJz2B2W3Frbafzzgl9vnU5HNpnMSk1rse01u3+MKBAk7ANBVhB2gl9ith
XIkOluVe4ONkryyW+1yJCaH72uq6KXeAYB5MUAZAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGmEHAACYGo+eAwBMo3nW8dCfLle7bbKzs5Wfn9+j/UJsEXYAAH1eY9AjyaqSFuGmpKSk3bY2m13l5S4Cj4kRdgAAfV7AqJUUVEHKMmUlDZO3KShXfZ2czotltw+I2s7rdcnlKpHH4yHsmBhhBwBgGqmhmcqbJNXKbh8ph8MR624hxgg76JcqKyvl8Xg63Y57+wDQ9xB20O9UVlaqqMgpn8/b6bbc2weAvoewg37H4/HI5/PK6Vwvu731wpzRcG8fAPomwg76LbvdKYdjVKy7AQDoYUwqCAAATI2wAwAATI2wAwAATI2wAwAATI0BykAEPp9Pfr8/rMzrrZMkbdq0KeJ6OxUVFZI+WZOnWcttAEDvIuwALfh8Pr3xxm4Fg8EWe3ZJsqqsrKzN9i3X5mnWaBjd00EAQKcQdoAW/H6/gsGgnHa77NaEUPlxf5PcDUHlJS9ResKwVu1ONgV0uLFBRTabzklM+qRd4DW5fY8rECTsAEAsEHaAKOzWBDkSPwk73iaLJCk9YZgGp1zSukFjow43emW32uVITD6jXUWP9xUAEB0DlAEAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKkRdgAAgKnFddhZvny5rrrqKjkcDg0aNEjTpk1TeXl5WJ2xY8fKYrGEvb75zW/GqMcAACDexPVCoDt27NDcuXN11VVXKRAI6Ic//KGuv/56vfvuuxowYECo3uzZs7Vs2bLQtt1uj0V3AQBxxuut69B+l8sVKsvOzlZ+fn6P9gu9K67DzubNm8O2161bp0GDBmnv3r269tprQ+V2u105OTm93T0AQJxqNIKSwkNMZP8rSSopKQmV2G02ucrLCTwmEtdhp6WTJ09KkjIzM8PKn376aa1fv145OTmaOnWqysrK2ry609DQoIaGhtB2TU1Nz3QYABATAcOQJBXZbDonMSlqPW/TALnqpfVOp5x2u1xer0pcLnk8HsKOifSZsBMMBvXd735Xn/nMZ3TppZeGyr/61a9q6NChysvL0/79+7Vo0SKVl5fr2WefjXqs5cuXa+nSpb3RbQBADNmtVjkSE9qocXroqtNu1yiHo3c6hV7XZ8LO3LlzdeDAAf3zn/8MK58zZ07o75dddplyc3M1YcIEHTp0SMOGDYt4rNLSUi1cuDC0XVNToyFDhvRMxwEAQEz1ibAzb948/e1vf9Mrr7yi8847r826xcXFkqSDBw9GDTspKSlKSUnp9n4CAID4E9dhxzAMzZ8/X88995y2b9+uwsLCdtvs27dPkpSbm9vDvQMAAH1BXIeduXPnasOGDfrLX/4ih8OhI0eOSJLS09OVmpqqQ4cOacOGDbrhhhuUlZWl/fv3a8GCBbr22ms1fPjwGPceAADEg7gOO6tXr5Z0euLAM61du1a33XabkpOTtWXLFj3yyCOqq6vTkCFDNH36dN13330x6C0AAIhHcR12jI8fHYxmyJAh2rFjRy/1BgAA9EVxvVwEAADA2SLsAAAAUyPsAAAAUyPsAAAAUyPsAAAAUyPsAAAAU4vrR8+Bs1VZWSmPxxNW5nK5JEleb52k2lZtTpcDAMyCsAPTqqyslLOoSF6fL+J+l+tdSYGo7Q0jKKmt1ZIBAH0BYQd9QqQrNO1xuVzy+nxa73TKabd/Uu71qsTlkjN1gOwJjlbtTgT8qvD52p3UEoB5uLze8D8/vgLcluzsbOXn5/dov9A9CDuIe+1doWnP+YmJGuVoHWrsCVY5EltfufEGm7r0PgD6nsagR5JVJS3CTUlJSbttbTa7ystdBJ4+gLCDuOfxeCJeoWnPpuPHVeZ2KxCIfqsKQP8WMGolBVWQskxZScPkbQrKVV8np/Ni2e0Dorbzel1yuUrk8XgIO30AYQd9htNuj3iFJ
prmy9EA0J5Ua6EciU5JTZJqZbePlKMTv28Q3wg76LMqfT55/P6o+yvq6yVJ5fX1GlD7yVNXhCAA6F8IO+iTKn0+Fb2xR75g+7eoZrvdktvdqryRAcgA0C8QdtAnefx++YIBOe0Pym4tjFjneMAvt8+nIptN5yQmnVH+mty+xxUIEnYAnJ2OPLUVCU9y9S7CDvo0e+g+e2veYKMkr+xWuxyJyZ+UN1X0Uu8AmFVjY5Uka4ee2oqEJ7l6F2EHAIBOCgSqJQVVUPBbZWWN6lTb5ie5Xn31VTmdkf+xFg1XhLqGsAMAQBelphbJ4ehc2Dmbq0JcEeoawg4AAC20t0Ze/cdPe9bX16v246c9k5KSZLPZ2j12V68KMbdP1xF2AAD4WKMRlNSRgcenx/653RVyu08/AGG1WjV6dHGHAo/UtatC6BrCDmKuvXWvmn/pnDk/DnPlAOgJgY+npGj5FGdLx/2pcjdIBTabshId8gab5PJ65ff7Oxx20HsIO4ipyspKFRU55fO1H15arl0jMVcOgJ5ht0ZeO6+Zt8kiSUq1JJyux6o0cY2wg06LdCWmqqpK1dXVbbbLyMhQbm5uWJnL5ZLP55XTuV52e5RHyL11crne/XiVcqsk5soBEJ/aG+sjtR7v09GxPug6wg46JfoK5FZJwS4fNzHx/DbuXddKCsie4Aj9S4u5cgDEk46P9ZFajvfp7FgfdB5hB50SaQVyl9erEpdLBSnLlBplNuN6o0lun6/VSsLHj2+S213GyuQA+rSOjvWRwsf7pFrtjPXpBYQddEmkFcizkoZFnc24NtAkd4SVhL3erk21DgDxqL2xPlL4eB+7te266B6EHfSqlvezI81V0V4bADCbjvyea67TfKuM2ZQ7jrCDXhH9fnbruSqiMYygJP4VBMA8OjfW538lKTTzst1mk6u8nMDTAYQd9Ipo97NbzlURyYmAXxU+nwweMwdgMp0Z6+NtGiBXvbT+4/W0SlwuZlPuIMIOwvT0BH8t72e3mqsiAm+wqcPHB4C+qCNjfU4/9arQwyFSR68ItdbfboERdhDCBH8A0DdUNTbKKnVpMVGp/90CI+wgxOPxMMEfAPQB1YGAgpJ+W1CgUVlZnWrbPF1If7oFRthBK3a7kwn+AKAPKEpNbTUNCFqzxroDAAAAPYmwAwAATI3bWP1Yyyevmkf1n564ign+AADtP6Xblnh56ouw009FX9BTcrneldT2WlVM8AcA5teZp3QjsdnsKi93xTzwEHZMqCMp3OVyyevz6YGCAhWmpkqSKurrVeZ2f/ykFRP8AUA8cnm9qvh4qZ3y+noNiLLUTksNwaBSrNbQ3GibNm1qd56eiooK+XxeFRT8VqmpRTKMoCyWjo2Aqa8vl9s9W6+++qquueaamAYewo7JdDaFl7ndrcoSrRYm+AOAONMY9Eiyhs1zNtvtliL8Ho/MKikY2iorK+vwe7vdTfrkin+wrapnOP3/i5KSkpjP62OasLNq1So99NBDOnLkiEaMGKHHHntMo0ePjnW3esWZV3JcLldYCo+mvr5ebncF8+UAQB8RMGolBVWQskyy5Mvt83VomQnpk9/vZ7Ydmpwie0LbMaA68Jqq/GtUYLPJIpsqOvGezctbPFBQoDK3O6bz+pgi7Pzxj3/UwoULtWbNGhUXF+uRRx7RpEmTVF5erkGDBsW6ez0q2tib8BQeSZKkC5VqHSDHx19a5ssBgPiXai2ULBdI8sputcuRmNxum+bf72e2zUy0Kz25vbbvq8p/ekkffXz7qmNLW0jND3w3D5WIJVOEnZUrV2r27Nn6xje+IUlas2aNXnzxRT355JP6wQ9+ENO+dWYUe1VVlaqrq0PbjY2NSm7ni1hRURE29qYj424kxt4AAPqPPh92GhsbtXfvXpWWlobKrFarJk6cqJ07d8awZ2c/ir3l/dW2tBx709a4G4mxNwCA/qPPhx2Px6OmpiYNHjw4rHzw4MH697//HbFNQ0ODGhoaQtsnT
56UJNXU1HRr39xut3w+r4YM+Z6Sk4e0Wbex0acPPqjUwIQkJVosagi+q5rgZqVZv6oUa07Udg3BoGqCAZ2bkCRbQoJONb2jj5r+rv82HlBDU/SQVdMUkNQgjz9FdcHTX4OTgf+TJH0UeEdNRn2Pt4vX94yXdvH4nj3RLt7es6faRWvb1XaxeM+OtIvUtqfbxeI9u6uvp+c069l2sXpPX9AtSXLVnZ6f7dSpU93+/9nm47V7l8Lo4/7zn/8YkozXX389rPx73/ueMXr06IhtFi9ebEjixYsXL168eJng9cEHH7SZFfr8lZ3s7GwlJCTo6NGjYeVHjx5VTk7kKyKlpaVauHBhaDsYDOrEiRPKysqSxWLp0f7GUk1NjYYMGaIPPvhAaWlpse5OzHE+wnE+wnE+WuOchON8hIvF+TAMQ7W1tcrLy2uzXp8PO8nJybriiiu0detWTZs2TdLp8LJ161bNmzcvYpuUlBSlpKSElWVkZPRwT+NHWloaP5hn4HyE43yE43y0xjkJx/kI19vnIz09vd06fT7sSNLChQs1a9YsXXnllRo9erQeeeQR1dXVhZ7OAgAA/Zcpws5XvvIV/fe//9X999+vI0eOaOTIkdq8eXOrQcsAAKD/MUXYkaR58+ZFvW2F01JSUrR48eJWt/D6K85HOM5HOM5Ha5yTcJyPcPF8PiyGwaxyAADAvDq2dCkAAEAfRdgBAACmRtgBAACmRtgxiYKCAlksllavuXPnRqy/bt26VnVtNlsv97r7vPLKK5o6dary8vJksVj0/PPPh+03DEP333+/cnNzlZqaqokTJ+q9995r97irVq1SQUGBbDabiouL9cYbb/TQJ+hebZ0Pv9+vRYsW6bLLLtOAAQOUl5enr3/96zp8+HCbx1yyZEmr78xFF13Uw5+k+7T3Hbnttttafb7Jkye3e1wzfkckRfx9YrFY9NBDD0U9Zl/+jixfvlxXXXWVHA6HBg0apGnTpqm8vDysjs/n09y5c5WVlaVzzjlH06dPbzWhbUtd/d0Ta+2djxMnTmj+/PkqKipSamqq8vPzdffdd4eWX4qmqz9nZ4uwYxJvvvmmqqqqQq+XXnpJkvSlL30papu0tLSwNu+//35vdbfb1dXVacSIEVq1alXE/T/72c/06KOPas2aNdq9e7cGDBigSZMmyefzRT3mH//4Ry1cuFCLFy/WW2+9pREjRmjSpEk6duxYT32MbtPW+fB6vXrrrbdUVlamt956S88++6zKy8v1hS98od3jXnLJJWHfmX/+85890f0e0d53RJImT54c9vn+8Ic/tHlMs35HJIWdh6qqKj355JOyWCyaPn16m8ftq9+RHTt2aO7cudq1a5deeukl+f1+XX/99ar7eF0nSVqwYIH++te/6plnntGOHTt0+PBh3XLLLW0etyu/e+JBe+fj8OHDOnz4sH7+85/rwIEDWrdunTZv3qw77rij3WN39uesW3TH+lSIP9/5zneMYcOGGcFgMOL+tWvXGunp6b3bqV4iyXjuuedC28Fg0MjJyTEeeuihUFl1dbWRkpJi/OEPf4h6nNGjRxtz584NbTc1NRl5eXnG8uXLe6TfPaXl+YjkjTfeMCQZ77//ftQ6ixcvNkaMGNG9nYuRSOdk1qxZxk033dSp4/Sn78hNN91kjB8/vs06ZvqOHDt2zJBk7NixwzCM078zkpKSjGeeeSZUx+VyGZKMnTt3RjxGV3/3xKOW5yOSP/3pT0ZycrLh9/uj1unKz1l34MqOCTU2Nmr9+vW6/fbb21zr69SpUxo6dKiGDBmim266Se+8804v9rL3VFRU6MiRI5o4cWKoLD09XcXFxdq5c2fENo2Njdq7d29YG6vVqokTJ0Zt05edPHlSFoul3WVT3nvvPeXl5en888/XzJkzVVlZ2Tsd7CXbt2/XoEGDVFRUpG9961s6fvx41Lr96Tty9OhRvfjiix36V7tZviPNt2MyMzMlSXv37pXf7w/7733RRRcpP
z8/6n/vrvzuiVctz0e0OmlpaUpMbHsKv878nHUXwo4JPf/886qurtZtt90WtU5RUZGefPJJ/eUvf9H69esVDAZ19dVX68MPP+y9jvaSI0eOSFKrGbUHDx4c2teSx+NRU1NTp9r0VT6fT4sWLdKtt97a5no2xcXFoUvVq1evVkVFha655hrV1tb2Ym97zuTJk/X73/9eW7du1U9/+lPt2LFDU6ZMUVNTU8T6/ek78tRTT8nhcLR7y8Ys35FgMKjvfve7+sxnPqNLL71U0unfI8nJya3+QdDWf++u/O6JR5HOR0sej0cPPPCA5syZ0+axOvtz1l1MM4MyPvHEE09oypQpba4CO2bMGI0ZMya0ffXVV8vpdOrXv/61Hnjggd7oJuKA3+/Xl7/8ZRmGodWrV7dZd8qUKaG/Dx8+XMXFxRo6dKj+9Kc/dehf/PFuxowZob9fdtllGj58uIYNG6bt27drwoQJMexZ7D355JOaOXNmuw8xmOU7MnfuXB04cKDPjDfqae2dj5qaGt144426+OKLtWTJkjaPFaufM67smMz777+vLVu26M477+xUu6SkJF1++eU6ePBgD/UsdnJyciSp1VMTR48eDe1rKTs7WwkJCZ1q09c0B533339fL730UqdXKc7IyNCFF15oyu+MJJ1//vnKzs6O+vn6w3dEkl599VWVl5d3+neK1De/I/PmzdPf/vY3vfzyyzrvvPNC5Tk5OWpsbFR1dXVY/bb+e3fld0+8iXY+mtXW1mry5MlyOBx67rnnlJSU1Knjt/dz1l0IOyazdu1aDRo0SDfeeGOn2jU1Nen//b//p9zc3B7qWewUFhYqJydHW7duDZXV1NRo9+7dYVe3zpScnKwrrrgirE0wGNTWrVujtulLmoPOe++9py1btigrK6vTxzh16pQOHTpkyu+MJH344Yc6fvx41M9n9u9IsyeeeEJXXHGFRowY0em2fek7YhiG5s2bp+eee07btm1TYWFh2P4rrrhCSUlJYf+9y8vLVVlZGfW/d1d+98SL9s6HdPqzXH/99UpOTtYLL7zQpelL2vs56za9PiQaPaapqcnIz883Fi1a1Grf1772NeMHP/hBaHvp0qXGP/7xD+PQoUPG3r17jRkzZhg2m8145513erPL3aa2ttZ4++23jbffftuQZKxcudJ4++23Q08XrVixwsjIyDD+8pe/GPv37zduuukmo7Cw0Kivrw8dY/z48cZjjz0W2t64caORkpJirFu3znj33XeNOXPmGBkZGcaRI0d6/fN1Vlvno7Gx0fjCF75gnHfeeca+ffuMqqqq0KuhoSF0jJbn45577jG2b99uVFRUGK+99poxceJEIzs72zh27FgsPmKntXVOamtrjXvvvdfYuXOnUVFRYWzZssUYNWqUccEFFxg+ny90jP7yHWl28uRJw263G6tXr454DDN9R771rW8Z6enpxvbt28N+Jrxeb6jON7/5TSM/P9/Ytm2bsWfPHmPMmDHGmDFjwo5TVFRkPPvss6HtjvzuiUftnY+TJ08axcXFxmWXXWYcPHgwrE4gEAgd58zz0dGfs55A2DGRf/zjH4Yko7y8vNW+6667zpg1a1Zo+7vf/a6Rn59vJCcnG4MHDzZuuOEG46233urF3navl19+2ZDU6tX8mYPBoFFWVmYMHjzYSElJMSZMmNDqPA0dOtRYvHhxWNljjz0WOk+jR482du3a1Uuf6Oy0dT4qKioi7pNkvPzyy6FjtDwfX/nKV4zc3FwjOTnZ+NSnPmV85StfMQ4ePNj7H66L2jonXq/XuP76641zzz3XSEpKMoYOHWrMnj27VWjpL9+RZr/+9a+N1NRUo7q6OuIxzPQdifYzsXbt2lCd+vp649vf/rYxcOBAw263GzfffLNRVVXV6jhntunI75541N75iPb9kWRUVFSEHae5TUd/znoCq54DAABTY8wOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOAAAwNcIOA
AAwNcIOAAAwNcIOYEKGYWjOnDnKzMyUxWLRvn37zvqYS5Ys0ciRI8/6OM3+/e9/69Of/rRsNlvouC3L3G53p/p/2223adq0ad3WRwDmkBjrDgDofps3b9a6deu0ffv20KrC8Wbx4sUaMGCAysvLdc4550Qsy8jIUFVVVYf7/8tf/lJMCg+gJcIOYELNK01fffXVse5KVIcOHdKNN96ooUOHtlmWk5PT4WOmp6d3ax8BmAO3sQCTue222zR//nxVVlbKYrGooKBABQUFeuSRR8LqjRw5UkuWLAltV1dX684779S5556rtLQ0jR8/Xv/617+63I/f/e53cjqdstlsuuiii/T444+H9lksFu3du1fLli2TxWLRkiVLIpZFuo31zjvv6POf/7zS0tLkcDh0zTXX6NChQ6HPfuZtrGAwqOXLl6uwsFCpqakaMWKE/vznP4f2b9++XRaLRVu3btWVV14pu92uq6++WuXl5WGf5a9//auuuuoq2Ww2ZWdn6+abb5YkLVu2TJdeemmrzz5y5EiVlZW1e46a+/uTn/xEgwcPVkZGhpYtW6ZAIKDvfe97yszM1Hnnnae1a9eG2jSfk40bN+rqq6+WzWbTpZdeqh07doQd+4UXXtAFF1wgm82mcePG6amnnpLFYlF1dXW7/QLMhrADmMwvf/lLLVu2TOedd56qqqr05ptvdqjdl770JR07dkx///vftXfvXo0aNUoTJkzQiRMnOt2Hp59+Wvfff79+/OMfy+Vy6Sc/+YnKysr01FNPSZKqqqp0ySWX6J577lFVVZXuvffeiGUt/ec//9G1116rlJQUbdu2TXv37tXtt9+uQCAQsR/Lly/X73//e61Zs0bvvPOOFixYoJKSklbB4Ec/+pF+8YtfaM+ePUpMTNTtt98e2vfiiy/q5ptv1g033KC3335bW7du1ejRoyVJt99+u1wuV9g5fvvtt7V//3594xvf6NC52rZtmw4fPqxXXnlFK1eu1OLFi/X5z39eAwcO1O7du/XNb35Td911lz788MOwdt/73vd0zz336O2339aYMWM0depUHT9+XJJUUVGhL37xi5o2bZr+9a9/6a677tKPfvSjDvUHMKUeX1cdQK97+OGHjaFDh4a2hw4dajz88MNhdUaMGGEsXrzYMAzDePXVV420tDTD5/OF1Rk2bJjx61//2jAMw1i8eLExYsSIDr3/sGHDjA0bNoSVPfDAA8aYMWMivn+0soqKCkOS8fbbbxuGYRilpaVGYWGh0djYGPF9Z82aZdx0002GYRiGz+cz7Ha78frrr4fVueOOO4xbb73VMAzDePnllw1JxpYtW0L7X3zxRUOSUV9fbxiGYYwZM8aYOXNm1M86ZcoU41vf+lZoe/78+cbYsWOj1m/Z36FDhxpNTU2hsqKiIuOaa64JbQcCAWPAgAHGH/7wB8MwPjknK1asCNXx+/3GeeedZ/z0pz81DMMwFi1aZFx66aVh7/WjH/3IkGR89NFHHeobYCaM2QGgf/3rXzp16pSysrLCyuvr60O3iDqqrq5Ohw4d0h133KHZs2eHygOBwFmPqdm3b5+uueYaJSUltVv34MGD8nq9+tznPhdW3tjYqMsvvzysbPjw4aG/5+bmSpKOHTum/Px87du3L+xztDR79mzdfvvtWrlypaxWqzZs2KCHH364w5/pkksukdX6yUX2wYMHh90aS0hIUFZWlo4dOxbWbsyYMaG/JyYm6sorr5TL5ZIklZeX66qrrgqr33w1CuiPCDtAP2C1Wls9peT3+0N/P3XqlHJzc7V9+/ZWbTMyMjr1XqdOnZIk/fa3v1VxcXHYvoSEhE4dq6XU1NRO9+PFF1/Upz71qbB9KSkpYdtnhieLxSLp9Hifjrzn1KlTlZKSoueee07Jycny+/364he/2OF+tgxuFoslYllzfwB0HmEH6AfOPfdcVVVVhbZrampUUVER2h41apSOHDmixMREFRQUnNV7DR48WHl5efq///s/z
Zw586yO1dLw4cP11FNPye/3t3t15+KLL1ZKSooqKyt13XXXndV7bt26NeoYnMTERM2aNUtr165VcnKyZsyY0alQ1lW7du3StddeK+n0VbO9e/dq3rx5kqSioiJt2rQprH5Hx24BZkTYAfqB8ePHa926dZo6daoyMjJ0//33h11lmThxosaMGaNp06bpZz/7mS688EIdPnw4NDj3yiuv7NT7LV26VHfffbfS09M1efJkNTQ0aM+ePfroo4+0cOHCLn+OefPm6bHHHtOMGTNUWlqq9PR07dq1S6NHj1ZRUVFYXYfDoXvvvVcLFixQMBjUZz/7WZ08eVKvvfaa0tLSNGvWrA695+LFizVhwgQNGzZMM2bMUCAQ0KZNm7Ro0aJQnTvvvFNOp1OS9Nprr3X583XGqlWrdMEFF8jpdOrhhx/WRx99FBpYfdddd2nlypVatGiR7rjjDu3bt0/r1q2T9MmVK6A/4WksoB8oLS3Vddddp89//vO68cYbNW3aNA0bNiy032KxaNOmTbr22mv1jW98QxdeeKFmzJih999/X4MHD+70+91555363e9+p7Vr1+qyyy7Tddddp3Xr1qmwsPCsPkdWVpa2bdumU6dO6brrrtMVV1yh3/72t1Gv8jzwwAMqKyvT8uXL5XQ6NXnyZL344oud6sfYsWP1zDPP6IUXXtDIkSM1fvx4vfHGG2F1LrjgAl199dW66KKLWt266ykrVqzQihUrNGLECP3zn//UCy+8EJp8sbCwUH/+85/17LPPavjw4Vq9enXoaayWt/CA/sBitLyRDwDoFMMwdMEFF+jb3/72WV256gi3263CwkK9/fbbnVq+48c//rHWrFmjDz74oOc6B8QpbmMBwFn473//q40bN+rIkSMdnlunNzz++OO66qqrlJWVpddee00PPfRQaEwP0N8QdgB0WvNaVpH8/e9/1zXXXNOLvYmtQYMGKTs7W7/5zW80cODAsH2xPE/vvfeeHnzwQZ04cUL5+fm65557VFpa2mPvB8QzbmMB6LSDBw9G3fepT32qV55G6gs4T0B8IOwAAABT42ksAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgaoQdAABgav8fDVhfpzPCgHcAAAAASUVORK5CYII=\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "y_val.shape == y_pred_val.shape" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ke9_p6WKGB-G", + "outputId": "72a80a64-77ff-4d78-8422-51c9ea9745e8" + }, + "execution_count": 116, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 116 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Calculates the root mean of squared error\n", + "def rmse(y_true, y_pred):\n", + " squared_error = (y_true - y_pred) ** 2\n", + " mean_se = squared_error.mean()\n", + " r_mse = np.sqrt(mean_se)\n", + " return round(float(r_mse), 2)" + ], + "metadata": { + "id": "QsA_C38kFQue" + }, + "execution_count": 155, + "outputs": [] + }, + { 
+ "cell_type": "code", + "source": [ + "# RMSE on the validation set when missing horsepower values are imputed with 0\n", + "# NOTE(review): this reuses y_pred_val from the model fitted above on mean-imputed data;\n", + "# re-fit on zero-imputed data before running this cell, otherwise it reports the mean-imputation RMSE\n", + "rmse(y_val, y_pred_val)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "i6sQdjS-GAYZ", + "outputId": "7ebfdaca-1871-4fc9-df07-0e119b47563a" + }, + "execution_count": 153, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "np.float64(0.4585064814908114)" + ] + }, + "metadata": {}, + "execution_count": 153 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# RMSE on the validation set when missing horsepower values are imputed with the training mean\n", + "rmse(y_val, y_pred_val)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dDXWaMNcGieb", + "outputId": "139f83d4-add0-4817-b887-51410aa87e51" + }, + "execution_count": 118, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.46" + ] + }, + "metadata": {}, + "execution_count": 118 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Q4. Regularisation" + ], + "metadata": { + "id": "JwhfRLZ_LE5E" + } + }, + { + "cell_type": "code", + "source": [ + "shuffled_df.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "JQC5_7DUJL8z", + "outputId": "814aabda-ae7a-4873-f720-885da0b0ee65" + }, + "execution_count": 119, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " engine_displacement horsepower vehicle_weight model_year \\\n", + "0 220 144.0 2535.887591 2009 \n", + "1 160 141.0 2741.170484 2019 \n", + "2 230 155.0 2471.880237 2017 \n", + "3 150 206.0 3748.164469 2015 \n", + "4 300 111.0 2135.716359 2006 \n", + "\n", + " fuel_efficiency_mpg \n", + "0 16.642943 \n", + "1 16.298377 \n", + "2 18.591822 \n", + "3 11.818843 \n", + "4 19.402209 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
engine_displacementhorsepowervehicle_weightmodel_yearfuel_efficiency_mpg
0220144.02535.887591200916.642943
1160141.02741.170484201916.298377
2230155.02471.880237201718.591822
3150206.03748.164469201511.818843
4300111.02135.716359200619.402209
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "shuffled_df", + "summary": "{\n \"name\": \"shuffled_df\",\n \"rows\": 9704,\n \"fields\": [\n {\n \"column\": \"engine_displacement\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 49,\n \"min\": 10,\n \"max\": 380,\n \"num_unique_values\": 36,\n \"samples\": [\n 10,\n 200,\n 100\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"horsepower\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29.879555200339414,\n \"min\": 37.0,\n \"max\": 271.0,\n \"num_unique_values\": 192,\n \"samples\": [\n 101.0,\n 232.0,\n 142.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vehicle_weight\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 497.89486003118515,\n \"min\": 952.6817606436496,\n \"max\": 4739.077089392099,\n \"num_unique_values\": 9704,\n \"samples\": [\n 2952.280077222956,\n 3527.0988925098245,\n 2697.391829652872\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"model_year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 2000,\n \"max\": 2023,\n \"num_unique_values\": 24,\n \"samples\": [\n 2020,\n 2022,\n 2009\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fuel_efficiency_mpg\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.556467702897423,\n \"min\": 6.200970533392815,\n \"max\": 25.96722204888372,\n \"num_unique_values\": 9704,\n \"samples\": [\n 14.748942858764156,\n 11.437054433578467,\n 15.54922028907195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 119 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Q4 works on its own feature frame / target so earlier X, y bindings are not disturbed\n", + "X2 = shuffled_df.drop(columns='fuel_efficiency_mpg')\n", + "y2 = shuffled_df[\"fuel_efficiency_mpg\"]\n", + "# report the objects built in this cell (X2 / y2), not whatever X / y happen to be bound to in the kernel\n", + "print(f\"Shape of X: {X2.shape}\\nShape of y: {y2.shape}\")"
+ ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "usu1f9e4LYyD", + "outputId": "d8ecceb2-05ac-4b0b-aa64-3613baabfc64" + }, + "execution_count": 135, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Shape of X: (9704, 4)\n", + "Shape of y: (9704,)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# fill with 0 as asked in the question\n", + "# assign the result back instead of calling fillna(..., inplace=True) on the column selection:\n", + "# that form is chained assignment and stops modifying X2 from pandas 3.0 onwards\n", + "X2[\"horsepower\"] = X2[\"horsepower\"].fillna(0)" + ], + "metadata": { + "id": "9zWvGRMrLe8q" + }, + "execution_count": 136, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "X2.isnull().sum()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 209 + }, + "id": "SNyDGux8OkR6", + "outputId": "c9fb25ae-0386-4287-b3c0-acb46785c2a3" + }, + "execution_count": 137, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "engine_displacement 0\n", + "horsepower 0\n", + "vehicle_weight 0\n", + "model_year 0\n", + "dtype: int64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
engine_displacement0
horsepower0
vehicle_weight0
model_year0
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 137 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# train, val, test split\n", + "np.random.seed(42)\n", + "X_train, X_temp, y_train, y_temp = train_test_split(X2, y2, test_size=0.4)\n", + "X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5)" + ], + "metadata": { + "id": "sMrvPT24OVIf" + }, + "execution_count": 139, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# for regularisation importing Ridge\n", + "from sklearn.linear_model import Ridge\n", + "r_values = [0, 0.01, 0.1, 1, 5, 10, 100]\n", + "hist = []\n", + "\n", + "for r in r_values:\n", + " model = Ridge(alpha=r)\n", + " model.fit(X_train, y_train)\n", + " y_preds = model.predict(X_val)\n", + " error = rmse(y_val, y_preds)\n", + " print(f\"With r = {r}: RMSE: {error}\")\n", + " hist.append(error)\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p_IlLV_GLw_A", + "outputId": "a3e760c4-f727-4cd5-bd2e-9af025cc4e96" + }, + "execution_count": 149, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "With r = 0: RMSE: 0.51\n", + "With r = 0.01: RMSE: 0.51\n", + "With r = 0.1: RMSE: 0.51\n", + "With r = 1: RMSE: 0.51\n", + "With r = 5: RMSE: 0.51\n", + "With r = 10: RMSE: 0.51\n", + "With r = 100: RMSE: 0.51\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Clearly, no effect of regularisation" + ], + "metadata": { + "id": "vcferr4UR1Si" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Q5 Different seed values" + ], + "metadata": { + "id": "anq35JmwSLJr" + } + }, + { + "cell_type": "code", + "source": [ + "# --- Load dataset\n", + "df2 = pd.read_csv(\"car_fuel_efficiency.csv\").fillna(0)\n", + "\n", + "# --- Features and target\n", + "X = df2[['engine_displacement', 'num_cylinders', 'horsepower', 'vehicle_weight']]\n", + "y = df2['fuel_efficiency_mpg']\n", + "\n", + "\n", + "# Calculates the root mean of 
squared error\n", + "def rmse(y_true, y_pred):\n", + " squared_error = (y_true - y_pred) ** 2\n", + " mean_se = squared_error.mean()\n", + " r_mse = np.sqrt(mean_se)\n", + " return float(r_mse)\n", + "\n", + "# iterate with different random seeds\n", + "hist = []\n", + "\n", + "for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:\n", + " X_tr, X_temp, y_tr, y_temp = train_test_split(X, y, test_size=0.4, random_state=i)\n", + " X_val, X_t, y_val, y_t = train_test_split(X_temp, y_temp, test_size=0.5, random_state=i)\n", + "\n", + " model = LinearRegression()\n", + " model.fit(X_tr, y_tr)\n", + " y_p = model.predict(X_val)\n", + " r_mse = rmse(y_val, y_p)\n", + "\n", + " print(f\"With seed {i}: RMSE: {r_mse}\")\n", + "\n", + " hist.append(r_mse)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NFRzMsuZN6mc", + "outputId": "52591494-6b76-4171-9496-189c4fbc2b9a" + }, + "execution_count": 170, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "With seed 0: RMSE: 0.5179381763288806\n", + "With seed 1: RMSE: 0.5171639086555457\n", + "With seed 2: RMSE: 0.5198562964522706\n", + "With seed 3: RMSE: 0.5215830764201178\n", + "With seed 4: RMSE: 0.5113660125711117\n", + "With seed 5: RMSE: 0.5150801979905301\n", + "With seed 6: RMSE: 0.5145383165566438\n", + "With seed 7: RMSE: 0.5357713657145978\n", + "With seed 8: RMSE: 0.5183959791528321\n", + "With seed 9: RMSE: 0.5197067240692991\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "hist" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cfhC_SmmUwm7", + "outputId": "087d1839-eb49-4ac1-c8c2-d4a6fdc38f54" + }, + "execution_count": 171, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[0.5179381763288806,\n", + " 0.5171639086555457,\n", + " 0.5198562964522706,\n", + " 0.5215830764201178,\n", + " 0.5113660125711117,\n", + " 0.5150801979905301,\n", + " 0.5145383165566438,\n", + " 
# %%
# get the stddev of the RMSEs
ans = float(np.std(hist))
print(f"STD: {ans}")

# %% [markdown]
# # Final modular functions after going through all the processes above

# %%
def load_data(file_path="car_fuel_efficiency.csv"):
    """Read the car fuel-efficiency CSV and keep only the modelling columns.

    Parameters
    ----------
    file_path : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The four predictor columns followed by the target column
        ``fuel_efficiency_mpg``; missing values are left untouched.
    """
    columns = [
        'engine_displacement', 'horsepower', 'vehicle_weight', 'model_year', 'fuel_efficiency_mpg'
    ]
    return pd.read_csv(file_path)[columns]


# %%
def preprocess(df, fill_method="zero"):
    """Return a copy of ``df`` with missing values handled per ``fill_method``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is never modified.
    fill_method : {"zero", "mean"}
        ``"zero"`` replaces every missing value with 0.
        ``"mean"`` returns an unfilled copy: the mean has to come from the
        training split only, so the caller must fill after splitting.

    Raises
    ------
    ValueError
        If ``fill_method`` is not one of the supported names. Previously an
        unknown name silently returned the data with its NaNs intact.
    """
    if fill_method == "zero":
        return df.fillna(0)
    if fill_method == "mean":
        # Deliberately left unfilled here; see the docstring.
        return df.copy()
    raise ValueError(f"Unknown fill_method: {fill_method!r} (expected 'zero' or 'mean')")


# %%
# split into train, val, test sets
def split_data(X, y, val, seed=42, holdout_size=0.4):
    """Split features and target into train / (validation) / test parts.

    Parameters
    ----------
    X, y : array-like
        Features and target, forwarded to ``train_test_split``.
    val : bool
        If true, the held-out part is split in half into validation and test
        sets; otherwise the whole held-out part is returned as the test set.
    seed : int
        ``random_state`` used for every split.
    holdout_size : float
        Fraction of the data held out from training. The default 0.4 gives
        60/20/20 with ``val`` true and 60/40 with ``val`` false.

    Returns
    -------
    tuple
        ``(X_tr, X_val, X_t, y_tr, y_val, y_t)`` when ``val`` is true,
        ``(X_tr, X_t, y_tr, y_t)`` otherwise.
    """
    # The first split is the same in both modes, so it is done once.
    X_tr, X_hold, y_tr, y_hold = train_test_split(
        X, y, test_size=holdout_size, random_state=seed
    )
    if not val:
        return X_tr, X_hold, y_tr, y_hold

    X_val, X_t, y_val, y_t = train_test_split(
        X_hold, y_hold, test_size=0.5, random_state=seed
    )
    return X_tr, X_val, X_t, y_tr, y_val, y_t


# %%
def rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Inputs are converted to float arrays so the comparison is positional;
    subtracting two pandas Series directly would align them on index labels
    and can pair the wrong rows.

    Returns
    -------
    float
        sqrt(mean((y_true - y_pred) ** 2)); NaN if any input value is NaN.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    squared_error = (y_true - y_pred) ** 2
    return float(np.sqrt(squared_error.mean()))


# %%
# Train Linear Regression
def train_linear(X_tr, y_tr):
    """Fit and return a ``LinearRegression`` model on the training data.

    ``LinearRegression`` is expected to be imported by an earlier cell.
    """
    model = LinearRegression()
    model.fit(X_tr, y_tr)
    return model


# Train Ridge Regression with regularization r (alpha)
def train_ridge(X_tr, y_tr, r):
    """Fit and return a ``Ridge`` model with regularisation strength ``r``.

    ``r`` is passed to ``Ridge`` as ``alpha``; ``Ridge`` is expected to be
    imported by an earlier cell.
    """
    model = Ridge(alpha=r)
    model.fit(X_tr, y_tr)
    return model


# %%
# Evaluate model on given dataset
def evaluate(model, X, y):
    """Return the RMSE of ``model``'s predictions for ``X`` against ``y``."""
    y_pred = model.predict(X)
    return rmse(y, y_pred)


# %% [markdown]
# ## Q6: Evaluating on test data

# %%
df_ = load_data("car_fuel_efficiency.csv")
df_.head()
140.0 3460.870990 2009 \n", + "\n", + " fuel_efficiency_mpg \n", + "0 13.231729 \n", + "1 13.688217 \n", + "2 14.246341 \n", + "3 16.912736 \n", + "4 12.488369 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
engine_displacementhorsepowervehicle_weightmodel_yearfuel_efficiency_mpg
0170159.03413.433759200313.231729
113097.03149.664934200713.688217
217078.03079.038997201814.246341
3220NaN2542.392402200916.912736
4210140.03460.870990200912.488369
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
# %%
# Zero-fill the missing values ("zero" is also preprocess's default), then
# count the nulls left in each column to confirm the fill worked.
df_ = preprocess(df_, fill_method="zero")
df_.isna().sum()
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
engine_displacement0
horsepower0
vehicle_weight0
model_year0
fuel_efficiency_mpg0
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 215 + } + ] + }, + { + "cell_type": "code", + "source": [ + "df_.head()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "oNAWrjSiapyT", + "outputId": "31ede318-f4cc-4469-a4eb-7d78c81fd6bf" + }, + "execution_count": 216, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " engine_displacement horsepower vehicle_weight model_year \\\n", + "0 170 159.0 3413.433759 2003 \n", + "1 130 97.0 3149.664934 2007 \n", + "2 170 78.0 3079.038997 2018 \n", + "3 220 0.0 2542.392402 2009 \n", + "4 210 140.0 3460.870990 2009 \n", + "\n", + " fuel_efficiency_mpg \n", + "0 13.231729 \n", + "1 13.688217 \n", + "2 14.246341 \n", + "3 16.912736 \n", + "4 12.488369 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
engine_displacementhorsepowervehicle_weightmodel_yearfuel_efficiency_mpg
0170159.03413.433759200313.231729
113097.03149.664934200713.688217
217078.03079.038997201814.246341
32200.02542.392402200916.912736
4210140.03460.870990200912.488369
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
# %%
# Target first, then every remaining column as a predictor.
y = df_["fuel_efficiency_mpg"]
X = df_.drop(columns=["fuel_efficiency_mpg"])

# %%
# val=False returns only a train and a test part.
# NOTE(review): with split_data's current settings this is a 60% / 40%
# train/test split; confirm that is intended rather than training on the
# combined train+validation 80% and testing on 20%.
X_train, X_test, y_train, y_test = split_data(X, y, val=False, seed=9)

# %%
# Ridge regression with a very small regularisation strength.
model = train_ridge(X_train, y_train, r=0.001)
model
Ridge(alpha=0.001)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 219 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Let's see the error on the test data" + ], + "metadata": { + "id": "EQ7vhHswcqbm" + } + }, + { + "cell_type": "code", + "source": [ + "y_pred_test = model.predict(X_test)\n", + "rmse(y_test, y_pred_test)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "x3oRNJf8bxDw", + "outputId": "fa2a2fc5-4e55-462e-c89c-5b04961a304d" + }, + "execution_count": 220, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.520226928799458" + ] + }, + "metadata": {}, + "execution_count": 220 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## The error on the test set is still pretty reasonable compared to the training and validation sets. For now, looks good!" + ], + "metadata": { + "id": "N5U4gLN7cvBC" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "ZHYeAUJ0cFjH" + }, + "execution_count": 213, + "outputs": [] + } + ] +} \ No newline at end of file