{
"cells": [
{
"cell_type": "code",
"execution_count": 96,
"id": "52e4d5b0-5142-488c-afe8-da951cce0ec5",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "e6eee592-2f01-482a-bf99-2140ba125d70",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2.3.2'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.__version__"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "40099d81-2fd2-41cd-ae18-093e7174f8fb",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"car_fuel_efficiency.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "d7432f7f-c628-428e-b7ba-f932f85c0469",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" engine_displacement | \n",
" num_cylinders | \n",
" horsepower | \n",
" vehicle_weight | \n",
" acceleration | \n",
" model_year | \n",
" origin | \n",
" fuel_type | \n",
" drivetrain | \n",
" num_doors | \n",
" fuel_efficiency_mpg | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 170 | \n",
" 3.0 | \n",
" 159.0 | \n",
" 3413.433759 | \n",
" 17.7 | \n",
" 2003 | \n",
" Europe | \n",
" Gasoline | \n",
" All-wheel drive | \n",
" 0.0 | \n",
" 13.231729 | \n",
"
\n",
" \n",
" | 1 | \n",
" 130 | \n",
" 5.0 | \n",
" 97.0 | \n",
" 3149.664934 | \n",
" 17.8 | \n",
" 2007 | \n",
" USA | \n",
" Gasoline | \n",
" Front-wheel drive | \n",
" 0.0 | \n",
" 13.688217 | \n",
"
\n",
" \n",
" | 2 | \n",
" 170 | \n",
" NaN | \n",
" 78.0 | \n",
" 3079.038997 | \n",
" 15.1 | \n",
" 2018 | \n",
" Europe | \n",
" Gasoline | \n",
" Front-wheel drive | \n",
" 0.0 | \n",
" 14.246341 | \n",
"
\n",
" \n",
" | 3 | \n",
" 220 | \n",
" 4.0 | \n",
" NaN | \n",
" 2542.392402 | \n",
" 20.2 | \n",
" 2009 | \n",
" USA | \n",
" Diesel | \n",
" All-wheel drive | \n",
" 2.0 | \n",
" 16.912736 | \n",
"
\n",
" \n",
" | 4 | \n",
" 210 | \n",
" 1.0 | \n",
" 140.0 | \n",
" 3460.870990 | \n",
" 14.4 | \n",
" 2009 | \n",
" Europe | \n",
" Gasoline | \n",
" All-wheel drive | \n",
" 2.0 | \n",
" 12.488369 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" engine_displacement num_cylinders horsepower vehicle_weight \\\n",
"0 170 3.0 159.0 3413.433759 \n",
"1 130 5.0 97.0 3149.664934 \n",
"2 170 NaN 78.0 3079.038997 \n",
"3 220 4.0 NaN 2542.392402 \n",
"4 210 1.0 140.0 3460.870990 \n",
"\n",
" acceleration model_year origin fuel_type drivetrain num_doors \\\n",
"0 17.7 2003 Europe Gasoline All-wheel drive 0.0 \n",
"1 17.8 2007 USA Gasoline Front-wheel drive 0.0 \n",
"2 15.1 2018 Europe Gasoline Front-wheel drive 0.0 \n",
"3 20.2 2009 USA Diesel All-wheel drive 2.0 \n",
"4 14.4 2009 Europe Gasoline All-wheel drive 2.0 \n",
"\n",
" fuel_efficiency_mpg \n",
"0 13.231729 \n",
"1 13.688217 \n",
"2 14.246341 \n",
"3 16.912736 \n",
"4 12.488369 "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "2b12116a-060a-48a5-afe9-fdde99e53fce",
"metadata": {},
"source": [
"## 1. No. of Records"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "515593f3-b510-48c2-9067-d9f8ffec3062",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(9704, 11)"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "markdown",
"id": "2a91bf5f-4be4-49b1-bf26-299b71da934a",
"metadata": {},
"source": [
"## 2. Distinct fuel types"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "28148b46-460f-4dc0-895e-9b445b2c5cca",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Gasoline\n",
"1 Gasoline\n",
"2 Gasoline\n",
"3 Diesel\n",
"4 Gasoline\n",
"Name: fuel_type, dtype: object"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fuels = df['fuel_type']\n",
"fuels.head()"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "a94560b0-3240-45c0-b075-28873370b87f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fuels.nunique()"
]
},
{
"cell_type": "markdown",
"id": "48bb4b87-2a21-408e-866c-8d4bacc57caa",
"metadata": {},
"source": [
"## 3. Null Values"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "7f2d85e3-672f-4e09-a48c-1f26a5627c2d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"engine_displacement 0\n",
"num_cylinders 482\n",
"horsepower 708\n",
"vehicle_weight 0\n",
"acceleration 930\n",
"model_year 0\n",
"origin 0\n",
"fuel_type 0\n",
"drivetrain 0\n",
"num_doors 502\n",
"fuel_efficiency_mpg 0\n",
"dtype: int64"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isnull().sum()"
]
},
{
"cell_type": "markdown",
"id": "98f06e86-38d3-4441-bafe-22f9089f6ee2",
"metadata": {},
"source": [
"Clearly, in fuel types there is no missing values"
]
},
{
"cell_type": "markdown",
"id": "f052a64b-db12-4342-96d0-3f5ca215cca0",
"metadata": {},
"source": [
"## 4. Max fuel efficiency "
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "de01f839-411a-4ed1-bc49-f479174cd8b3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['engine_displacement', 'num_cylinders', 'horsepower', 'vehicle_weight',\n",
" 'acceleration', 'model_year', 'origin', 'fuel_type', 'drivetrain',\n",
" 'num_doors', 'fuel_efficiency_mpg'],\n",
" dtype='object')"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "98a48cd5-38df-4736-b873-1d78c1546bde",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" engine_displacement | \n",
" num_cylinders | \n",
" horsepower | \n",
" vehicle_weight | \n",
" acceleration | \n",
" model_year | \n",
" origin | \n",
" fuel_type | \n",
" drivetrain | \n",
" num_doors | \n",
" fuel_efficiency_mpg | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 170 | \n",
" 3.0 | \n",
" 159.0 | \n",
" 3413.433759 | \n",
" 17.7 | \n",
" 2003 | \n",
" Europe | \n",
" Gasoline | \n",
" All-wheel drive | \n",
" 0.0 | \n",
" 13.231729 | \n",
"
\n",
" \n",
" | 1 | \n",
" 130 | \n",
" 5.0 | \n",
" 97.0 | \n",
" 3149.664934 | \n",
" 17.8 | \n",
" 2007 | \n",
" USA | \n",
" Gasoline | \n",
" Front-wheel drive | \n",
" 0.0 | \n",
" 13.688217 | \n",
"
\n",
" \n",
" | 2 | \n",
" 170 | \n",
" NaN | \n",
" 78.0 | \n",
" 3079.038997 | \n",
" 15.1 | \n",
" 2018 | \n",
" Europe | \n",
" Gasoline | \n",
" Front-wheel drive | \n",
" 0.0 | \n",
" 14.246341 | \n",
"
\n",
" \n",
" | 3 | \n",
" 220 | \n",
" 4.0 | \n",
" NaN | \n",
" 2542.392402 | \n",
" 20.2 | \n",
" 2009 | \n",
" USA | \n",
" Diesel | \n",
" All-wheel drive | \n",
" 2.0 | \n",
" 16.912736 | \n",
"
\n",
" \n",
" | 4 | \n",
" 210 | \n",
" 1.0 | \n",
" 140.0 | \n",
" 3460.870990 | \n",
" 14.4 | \n",
" 2009 | \n",
" Europe | \n",
" Gasoline | \n",
" All-wheel drive | \n",
" 2.0 | \n",
" 12.488369 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" engine_displacement num_cylinders horsepower vehicle_weight \\\n",
"0 170 3.0 159.0 3413.433759 \n",
"1 130 5.0 97.0 3149.664934 \n",
"2 170 NaN 78.0 3079.038997 \n",
"3 220 4.0 NaN 2542.392402 \n",
"4 210 1.0 140.0 3460.870990 \n",
"\n",
" acceleration model_year origin fuel_type drivetrain num_doors \\\n",
"0 17.7 2003 Europe Gasoline All-wheel drive 0.0 \n",
"1 17.8 2007 USA Gasoline Front-wheel drive 0.0 \n",
"2 15.1 2018 Europe Gasoline Front-wheel drive 0.0 \n",
"3 20.2 2009 USA Diesel All-wheel drive 2.0 \n",
"4 14.4 2009 Europe Gasoline All-wheel drive 2.0 \n",
"\n",
" fuel_efficiency_mpg \n",
"0 13.231729 \n",
"1 13.688217 \n",
"2 14.246341 \n",
"3 16.912736 \n",
"4 12.488369 "
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "25faf234-3f06-4f9b-bd07-0f25de03ee1c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" engine_displacement | \n",
" num_cylinders | \n",
" horsepower | \n",
" vehicle_weight | \n",
" acceleration | \n",
" model_year | \n",
" origin | \n",
" fuel_type | \n",
" drivetrain | \n",
" num_doors | \n",
" fuel_efficiency_mpg | \n",
"
\n",
" \n",
" \n",
" \n",
" | 8 | \n",
" 250 | \n",
" 1.0 | \n",
" 174.0 | \n",
" 2714.219310 | \n",
" 10.3 | \n",
" 2016 | \n",
" Asia | \n",
" Diesel | \n",
" Front-wheel drive | \n",
" -1.0 | \n",
" 16.823554 | \n",
"
\n",
" \n",
" | 12 | \n",
" 320 | \n",
" 5.0 | \n",
" 145.0 | \n",
" 2783.868974 | \n",
" 15.1 | \n",
" 2010 | \n",
" Asia | \n",
" Diesel | \n",
" All-wheel drive | \n",
" 1.0 | \n",
" 16.175820 | \n",
"
\n",
" \n",
" | 14 | \n",
" 200 | \n",
" 6.0 | \n",
" 160.0 | \n",
" 3582.687368 | \n",
" 14.9 | \n",
" 2007 | \n",
" Asia | \n",
" Diesel | \n",
" All-wheel drive | \n",
" 0.0 | \n",
" 11.871091 | \n",
"
\n",
" \n",
" | 20 | \n",
" 150 | \n",
" 3.0 | \n",
" 197.0 | \n",
" 2231.808142 | \n",
" 18.7 | \n",
" 2011 | \n",
" Asia | \n",
" Gasoline | \n",
" Front-wheel drive | \n",
" 1.0 | \n",
" 18.889083 | \n",
"
\n",
" \n",
" | 21 | \n",
" 160 | \n",
" 4.0 | \n",
" 133.0 | \n",
" 2659.431451 | \n",
" NaN | \n",
" 2016 | \n",
" Asia | \n",
" Gasoline | \n",
" Front-wheel drive | \n",
" -1.0 | \n",
" 16.077730 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" engine_displacement num_cylinders horsepower vehicle_weight \\\n",
"8 250 1.0 174.0 2714.219310 \n",
"12 320 5.0 145.0 2783.868974 \n",
"14 200 6.0 160.0 3582.687368 \n",
"20 150 3.0 197.0 2231.808142 \n",
"21 160 4.0 133.0 2659.431451 \n",
"\n",
" acceleration model_year origin fuel_type drivetrain num_doors \\\n",
"8 10.3 2016 Asia Diesel Front-wheel drive -1.0 \n",
"12 15.1 2010 Asia Diesel All-wheel drive 1.0 \n",
"14 14.9 2007 Asia Diesel All-wheel drive 0.0 \n",
"20 18.7 2011 Asia Gasoline Front-wheel drive 1.0 \n",
"21 NaN 2016 Asia Gasoline Front-wheel drive -1.0 \n",
"\n",
" fuel_efficiency_mpg \n",
"8 16.823554 \n",
"12 16.175820 \n",
"14 11.871091 \n",
"20 18.889083 \n",
"21 16.077730 "
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mask_asia = df['origin'] == 'Asia'\n",
"eff = df[mask_asia]\n",
"eff.head()"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "5d1f7efd-77f9-4568-a012-5cd1ba753fd6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"23.759122836520497"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"max_eff = max(eff['fuel_efficiency_mpg'])\n",
"max_eff"
]
},
{
"cell_type": "markdown",
"id": "cda34c7c-f2c0-497d-b419-dae670db022b",
"metadata": {},
"source": [
"## 5. Median value of horsepower"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "e8328da7-f04f-41bd-94f5-b534aa00f2c1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"engine_displacement 0\n",
"num_cylinders 482\n",
"horsepower 708\n",
"vehicle_weight 0\n",
"acceleration 930\n",
"model_year 0\n",
"origin 0\n",
"fuel_type 0\n",
"drivetrain 0\n",
"num_doors 502\n",
"fuel_efficiency_mpg 0\n",
"dtype: int64"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "6eaafaf8-6674-443d-b26c-6d8212d91754",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"149.0"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# median of the horsepower col\n",
"df['horsepower'].median()"
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "9b785320-6b9a-41c0-bb27-c0f126145177",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"horsepower\n",
"152.0 142\n",
"145.0 141\n",
"151.0 134\n",
"148.0 130\n",
"141.0 130\n",
" ... \n",
"40.0 1\n",
"57.0 1\n",
"245.0 1\n",
"252.0 1\n",
"61.0 1\n",
"Name: count, Length: 192, dtype: int64"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# most frequent value here\n",
"df['horsepower'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 74,
"id": "126b6df6-515f-463e-83f3-10abbf2c25e2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(152.0)"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# alternatively mode gives the max freq count\n",
"mode_horsepower = df['horsepower'].mode()[0]\n",
"mode_horsepower"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "bd17e63f-c5c1-4d8a-8ba5-1b8e106175fc",
"metadata": {},
"outputs": [],
"source": [
"# fill the missing values in the col with mode\n",
"df['horsepower'].fillna(mode_horsepower, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"id": "e7dc6b1a-323a-4f88-b475-f76059759e66",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"engine_displacement 0\n",
"num_cylinders 482\n",
"horsepower 0\n",
"vehicle_weight 0\n",
"acceleration 930\n",
"model_year 0\n",
"origin 0\n",
"fuel_type 0\n",
"drivetrain 0\n",
"num_doors 502\n",
"fuel_efficiency_mpg 0\n",
"dtype: int64"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check if null values are removed or not\n",
"df.isnull().sum()"
]
},
{
"cell_type": "markdown",
"id": "3eaf5439-3f99-4506-bdd8-ca35e03c18bf",
"metadata": {},
"source": [
"Clearly the null values have been imputed"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "94dc61c5-bbcb-47f4-9370-f3238c26e2a2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"152.0"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# now recalculate the median\n",
"df['horsepower'].median()"
]
},
{
"cell_type": "markdown",
"id": "32337e0a-dbfc-4e96-9fab-15723b3a5166",
"metadata": {},
"source": [
"## 6. Model building"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "a28d7bfb-3f4d-4018-8881-b39bf43d4089",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" engine_displacement | \n",
" num_cylinders | \n",
" horsepower | \n",
" vehicle_weight | \n",
" acceleration | \n",
" model_year | \n",
" origin | \n",
" fuel_type | \n",
" drivetrain | \n",
" num_doors | \n",
" fuel_efficiency_mpg | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 170 | \n",
" 3.0 | \n",
" 159.0 | \n",
" 3413.433759 | \n",
" 17.7 | \n",
" 2003 | \n",
" Europe | \n",
" Gasoline | \n",
" All-wheel drive | \n",
" 0.0 | \n",
" 13.231729 | \n",
"
\n",
" \n",
" | 1 | \n",
" 130 | \n",
" 5.0 | \n",
" 97.0 | \n",
" 3149.664934 | \n",
" 17.8 | \n",
" 2007 | \n",
" USA | \n",
" Gasoline | \n",
" Front-wheel drive | \n",
" 0.0 | \n",
" 13.688217 | \n",
"
\n",
" \n",
" | 2 | \n",
" 170 | \n",
" NaN | \n",
" 78.0 | \n",
" 3079.038997 | \n",
" 15.1 | \n",
" 2018 | \n",
" Europe | \n",
" Gasoline | \n",
" Front-wheel drive | \n",
" 0.0 | \n",
" 14.246341 | \n",
"
\n",
" \n",
" | 3 | \n",
" 220 | \n",
" 4.0 | \n",
" 152.0 | \n",
" 2542.392402 | \n",
" 20.2 | \n",
" 2009 | \n",
" USA | \n",
" Diesel | \n",
" All-wheel drive | \n",
" 2.0 | \n",
" 16.912736 | \n",
"
\n",
" \n",
" | 4 | \n",
" 210 | \n",
" 1.0 | \n",
" 140.0 | \n",
" 3460.870990 | \n",
" 14.4 | \n",
" 2009 | \n",
" Europe | \n",
" Gasoline | \n",
" All-wheel drive | \n",
" 2.0 | \n",
" 12.488369 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" engine_displacement num_cylinders horsepower vehicle_weight \\\n",
"0 170 3.0 159.0 3413.433759 \n",
"1 130 5.0 97.0 3149.664934 \n",
"2 170 NaN 78.0 3079.038997 \n",
"3 220 4.0 152.0 2542.392402 \n",
"4 210 1.0 140.0 3460.870990 \n",
"\n",
" acceleration model_year origin fuel_type drivetrain num_doors \\\n",
"0 17.7 2003 Europe Gasoline All-wheel drive 0.0 \n",
"1 17.8 2007 USA Gasoline Front-wheel drive 0.0 \n",
"2 15.1 2018 Europe Gasoline Front-wheel drive 0.0 \n",
"3 20.2 2009 USA Diesel All-wheel drive 2.0 \n",
"4 14.4 2009 Europe Gasoline All-wheel drive 2.0 \n",
"\n",
" fuel_efficiency_mpg \n",
"0 13.231729 \n",
"1 13.688217 \n",
"2 14.246341 \n",
"3 16.912736 \n",
"4 12.488369 "
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "30057fab-fad4-44ae-9b9b-2aae11614f84",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 False\n",
"1 False\n",
"2 False\n",
"3 False\n",
"4 False\n",
"Name: origin, dtype: bool"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mask_asia.head()"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "dbaa1132-9a2f-411a-9668-b5110109e3aa",
"metadata": {},
"outputs": [],
"source": [
"columns_to_keep = ['vehicle_weight', 'model_year']"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "c37eb7f0-4e38-4a8d-b5a0-f54ba43ef6c7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" vehicle_weight | \n",
" model_year | \n",
"
\n",
" \n",
" \n",
" \n",
" | 8 | \n",
" 2714.219310 | \n",
" 2016 | \n",
"
\n",
" \n",
" | 12 | \n",
" 2783.868974 | \n",
" 2010 | \n",
"
\n",
" \n",
" | 14 | \n",
" 3582.687368 | \n",
" 2007 | \n",
"
\n",
" \n",
" | 20 | \n",
" 2231.808142 | \n",
" 2011 | \n",
"
\n",
" \n",
" | 21 | \n",
" 2659.431451 | \n",
" 2016 | \n",
"
\n",
" \n",
" | 34 | \n",
" 2844.227534 | \n",
" 2014 | \n",
"
\n",
" \n",
" | 38 | \n",
" 3761.994038 | \n",
" 2019 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" vehicle_weight model_year\n",
"8 2714.219310 2016\n",
"12 2783.868974 2010\n",
"14 3582.687368 2007\n",
"20 2231.808142 2011\n",
"21 2659.431451 2016\n",
"34 2844.227534 2014\n",
"38 3761.994038 2019"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# subset the asian data\n",
"df_asia = df[mask_asia]\n",
"df_asia_final = df_asia[columns_to_keep].head(7)\n",
"df_asia_final"
]
},
{
"cell_type": "code",
"execution_count": 100,
"id": "89abd22c-7cc2-49b9-8afd-4e824f4360c7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(7, 2)"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# get the underlying numpy array\n",
"X = np.array(df_asia_final)\n",
"X.shape"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "252a6e2f-c7f9-4c30-b74a-8b4e1ea876ab",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2, 2)"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# take the dot product with the traspose (7,2).(2,7) -> (7,7)\n",
"XTX = X.T @ X\n",
"XTX.shape"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "63342692-a307-48cd-a6cf-bdfc8e1985c1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2, 2)"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"XTX_inv = np.linalg.inv(XTX)\n",
"XTX_inv.shape"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "e4b0a33e-ee66-48d3-82d4-7953e0a64461",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1100, 1300, 800, 900, 1000, 1100, 1200])"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create an array y with values \n",
"y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])\n",
"y "
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "e5b0bf7e-9e5d-46c8-9d47-4555f05bfc6f",
"metadata": {},
"outputs": [],
"source": [
"# Multiply the inverse of XTX with the transpose of X, and then multiply the result by y. Call the result w\n",
"step = XTX_inv @ X.T\n",
"w = step @ y"
]
},
{
"cell_type": "code",
"execution_count": 115,
"id": "1ddb3d1f-b877-4c66-9245-098cd63b850a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.float64(0.5187709081074016)"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# sum of all the elements of the result / weights\n",
"np.sum(w)"
]
},
{
"cell_type": "markdown",
"id": "5cad1468-2329-4fa3-9b91-0dc30dffafbc",
"metadata": {},
"source": [
"## End of Week 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2bbec182-f585-43fa-9960-ca979139c0e2",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}