sks01dev committed on
Commit
3f7fe3b
·
1 Parent(s): fcbfaff

Add files via upload

Browse files
Files changed (2) hide show
  1. Week 1/Week 1.ipynb +1320 -0
  2. Week 1/car_fuel_efficiency.csv +0 -0
Week 1/Week 1.ipynb ADDED
@@ -0,0 +1,1320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 96,
6
+ "id": "52e4d5b0-5142-488c-afe8-da951cce0ec5",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n",
11
+ "import warnings\n",
12
+ "warnings.filterwarnings('ignore')\n",
13
+ "import numpy as np"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "code",
18
+ "execution_count": 2,
19
+ "id": "e6eee592-2f01-482a-bf99-2140ba125d70",
20
+ "metadata": {},
21
+ "outputs": [
22
+ {
23
+ "data": {
24
+ "text/plain": [
25
+ "'2.3.2'"
26
+ ]
27
+ },
28
+ "execution_count": 2,
29
+ "metadata": {},
30
+ "output_type": "execute_result"
31
+ }
32
+ ],
33
+ "source": [
34
+ "pd.__version__"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 55,
40
+ "id": "40099d81-2fd2-41cd-ae18-093e7174f8fb",
41
+ "metadata": {},
42
+ "outputs": [],
43
+ "source": [
44
+ "df = pd.read_csv(\"car_fuel_efficiency.csv\")"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": 56,
50
+ "id": "d7432f7f-c628-428e-b7ba-f932f85c0469",
51
+ "metadata": {},
52
+ "outputs": [
53
+ {
54
+ "data": {
55
+ "text/html": [
56
+ "<div>\n",
57
+ "<style scoped>\n",
58
+ " .dataframe tbody tr th:only-of-type {\n",
59
+ " vertical-align: middle;\n",
60
+ " }\n",
61
+ "\n",
62
+ " .dataframe tbody tr th {\n",
63
+ " vertical-align: top;\n",
64
+ " }\n",
65
+ "\n",
66
+ " .dataframe thead th {\n",
67
+ " text-align: right;\n",
68
+ " }\n",
69
+ "</style>\n",
70
+ "<table border=\"1\" class=\"dataframe\">\n",
71
+ " <thead>\n",
72
+ " <tr style=\"text-align: right;\">\n",
73
+ " <th></th>\n",
74
+ " <th>engine_displacement</th>\n",
75
+ " <th>num_cylinders</th>\n",
76
+ " <th>horsepower</th>\n",
77
+ " <th>vehicle_weight</th>\n",
78
+ " <th>acceleration</th>\n",
79
+ " <th>model_year</th>\n",
80
+ " <th>origin</th>\n",
81
+ " <th>fuel_type</th>\n",
82
+ " <th>drivetrain</th>\n",
83
+ " <th>num_doors</th>\n",
84
+ " <th>fuel_efficiency_mpg</th>\n",
85
+ " </tr>\n",
86
+ " </thead>\n",
87
+ " <tbody>\n",
88
+ " <tr>\n",
89
+ " <th>0</th>\n",
90
+ " <td>170</td>\n",
91
+ " <td>3.0</td>\n",
92
+ " <td>159.0</td>\n",
93
+ " <td>3413.433759</td>\n",
94
+ " <td>17.7</td>\n",
95
+ " <td>2003</td>\n",
96
+ " <td>Europe</td>\n",
97
+ " <td>Gasoline</td>\n",
98
+ " <td>All-wheel drive</td>\n",
99
+ " <td>0.0</td>\n",
100
+ " <td>13.231729</td>\n",
101
+ " </tr>\n",
102
+ " <tr>\n",
103
+ " <th>1</th>\n",
104
+ " <td>130</td>\n",
105
+ " <td>5.0</td>\n",
106
+ " <td>97.0</td>\n",
107
+ " <td>3149.664934</td>\n",
108
+ " <td>17.8</td>\n",
109
+ " <td>2007</td>\n",
110
+ " <td>USA</td>\n",
111
+ " <td>Gasoline</td>\n",
112
+ " <td>Front-wheel drive</td>\n",
113
+ " <td>0.0</td>\n",
114
+ " <td>13.688217</td>\n",
115
+ " </tr>\n",
116
+ " <tr>\n",
117
+ " <th>2</th>\n",
118
+ " <td>170</td>\n",
119
+ " <td>NaN</td>\n",
120
+ " <td>78.0</td>\n",
121
+ " <td>3079.038997</td>\n",
122
+ " <td>15.1</td>\n",
123
+ " <td>2018</td>\n",
124
+ " <td>Europe</td>\n",
125
+ " <td>Gasoline</td>\n",
126
+ " <td>Front-wheel drive</td>\n",
127
+ " <td>0.0</td>\n",
128
+ " <td>14.246341</td>\n",
129
+ " </tr>\n",
130
+ " <tr>\n",
131
+ " <th>3</th>\n",
132
+ " <td>220</td>\n",
133
+ " <td>4.0</td>\n",
134
+ " <td>NaN</td>\n",
135
+ " <td>2542.392402</td>\n",
136
+ " <td>20.2</td>\n",
137
+ " <td>2009</td>\n",
138
+ " <td>USA</td>\n",
139
+ " <td>Diesel</td>\n",
140
+ " <td>All-wheel drive</td>\n",
141
+ " <td>2.0</td>\n",
142
+ " <td>16.912736</td>\n",
143
+ " </tr>\n",
144
+ " <tr>\n",
145
+ " <th>4</th>\n",
146
+ " <td>210</td>\n",
147
+ " <td>1.0</td>\n",
148
+ " <td>140.0</td>\n",
149
+ " <td>3460.870990</td>\n",
150
+ " <td>14.4</td>\n",
151
+ " <td>2009</td>\n",
152
+ " <td>Europe</td>\n",
153
+ " <td>Gasoline</td>\n",
154
+ " <td>All-wheel drive</td>\n",
155
+ " <td>2.0</td>\n",
156
+ " <td>12.488369</td>\n",
157
+ " </tr>\n",
158
+ " </tbody>\n",
159
+ "</table>\n",
160
+ "</div>"
161
+ ],
162
+ "text/plain": [
163
+ " engine_displacement num_cylinders horsepower vehicle_weight \\\n",
164
+ "0 170 3.0 159.0 3413.433759 \n",
165
+ "1 130 5.0 97.0 3149.664934 \n",
166
+ "2 170 NaN 78.0 3079.038997 \n",
167
+ "3 220 4.0 NaN 2542.392402 \n",
168
+ "4 210 1.0 140.0 3460.870990 \n",
169
+ "\n",
170
+ " acceleration model_year origin fuel_type drivetrain num_doors \\\n",
171
+ "0 17.7 2003 Europe Gasoline All-wheel drive 0.0 \n",
172
+ "1 17.8 2007 USA Gasoline Front-wheel drive 0.0 \n",
173
+ "2 15.1 2018 Europe Gasoline Front-wheel drive 0.0 \n",
174
+ "3 20.2 2009 USA Diesel All-wheel drive 2.0 \n",
175
+ "4 14.4 2009 Europe Gasoline All-wheel drive 2.0 \n",
176
+ "\n",
177
+ " fuel_efficiency_mpg \n",
178
+ "0 13.231729 \n",
179
+ "1 13.688217 \n",
180
+ "2 14.246341 \n",
181
+ "3 16.912736 \n",
182
+ "4 12.488369 "
183
+ ]
184
+ },
185
+ "execution_count": 56,
186
+ "metadata": {},
187
+ "output_type": "execute_result"
188
+ }
189
+ ],
190
+ "source": [
191
+ "df.head()"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "markdown",
196
+ "id": "2b12116a-060a-48a5-afe9-fdde99e53fce",
197
+ "metadata": {},
198
+ "source": [
199
+ "## 1. No. of Records"
200
+ ]
201
+ },
202
+ {
203
+ "cell_type": "code",
204
+ "execution_count": 57,
205
+ "id": "515593f3-b510-48c2-9067-d9f8ffec3062",
206
+ "metadata": {},
207
+ "outputs": [
208
+ {
209
+ "data": {
210
+ "text/plain": [
211
+ "(9704, 11)"
212
+ ]
213
+ },
214
+ "execution_count": 57,
215
+ "metadata": {},
216
+ "output_type": "execute_result"
217
+ }
218
+ ],
219
+ "source": [
220
+ "df.shape"
221
+ ]
222
+ },
223
+ {
224
+ "cell_type": "markdown",
225
+ "id": "2a91bf5f-4be4-49b1-bf26-299b71da934a",
226
+ "metadata": {},
227
+ "source": [
228
+ "## 2. Distinct fuel types"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "execution_count": 58,
234
+ "id": "28148b46-460f-4dc0-895e-9b445b2c5cca",
235
+ "metadata": {},
236
+ "outputs": [
237
+ {
238
+ "data": {
239
+ "text/plain": [
240
+ "0 Gasoline\n",
241
+ "1 Gasoline\n",
242
+ "2 Gasoline\n",
243
+ "3 Diesel\n",
244
+ "4 Gasoline\n",
245
+ "Name: fuel_type, dtype: object"
246
+ ]
247
+ },
248
+ "execution_count": 58,
249
+ "metadata": {},
250
+ "output_type": "execute_result"
251
+ }
252
+ ],
253
+ "source": [
254
+ "fuels = df['fuel_type']\n",
255
+ "fuels.head()"
256
+ ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": 59,
261
+ "id": "a94560b0-3240-45c0-b075-28873370b87f",
262
+ "metadata": {},
263
+ "outputs": [
264
+ {
265
+ "data": {
266
+ "text/plain": [
267
+ "2"
268
+ ]
269
+ },
270
+ "execution_count": 59,
271
+ "metadata": {},
272
+ "output_type": "execute_result"
273
+ }
274
+ ],
275
+ "source": [
276
+ "fuels.nunique()"
277
+ ]
278
+ },
279
+ {
280
+ "cell_type": "markdown",
281
+ "id": "48bb4b87-2a21-408e-866c-8d4bacc57caa",
282
+ "metadata": {},
283
+ "source": [
284
+ "## 3. Null Values"
285
+ ]
286
+ },
287
+ {
288
+ "cell_type": "code",
289
+ "execution_count": 60,
290
+ "id": "7f2d85e3-672f-4e09-a48c-1f26a5627c2d",
291
+ "metadata": {},
292
+ "outputs": [
293
+ {
294
+ "data": {
295
+ "text/plain": [
296
+ "engine_displacement 0\n",
297
+ "num_cylinders 482\n",
298
+ "horsepower 708\n",
299
+ "vehicle_weight 0\n",
300
+ "acceleration 930\n",
301
+ "model_year 0\n",
302
+ "origin 0\n",
303
+ "fuel_type 0\n",
304
+ "drivetrain 0\n",
305
+ "num_doors 502\n",
306
+ "fuel_efficiency_mpg 0\n",
307
+ "dtype: int64"
308
+ ]
309
+ },
310
+ "execution_count": 60,
311
+ "metadata": {},
312
+ "output_type": "execute_result"
313
+ }
314
+ ],
315
+ "source": [
316
+ "df.isnull().sum()"
317
+ ]
318
+ },
319
+ {
320
+ "cell_type": "markdown",
321
+ "id": "98f06e86-38d3-4441-bafe-22f9089f6ee2",
322
+ "metadata": {},
323
+ "source": [
324
+ "Clearly, there are no missing values in the fuel_type column"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "markdown",
329
+ "id": "f052a64b-db12-4342-96d0-3f5ca215cca0",
330
+ "metadata": {},
331
+ "source": [
332
+ "## 4. Max fuel efficiency (cars from Asia)"
333
+ ]
334
+ },
335
+ {
336
+ "cell_type": "code",
337
+ "execution_count": 61,
338
+ "id": "de01f839-411a-4ed1-bc49-f479174cd8b3",
339
+ "metadata": {},
340
+ "outputs": [
341
+ {
342
+ "data": {
343
+ "text/plain": [
344
+ "Index(['engine_displacement', 'num_cylinders', 'horsepower', 'vehicle_weight',\n",
345
+ " 'acceleration', 'model_year', 'origin', 'fuel_type', 'drivetrain',\n",
346
+ " 'num_doors', 'fuel_efficiency_mpg'],\n",
347
+ " dtype='object')"
348
+ ]
349
+ },
350
+ "execution_count": 61,
351
+ "metadata": {},
352
+ "output_type": "execute_result"
353
+ }
354
+ ],
355
+ "source": [
356
+ "df.columns"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": 62,
362
+ "id": "98a48cd5-38df-4736-b873-1d78c1546bde",
363
+ "metadata": {
364
+ "scrolled": true
365
+ },
366
+ "outputs": [
367
+ {
368
+ "data": {
369
+ "text/html": [
370
+ "<div>\n",
371
+ "<style scoped>\n",
372
+ " .dataframe tbody tr th:only-of-type {\n",
373
+ " vertical-align: middle;\n",
374
+ " }\n",
375
+ "\n",
376
+ " .dataframe tbody tr th {\n",
377
+ " vertical-align: top;\n",
378
+ " }\n",
379
+ "\n",
380
+ " .dataframe thead th {\n",
381
+ " text-align: right;\n",
382
+ " }\n",
383
+ "</style>\n",
384
+ "<table border=\"1\" class=\"dataframe\">\n",
385
+ " <thead>\n",
386
+ " <tr style=\"text-align: right;\">\n",
387
+ " <th></th>\n",
388
+ " <th>engine_displacement</th>\n",
389
+ " <th>num_cylinders</th>\n",
390
+ " <th>horsepower</th>\n",
391
+ " <th>vehicle_weight</th>\n",
392
+ " <th>acceleration</th>\n",
393
+ " <th>model_year</th>\n",
394
+ " <th>origin</th>\n",
395
+ " <th>fuel_type</th>\n",
396
+ " <th>drivetrain</th>\n",
397
+ " <th>num_doors</th>\n",
398
+ " <th>fuel_efficiency_mpg</th>\n",
399
+ " </tr>\n",
400
+ " </thead>\n",
401
+ " <tbody>\n",
402
+ " <tr>\n",
403
+ " <th>0</th>\n",
404
+ " <td>170</td>\n",
405
+ " <td>3.0</td>\n",
406
+ " <td>159.0</td>\n",
407
+ " <td>3413.433759</td>\n",
408
+ " <td>17.7</td>\n",
409
+ " <td>2003</td>\n",
410
+ " <td>Europe</td>\n",
411
+ " <td>Gasoline</td>\n",
412
+ " <td>All-wheel drive</td>\n",
413
+ " <td>0.0</td>\n",
414
+ " <td>13.231729</td>\n",
415
+ " </tr>\n",
416
+ " <tr>\n",
417
+ " <th>1</th>\n",
418
+ " <td>130</td>\n",
419
+ " <td>5.0</td>\n",
420
+ " <td>97.0</td>\n",
421
+ " <td>3149.664934</td>\n",
422
+ " <td>17.8</td>\n",
423
+ " <td>2007</td>\n",
424
+ " <td>USA</td>\n",
425
+ " <td>Gasoline</td>\n",
426
+ " <td>Front-wheel drive</td>\n",
427
+ " <td>0.0</td>\n",
428
+ " <td>13.688217</td>\n",
429
+ " </tr>\n",
430
+ " <tr>\n",
431
+ " <th>2</th>\n",
432
+ " <td>170</td>\n",
433
+ " <td>NaN</td>\n",
434
+ " <td>78.0</td>\n",
435
+ " <td>3079.038997</td>\n",
436
+ " <td>15.1</td>\n",
437
+ " <td>2018</td>\n",
438
+ " <td>Europe</td>\n",
439
+ " <td>Gasoline</td>\n",
440
+ " <td>Front-wheel drive</td>\n",
441
+ " <td>0.0</td>\n",
442
+ " <td>14.246341</td>\n",
443
+ " </tr>\n",
444
+ " <tr>\n",
445
+ " <th>3</th>\n",
446
+ " <td>220</td>\n",
447
+ " <td>4.0</td>\n",
448
+ " <td>NaN</td>\n",
449
+ " <td>2542.392402</td>\n",
450
+ " <td>20.2</td>\n",
451
+ " <td>2009</td>\n",
452
+ " <td>USA</td>\n",
453
+ " <td>Diesel</td>\n",
454
+ " <td>All-wheel drive</td>\n",
455
+ " <td>2.0</td>\n",
456
+ " <td>16.912736</td>\n",
457
+ " </tr>\n",
458
+ " <tr>\n",
459
+ " <th>4</th>\n",
460
+ " <td>210</td>\n",
461
+ " <td>1.0</td>\n",
462
+ " <td>140.0</td>\n",
463
+ " <td>3460.870990</td>\n",
464
+ " <td>14.4</td>\n",
465
+ " <td>2009</td>\n",
466
+ " <td>Europe</td>\n",
467
+ " <td>Gasoline</td>\n",
468
+ " <td>All-wheel drive</td>\n",
469
+ " <td>2.0</td>\n",
470
+ " <td>12.488369</td>\n",
471
+ " </tr>\n",
472
+ " </tbody>\n",
473
+ "</table>\n",
474
+ "</div>"
475
+ ],
476
+ "text/plain": [
477
+ " engine_displacement num_cylinders horsepower vehicle_weight \\\n",
478
+ "0 170 3.0 159.0 3413.433759 \n",
479
+ "1 130 5.0 97.0 3149.664934 \n",
480
+ "2 170 NaN 78.0 3079.038997 \n",
481
+ "3 220 4.0 NaN 2542.392402 \n",
482
+ "4 210 1.0 140.0 3460.870990 \n",
483
+ "\n",
484
+ " acceleration model_year origin fuel_type drivetrain num_doors \\\n",
485
+ "0 17.7 2003 Europe Gasoline All-wheel drive 0.0 \n",
486
+ "1 17.8 2007 USA Gasoline Front-wheel drive 0.0 \n",
487
+ "2 15.1 2018 Europe Gasoline Front-wheel drive 0.0 \n",
488
+ "3 20.2 2009 USA Diesel All-wheel drive 2.0 \n",
489
+ "4 14.4 2009 Europe Gasoline All-wheel drive 2.0 \n",
490
+ "\n",
491
+ " fuel_efficiency_mpg \n",
492
+ "0 13.231729 \n",
493
+ "1 13.688217 \n",
494
+ "2 14.246341 \n",
495
+ "3 16.912736 \n",
496
+ "4 12.488369 "
497
+ ]
498
+ },
499
+ "execution_count": 62,
500
+ "metadata": {},
501
+ "output_type": "execute_result"
502
+ }
503
+ ],
504
+ "source": [
505
+ "df.head()"
506
+ ]
507
+ },
508
+ {
509
+ "cell_type": "code",
510
+ "execution_count": 63,
511
+ "id": "25faf234-3f06-4f9b-bd07-0f25de03ee1c",
512
+ "metadata": {},
513
+ "outputs": [
514
+ {
515
+ "data": {
516
+ "text/html": [
517
+ "<div>\n",
518
+ "<style scoped>\n",
519
+ " .dataframe tbody tr th:only-of-type {\n",
520
+ " vertical-align: middle;\n",
521
+ " }\n",
522
+ "\n",
523
+ " .dataframe tbody tr th {\n",
524
+ " vertical-align: top;\n",
525
+ " }\n",
526
+ "\n",
527
+ " .dataframe thead th {\n",
528
+ " text-align: right;\n",
529
+ " }\n",
530
+ "</style>\n",
531
+ "<table border=\"1\" class=\"dataframe\">\n",
532
+ " <thead>\n",
533
+ " <tr style=\"text-align: right;\">\n",
534
+ " <th></th>\n",
535
+ " <th>engine_displacement</th>\n",
536
+ " <th>num_cylinders</th>\n",
537
+ " <th>horsepower</th>\n",
538
+ " <th>vehicle_weight</th>\n",
539
+ " <th>acceleration</th>\n",
540
+ " <th>model_year</th>\n",
541
+ " <th>origin</th>\n",
542
+ " <th>fuel_type</th>\n",
543
+ " <th>drivetrain</th>\n",
544
+ " <th>num_doors</th>\n",
545
+ " <th>fuel_efficiency_mpg</th>\n",
546
+ " </tr>\n",
547
+ " </thead>\n",
548
+ " <tbody>\n",
549
+ " <tr>\n",
550
+ " <th>8</th>\n",
551
+ " <td>250</td>\n",
552
+ " <td>1.0</td>\n",
553
+ " <td>174.0</td>\n",
554
+ " <td>2714.219310</td>\n",
555
+ " <td>10.3</td>\n",
556
+ " <td>2016</td>\n",
557
+ " <td>Asia</td>\n",
558
+ " <td>Diesel</td>\n",
559
+ " <td>Front-wheel drive</td>\n",
560
+ " <td>-1.0</td>\n",
561
+ " <td>16.823554</td>\n",
562
+ " </tr>\n",
563
+ " <tr>\n",
564
+ " <th>12</th>\n",
565
+ " <td>320</td>\n",
566
+ " <td>5.0</td>\n",
567
+ " <td>145.0</td>\n",
568
+ " <td>2783.868974</td>\n",
569
+ " <td>15.1</td>\n",
570
+ " <td>2010</td>\n",
571
+ " <td>Asia</td>\n",
572
+ " <td>Diesel</td>\n",
573
+ " <td>All-wheel drive</td>\n",
574
+ " <td>1.0</td>\n",
575
+ " <td>16.175820</td>\n",
576
+ " </tr>\n",
577
+ " <tr>\n",
578
+ " <th>14</th>\n",
579
+ " <td>200</td>\n",
580
+ " <td>6.0</td>\n",
581
+ " <td>160.0</td>\n",
582
+ " <td>3582.687368</td>\n",
583
+ " <td>14.9</td>\n",
584
+ " <td>2007</td>\n",
585
+ " <td>Asia</td>\n",
586
+ " <td>Diesel</td>\n",
587
+ " <td>All-wheel drive</td>\n",
588
+ " <td>0.0</td>\n",
589
+ " <td>11.871091</td>\n",
590
+ " </tr>\n",
591
+ " <tr>\n",
592
+ " <th>20</th>\n",
593
+ " <td>150</td>\n",
594
+ " <td>3.0</td>\n",
595
+ " <td>197.0</td>\n",
596
+ " <td>2231.808142</td>\n",
597
+ " <td>18.7</td>\n",
598
+ " <td>2011</td>\n",
599
+ " <td>Asia</td>\n",
600
+ " <td>Gasoline</td>\n",
601
+ " <td>Front-wheel drive</td>\n",
602
+ " <td>1.0</td>\n",
603
+ " <td>18.889083</td>\n",
604
+ " </tr>\n",
605
+ " <tr>\n",
606
+ " <th>21</th>\n",
607
+ " <td>160</td>\n",
608
+ " <td>4.0</td>\n",
609
+ " <td>133.0</td>\n",
610
+ " <td>2659.431451</td>\n",
611
+ " <td>NaN</td>\n",
612
+ " <td>2016</td>\n",
613
+ " <td>Asia</td>\n",
614
+ " <td>Gasoline</td>\n",
615
+ " <td>Front-wheel drive</td>\n",
616
+ " <td>-1.0</td>\n",
617
+ " <td>16.077730</td>\n",
618
+ " </tr>\n",
619
+ " </tbody>\n",
620
+ "</table>\n",
621
+ "</div>"
622
+ ],
623
+ "text/plain": [
624
+ " engine_displacement num_cylinders horsepower vehicle_weight \\\n",
625
+ "8 250 1.0 174.0 2714.219310 \n",
626
+ "12 320 5.0 145.0 2783.868974 \n",
627
+ "14 200 6.0 160.0 3582.687368 \n",
628
+ "20 150 3.0 197.0 2231.808142 \n",
629
+ "21 160 4.0 133.0 2659.431451 \n",
630
+ "\n",
631
+ " acceleration model_year origin fuel_type drivetrain num_doors \\\n",
632
+ "8 10.3 2016 Asia Diesel Front-wheel drive -1.0 \n",
633
+ "12 15.1 2010 Asia Diesel All-wheel drive 1.0 \n",
634
+ "14 14.9 2007 Asia Diesel All-wheel drive 0.0 \n",
635
+ "20 18.7 2011 Asia Gasoline Front-wheel drive 1.0 \n",
636
+ "21 NaN 2016 Asia Gasoline Front-wheel drive -1.0 \n",
637
+ "\n",
638
+ " fuel_efficiency_mpg \n",
639
+ "8 16.823554 \n",
640
+ "12 16.175820 \n",
641
+ "14 11.871091 \n",
642
+ "20 18.889083 \n",
643
+ "21 16.077730 "
644
+ ]
645
+ },
646
+ "execution_count": 63,
647
+ "metadata": {},
648
+ "output_type": "execute_result"
649
+ }
650
+ ],
651
+ "source": [
652
+ "mask_asia = df['origin'] == 'Asia'\n",
653
+ "eff = df[mask_asia]\n",
654
+ "eff.head()"
655
+ ]
656
+ },
657
+ {
658
+ "cell_type": "code",
659
+ "execution_count": 64,
660
+ "id": "5d1f7efd-77f9-4568-a012-5cd1ba753fd6",
661
+ "metadata": {},
662
+ "outputs": [
663
+ {
664
+ "data": {
665
+ "text/plain": [
666
+ "23.759122836520497"
667
+ ]
668
+ },
669
+ "execution_count": 64,
670
+ "metadata": {},
671
+ "output_type": "execute_result"
672
+ }
673
+ ],
674
+ "source": [
675
+ "max_eff = max(eff['fuel_efficiency_mpg'])\n",
676
+ "max_eff"
677
+ ]
678
+ },
679
+ {
680
+ "cell_type": "markdown",
681
+ "id": "cda34c7c-f2c0-497d-b419-dae670db022b",
682
+ "metadata": {},
683
+ "source": [
684
+ "## 5. Median value of horsepower"
685
+ ]
686
+ },
687
+ {
688
+ "cell_type": "code",
689
+ "execution_count": 65,
690
+ "id": "e8328da7-f04f-41bd-94f5-b534aa00f2c1",
691
+ "metadata": {},
692
+ "outputs": [
693
+ {
694
+ "data": {
695
+ "text/plain": [
696
+ "engine_displacement 0\n",
697
+ "num_cylinders 482\n",
698
+ "horsepower 708\n",
699
+ "vehicle_weight 0\n",
700
+ "acceleration 930\n",
701
+ "model_year 0\n",
702
+ "origin 0\n",
703
+ "fuel_type 0\n",
704
+ "drivetrain 0\n",
705
+ "num_doors 502\n",
706
+ "fuel_efficiency_mpg 0\n",
707
+ "dtype: int64"
708
+ ]
709
+ },
710
+ "execution_count": 65,
711
+ "metadata": {},
712
+ "output_type": "execute_result"
713
+ }
714
+ ],
715
+ "source": [
716
+ "df.isnull().sum()"
717
+ ]
718
+ },
719
+ {
720
+ "cell_type": "code",
721
+ "execution_count": 66,
722
+ "id": "6eaafaf8-6674-443d-b26c-6d8212d91754",
723
+ "metadata": {},
724
+ "outputs": [
725
+ {
726
+ "data": {
727
+ "text/plain": [
728
+ "149.0"
729
+ ]
730
+ },
731
+ "execution_count": 66,
732
+ "metadata": {},
733
+ "output_type": "execute_result"
734
+ }
735
+ ],
736
+ "source": [
737
+ "# median of the horsepower col\n",
738
+ "df['horsepower'].median()"
739
+ ]
740
+ },
741
+ {
742
+ "cell_type": "code",
743
+ "execution_count": 67,
744
+ "id": "9b785320-6b9a-41c0-bb27-c0f126145177",
745
+ "metadata": {},
746
+ "outputs": [
747
+ {
748
+ "data": {
749
+ "text/plain": [
750
+ "horsepower\n",
751
+ "152.0 142\n",
752
+ "145.0 141\n",
753
+ "151.0 134\n",
754
+ "148.0 130\n",
755
+ "141.0 130\n",
756
+ " ... \n",
757
+ "40.0 1\n",
758
+ "57.0 1\n",
759
+ "245.0 1\n",
760
+ "252.0 1\n",
761
+ "61.0 1\n",
762
+ "Name: count, Length: 192, dtype: int64"
763
+ ]
764
+ },
765
+ "execution_count": 67,
766
+ "metadata": {},
767
+ "output_type": "execute_result"
768
+ }
769
+ ],
770
+ "source": [
771
+ "# most frequent value here\n",
772
+ "df['horsepower'].value_counts()"
773
+ ]
774
+ },
775
+ {
776
+ "cell_type": "code",
777
+ "execution_count": 74,
778
+ "id": "126b6df6-515f-463e-83f3-10abbf2c25e2",
779
+ "metadata": {},
780
+ "outputs": [
781
+ {
782
+ "data": {
783
+ "text/plain": [
784
+ "np.float64(152.0)"
785
+ ]
786
+ },
787
+ "execution_count": 74,
788
+ "metadata": {},
789
+ "output_type": "execute_result"
790
+ }
791
+ ],
792
+ "source": [
793
+ "# alternatively, mode() returns the most frequent value directly\n",
794
+ "mode_horsepower = df['horsepower'].mode()[0]\n",
795
+ "mode_horsepower"
796
+ ]
797
+ },
798
+ {
799
+ "cell_type": "code",
800
+ "execution_count": 80,
801
+ "id": "bd17e63f-c5c1-4d8a-8ba5-1b8e106175fc",
802
+ "metadata": {},
803
+ "outputs": [],
804
+ "source": [
805
+ "# fill the missing values in the col with mode\n",
806
+ "df['horsepower'].fillna(mode_horsepower, inplace=True)"
807
+ ]
808
+ },
809
+ {
810
+ "cell_type": "code",
811
+ "execution_count": 81,
812
+ "id": "e7dc6b1a-323a-4f88-b475-f76059759e66",
813
+ "metadata": {},
814
+ "outputs": [
815
+ {
816
+ "data": {
817
+ "text/plain": [
818
+ "engine_displacement 0\n",
819
+ "num_cylinders 482\n",
820
+ "horsepower 0\n",
821
+ "vehicle_weight 0\n",
822
+ "acceleration 930\n",
823
+ "model_year 0\n",
824
+ "origin 0\n",
825
+ "fuel_type 0\n",
826
+ "drivetrain 0\n",
827
+ "num_doors 502\n",
828
+ "fuel_efficiency_mpg 0\n",
829
+ "dtype: int64"
830
+ ]
831
+ },
832
+ "execution_count": 81,
833
+ "metadata": {},
834
+ "output_type": "execute_result"
835
+ }
836
+ ],
837
+ "source": [
838
+ "# check if null values are removed or not\n",
839
+ "df.isnull().sum()"
840
+ ]
841
+ },
842
+ {
843
+ "cell_type": "markdown",
844
+ "id": "3eaf5439-3f99-4506-bdd8-ca35e03c18bf",
845
+ "metadata": {},
846
+ "source": [
847
+ "Clearly, the null values in the horsepower column have been imputed"
848
+ ]
849
+ },
850
+ {
851
+ "cell_type": "code",
852
+ "execution_count": 82,
853
+ "id": "94dc61c5-bbcb-47f4-9370-f3238c26e2a2",
854
+ "metadata": {},
855
+ "outputs": [
856
+ {
857
+ "data": {
858
+ "text/plain": [
859
+ "152.0"
860
+ ]
861
+ },
862
+ "execution_count": 82,
863
+ "metadata": {},
864
+ "output_type": "execute_result"
865
+ }
866
+ ],
867
+ "source": [
868
+ "# now recalculate the median\n",
869
+ "df['horsepower'].median()"
870
+ ]
871
+ },
872
+ {
873
+ "cell_type": "markdown",
874
+ "id": "32337e0a-dbfc-4e96-9fab-15723b3a5166",
875
+ "metadata": {},
876
+ "source": [
877
+ "## 6. Model building"
878
+ ]
879
+ },
880
+ {
881
+ "cell_type": "code",
882
+ "execution_count": 84,
883
+ "id": "a28d7bfb-3f4d-4018-8881-b39bf43d4089",
884
+ "metadata": {},
885
+ "outputs": [
886
+ {
887
+ "data": {
888
+ "text/html": [
889
+ "<div>\n",
890
+ "<style scoped>\n",
891
+ " .dataframe tbody tr th:only-of-type {\n",
892
+ " vertical-align: middle;\n",
893
+ " }\n",
894
+ "\n",
895
+ " .dataframe tbody tr th {\n",
896
+ " vertical-align: top;\n",
897
+ " }\n",
898
+ "\n",
899
+ " .dataframe thead th {\n",
900
+ " text-align: right;\n",
901
+ " }\n",
902
+ "</style>\n",
903
+ "<table border=\"1\" class=\"dataframe\">\n",
904
+ " <thead>\n",
905
+ " <tr style=\"text-align: right;\">\n",
906
+ " <th></th>\n",
907
+ " <th>engine_displacement</th>\n",
908
+ " <th>num_cylinders</th>\n",
909
+ " <th>horsepower</th>\n",
910
+ " <th>vehicle_weight</th>\n",
911
+ " <th>acceleration</th>\n",
912
+ " <th>model_year</th>\n",
913
+ " <th>origin</th>\n",
914
+ " <th>fuel_type</th>\n",
915
+ " <th>drivetrain</th>\n",
916
+ " <th>num_doors</th>\n",
917
+ " <th>fuel_efficiency_mpg</th>\n",
918
+ " </tr>\n",
919
+ " </thead>\n",
920
+ " <tbody>\n",
921
+ " <tr>\n",
922
+ " <th>0</th>\n",
923
+ " <td>170</td>\n",
924
+ " <td>3.0</td>\n",
925
+ " <td>159.0</td>\n",
926
+ " <td>3413.433759</td>\n",
927
+ " <td>17.7</td>\n",
928
+ " <td>2003</td>\n",
929
+ " <td>Europe</td>\n",
930
+ " <td>Gasoline</td>\n",
931
+ " <td>All-wheel drive</td>\n",
932
+ " <td>0.0</td>\n",
933
+ " <td>13.231729</td>\n",
934
+ " </tr>\n",
935
+ " <tr>\n",
936
+ " <th>1</th>\n",
937
+ " <td>130</td>\n",
938
+ " <td>5.0</td>\n",
939
+ " <td>97.0</td>\n",
940
+ " <td>3149.664934</td>\n",
941
+ " <td>17.8</td>\n",
942
+ " <td>2007</td>\n",
943
+ " <td>USA</td>\n",
944
+ " <td>Gasoline</td>\n",
945
+ " <td>Front-wheel drive</td>\n",
946
+ " <td>0.0</td>\n",
947
+ " <td>13.688217</td>\n",
948
+ " </tr>\n",
949
+ " <tr>\n",
950
+ " <th>2</th>\n",
951
+ " <td>170</td>\n",
952
+ " <td>NaN</td>\n",
953
+ " <td>78.0</td>\n",
954
+ " <td>3079.038997</td>\n",
955
+ " <td>15.1</td>\n",
956
+ " <td>2018</td>\n",
957
+ " <td>Europe</td>\n",
958
+ " <td>Gasoline</td>\n",
959
+ " <td>Front-wheel drive</td>\n",
960
+ " <td>0.0</td>\n",
961
+ " <td>14.246341</td>\n",
962
+ " </tr>\n",
963
+ " <tr>\n",
964
+ " <th>3</th>\n",
965
+ " <td>220</td>\n",
966
+ " <td>4.0</td>\n",
967
+ " <td>152.0</td>\n",
968
+ " <td>2542.392402</td>\n",
969
+ " <td>20.2</td>\n",
970
+ " <td>2009</td>\n",
971
+ " <td>USA</td>\n",
972
+ " <td>Diesel</td>\n",
973
+ " <td>All-wheel drive</td>\n",
974
+ " <td>2.0</td>\n",
975
+ " <td>16.912736</td>\n",
976
+ " </tr>\n",
977
+ " <tr>\n",
978
+ " <th>4</th>\n",
979
+ " <td>210</td>\n",
980
+ " <td>1.0</td>\n",
981
+ " <td>140.0</td>\n",
982
+ " <td>3460.870990</td>\n",
983
+ " <td>14.4</td>\n",
984
+ " <td>2009</td>\n",
985
+ " <td>Europe</td>\n",
986
+ " <td>Gasoline</td>\n",
987
+ " <td>All-wheel drive</td>\n",
988
+ " <td>2.0</td>\n",
989
+ " <td>12.488369</td>\n",
990
+ " </tr>\n",
991
+ " </tbody>\n",
992
+ "</table>\n",
993
+ "</div>"
994
+ ],
995
+ "text/plain": [
996
+ " engine_displacement num_cylinders horsepower vehicle_weight \\\n",
997
+ "0 170 3.0 159.0 3413.433759 \n",
998
+ "1 130 5.0 97.0 3149.664934 \n",
999
+ "2 170 NaN 78.0 3079.038997 \n",
1000
+ "3 220 4.0 152.0 2542.392402 \n",
1001
+ "4 210 1.0 140.0 3460.870990 \n",
1002
+ "\n",
1003
+ " acceleration model_year origin fuel_type drivetrain num_doors \\\n",
1004
+ "0 17.7 2003 Europe Gasoline All-wheel drive 0.0 \n",
1005
+ "1 17.8 2007 USA Gasoline Front-wheel drive 0.0 \n",
1006
+ "2 15.1 2018 Europe Gasoline Front-wheel drive 0.0 \n",
1007
+ "3 20.2 2009 USA Diesel All-wheel drive 2.0 \n",
1008
+ "4 14.4 2009 Europe Gasoline All-wheel drive 2.0 \n",
1009
+ "\n",
1010
+ " fuel_efficiency_mpg \n",
1011
+ "0 13.231729 \n",
1012
+ "1 13.688217 \n",
1013
+ "2 14.246341 \n",
1014
+ "3 16.912736 \n",
1015
+ "4 12.488369 "
1016
+ ]
1017
+ },
1018
+ "execution_count": 84,
1019
+ "metadata": {},
1020
+ "output_type": "execute_result"
1021
+ }
1022
+ ],
1023
+ "source": [
1024
+ "df.head()"
1025
+ ]
1026
+ },
1027
+ {
1028
+ "cell_type": "code",
1029
+ "execution_count": 83,
1030
+ "id": "30057fab-fad4-44ae-9b9b-2aae11614f84",
1031
+ "metadata": {},
1032
+ "outputs": [
1033
+ {
1034
+ "data": {
1035
+ "text/plain": [
1036
+ "0 False\n",
1037
+ "1 False\n",
1038
+ "2 False\n",
1039
+ "3 False\n",
1040
+ "4 False\n",
1041
+ "Name: origin, dtype: bool"
1042
+ ]
1043
+ },
1044
+ "execution_count": 83,
1045
+ "metadata": {},
1046
+ "output_type": "execute_result"
1047
+ }
1048
+ ],
1049
+ "source": [
1050
+ "mask_asia.head()"
1051
+ ]
1052
+ },
1053
+ {
1054
+ "cell_type": "code",
1055
+ "execution_count": 88,
1056
+ "id": "dbaa1132-9a2f-411a-9668-b5110109e3aa",
1057
+ "metadata": {},
1058
+ "outputs": [],
1059
+ "source": [
1060
+ "columns_to_keep = ['vehicle_weight', 'model_year']"
1061
+ ]
1062
+ },
1063
+ {
1064
+ "cell_type": "code",
1065
+ "execution_count": 94,
1066
+ "id": "c37eb7f0-4e38-4a8d-b5a0-f54ba43ef6c7",
1067
+ "metadata": {},
1068
+ "outputs": [
1069
+ {
1070
+ "data": {
1071
+ "text/html": [
1072
+ "<div>\n",
1073
+ "<style scoped>\n",
1074
+ " .dataframe tbody tr th:only-of-type {\n",
1075
+ " vertical-align: middle;\n",
1076
+ " }\n",
1077
+ "\n",
1078
+ " .dataframe tbody tr th {\n",
1079
+ " vertical-align: top;\n",
1080
+ " }\n",
1081
+ "\n",
1082
+ " .dataframe thead th {\n",
1083
+ " text-align: right;\n",
1084
+ " }\n",
1085
+ "</style>\n",
1086
+ "<table border=\"1\" class=\"dataframe\">\n",
1087
+ " <thead>\n",
1088
+ " <tr style=\"text-align: right;\">\n",
1089
+ " <th></th>\n",
1090
+ " <th>vehicle_weight</th>\n",
1091
+ " <th>model_year</th>\n",
1092
+ " </tr>\n",
1093
+ " </thead>\n",
1094
+ " <tbody>\n",
1095
+ " <tr>\n",
1096
+ " <th>8</th>\n",
1097
+ " <td>2714.219310</td>\n",
1098
+ " <td>2016</td>\n",
1099
+ " </tr>\n",
1100
+ " <tr>\n",
1101
+ " <th>12</th>\n",
1102
+ " <td>2783.868974</td>\n",
1103
+ " <td>2010</td>\n",
1104
+ " </tr>\n",
1105
+ " <tr>\n",
1106
+ " <th>14</th>\n",
1107
+ " <td>3582.687368</td>\n",
1108
+ " <td>2007</td>\n",
1109
+ " </tr>\n",
1110
+ " <tr>\n",
1111
+ " <th>20</th>\n",
1112
+ " <td>2231.808142</td>\n",
1113
+ " <td>2011</td>\n",
1114
+ " </tr>\n",
1115
+ " <tr>\n",
1116
+ " <th>21</th>\n",
1117
+ " <td>2659.431451</td>\n",
1118
+ " <td>2016</td>\n",
1119
+ " </tr>\n",
1120
+ " <tr>\n",
1121
+ " <th>34</th>\n",
1122
+ " <td>2844.227534</td>\n",
1123
+ " <td>2014</td>\n",
1124
+ " </tr>\n",
1125
+ " <tr>\n",
1126
+ " <th>38</th>\n",
1127
+ " <td>3761.994038</td>\n",
1128
+ " <td>2019</td>\n",
1129
+ " </tr>\n",
1130
+ " </tbody>\n",
1131
+ "</table>\n",
1132
+ "</div>"
1133
+ ],
1134
+ "text/plain": [
1135
+ " vehicle_weight model_year\n",
1136
+ "8 2714.219310 2016\n",
1137
+ "12 2783.868974 2010\n",
1138
+ "14 3582.687368 2007\n",
1139
+ "20 2231.808142 2011\n",
1140
+ "21 2659.431451 2016\n",
1141
+ "34 2844.227534 2014\n",
1142
+ "38 3761.994038 2019"
1143
+ ]
1144
+ },
1145
+ "execution_count": 94,
1146
+ "metadata": {},
1147
+ "output_type": "execute_result"
1148
+ }
1149
+ ],
1150
+ "source": [
1151
+ "# subset the asian data\n",
1152
+ "df_asia = df[mask_asia]\n",
1153
+ "df_asia_final = df_asia[columns_to_keep].head(7)\n",
1154
+ "df_asia_final"
1155
+ ]
1156
+ },
1157
+ {
1158
+ "cell_type": "code",
1159
+ "execution_count": 100,
1160
+ "id": "89abd22c-7cc2-49b9-8afd-4e824f4360c7",
1161
+ "metadata": {},
1162
+ "outputs": [
1163
+ {
1164
+ "data": {
1165
+ "text/plain": [
1166
+ "(7, 2)"
1167
+ ]
1168
+ },
1169
+ "execution_count": 100,
1170
+ "metadata": {},
1171
+ "output_type": "execute_result"
1172
+ }
1173
+ ],
1174
+ "source": [
1175
+ "# get the underlying numpy array\n",
1176
+ "X = np.array(df_asia_final)\n",
1177
+ "X.shape"
1178
+ ]
1179
+ },
1180
+ {
1181
+ "cell_type": "code",
1182
+ "execution_count": 110,
1183
+ "id": "252a6e2f-c7f9-4c30-b74a-8b4e1ea876ab",
1184
+ "metadata": {},
1185
+ "outputs": [
1186
+ {
1187
+ "data": {
1188
+ "text/plain": [
1189
+ "(2, 2)"
1190
+ ]
1191
+ },
1192
+ "execution_count": 110,
1193
+ "metadata": {},
1194
+ "output_type": "execute_result"
1195
+ }
1196
+ ],
1197
+ "source": [
1198
+ "# multiply the transpose of X by X: (2,7) @ (7,2) -> (2,2)\n",
1199
+ "XTX = X.T @ X\n",
1200
+ "XTX.shape"
1201
+ ]
1202
+ },
1203
+ {
1204
+ "cell_type": "code",
1205
+ "execution_count": 111,
1206
+ "id": "63342692-a307-48cd-a6cf-bdfc8e1985c1",
1207
+ "metadata": {},
1208
+ "outputs": [
1209
+ {
1210
+ "data": {
1211
+ "text/plain": [
1212
+ "(2, 2)"
1213
+ ]
1214
+ },
1215
+ "execution_count": 111,
1216
+ "metadata": {},
1217
+ "output_type": "execute_result"
1218
+ }
1219
+ ],
1220
+ "source": [
1221
+ "XTX_inv = np.linalg.inv(XTX)\n",
1222
+ "XTX_inv.shape"
1223
+ ]
1224
+ },
1225
+ {
1226
+ "cell_type": "code",
1227
+ "execution_count": 112,
1228
+ "id": "e4b0a33e-ee66-48d3-82d4-7953e0a64461",
1229
+ "metadata": {},
1230
+ "outputs": [
1231
+ {
1232
+ "data": {
1233
+ "text/plain": [
1234
+ "array([1100, 1300, 800, 900, 1000, 1100, 1200])"
1235
+ ]
1236
+ },
1237
+ "execution_count": 112,
1238
+ "metadata": {},
1239
+ "output_type": "execute_result"
1240
+ }
1241
+ ],
1242
+ "source": [
1243
+ "# Create an array y with values \n",
1244
+ "y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])\n",
1245
+ "y "
1246
+ ]
1247
+ },
1248
+ {
1249
+ "cell_type": "code",
1250
+ "execution_count": 114,
1251
+ "id": "e5b0bf7e-9e5d-46c8-9d47-4555f05bfc6f",
1252
+ "metadata": {},
1253
+ "outputs": [],
1254
+ "source": [
1255
+ "# Multiply the inverse of XTX with the transpose of X, and then multiply the result by y. Call the result w\n",
1256
+ "step = XTX_inv @ X.T\n",
1257
+ "w = step @ y"
1258
+ ]
1259
+ },
1260
+ {
1261
+ "cell_type": "code",
1262
+ "execution_count": 115,
1263
+ "id": "1ddb3d1f-b877-4c66-9245-098cd63b850a",
1264
+ "metadata": {},
1265
+ "outputs": [
1266
+ {
1267
+ "data": {
1268
+ "text/plain": [
1269
+ "np.float64(0.5187709081074016)"
1270
+ ]
1271
+ },
1272
+ "execution_count": 115,
1273
+ "metadata": {},
1274
+ "output_type": "execute_result"
1275
+ }
1276
+ ],
1277
+ "source": [
1278
+ "# sum of all the elements of the result / weights\n",
1279
+ "np.sum(w)"
1280
+ ]
1281
+ },
1282
+ {
1283
+ "cell_type": "markdown",
1284
+ "id": "5cad1468-2329-4fa3-9b91-0dc30dffafbc",
1285
+ "metadata": {},
1286
+ "source": [
1287
+ "## End of Week 1"
1288
+ ]
1289
+ },
1290
+ {
1291
+ "cell_type": "code",
1292
+ "execution_count": null,
1293
+ "id": "2bbec182-f585-43fa-9960-ca979139c0e2",
1294
+ "metadata": {},
1295
+ "outputs": [],
1296
+ "source": []
1297
+ }
1298
+ ],
1299
+ "metadata": {
1300
+ "kernelspec": {
1301
+ "display_name": "Python 3 (ipykernel)",
1302
+ "language": "python",
1303
+ "name": "python3"
1304
+ },
1305
+ "language_info": {
1306
+ "codemirror_mode": {
1307
+ "name": "ipython",
1308
+ "version": 3
1309
+ },
1310
+ "file_extension": ".py",
1311
+ "mimetype": "text/x-python",
1312
+ "name": "python",
1313
+ "nbconvert_exporter": "python",
1314
+ "pygments_lexer": "ipython3",
1315
+ "version": "3.11.13"
1316
+ }
1317
+ },
1318
+ "nbformat": 4,
1319
+ "nbformat_minor": 5
1320
+ }
Week 1/car_fuel_efficiency.csv ADDED
The diff for this file is too large to render. See raw diff