sks01dev commited on
Commit
925b9ce
·
verified ·
1 Parent(s): d5bb117

Delete Week 1

Browse files
Week 1/Week 1.ipynb DELETED
@@ -1,1320 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 96,
6
- "id": "52e4d5b0-5142-488c-afe8-da951cce0ec5",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "import pandas as pd\n",
11
- "import warnings\n",
12
- "warnings.filterwarnings('ignore')\n",
13
- "import numpy as np"
14
- ]
15
- },
16
- {
17
- "cell_type": "code",
18
- "execution_count": 2,
19
- "id": "e6eee592-2f01-482a-bf99-2140ba125d70",
20
- "metadata": {},
21
- "outputs": [
22
- {
23
- "data": {
24
- "text/plain": [
25
- "'2.3.2'"
26
- ]
27
- },
28
- "execution_count": 2,
29
- "metadata": {},
30
- "output_type": "execute_result"
31
- }
32
- ],
33
- "source": [
34
- "pd.__version__"
35
- ]
36
- },
37
- {
38
- "cell_type": "code",
39
- "execution_count": 55,
40
- "id": "40099d81-2fd2-41cd-ae18-093e7174f8fb",
41
- "metadata": {},
42
- "outputs": [],
43
- "source": [
44
- "df = pd.read_csv(\"car_fuel_efficiency.csv\")"
45
- ]
46
- },
47
- {
48
- "cell_type": "code",
49
- "execution_count": 56,
50
- "id": "d7432f7f-c628-428e-b7ba-f932f85c0469",
51
- "metadata": {},
52
- "outputs": [
53
- {
54
- "data": {
55
- "text/html": [
56
- "<div>\n",
57
- "<style scoped>\n",
58
- " .dataframe tbody tr th:only-of-type {\n",
59
- " vertical-align: middle;\n",
60
- " }\n",
61
- "\n",
62
- " .dataframe tbody tr th {\n",
63
- " vertical-align: top;\n",
64
- " }\n",
65
- "\n",
66
- " .dataframe thead th {\n",
67
- " text-align: right;\n",
68
- " }\n",
69
- "</style>\n",
70
- "<table border=\"1\" class=\"dataframe\">\n",
71
- " <thead>\n",
72
- " <tr style=\"text-align: right;\">\n",
73
- " <th></th>\n",
74
- " <th>engine_displacement</th>\n",
75
- " <th>num_cylinders</th>\n",
76
- " <th>horsepower</th>\n",
77
- " <th>vehicle_weight</th>\n",
78
- " <th>acceleration</th>\n",
79
- " <th>model_year</th>\n",
80
- " <th>origin</th>\n",
81
- " <th>fuel_type</th>\n",
82
- " <th>drivetrain</th>\n",
83
- " <th>num_doors</th>\n",
84
- " <th>fuel_efficiency_mpg</th>\n",
85
- " </tr>\n",
86
- " </thead>\n",
87
- " <tbody>\n",
88
- " <tr>\n",
89
- " <th>0</th>\n",
90
- " <td>170</td>\n",
91
- " <td>3.0</td>\n",
92
- " <td>159.0</td>\n",
93
- " <td>3413.433759</td>\n",
94
- " <td>17.7</td>\n",
95
- " <td>2003</td>\n",
96
- " <td>Europe</td>\n",
97
- " <td>Gasoline</td>\n",
98
- " <td>All-wheel drive</td>\n",
99
- " <td>0.0</td>\n",
100
- " <td>13.231729</td>\n",
101
- " </tr>\n",
102
- " <tr>\n",
103
- " <th>1</th>\n",
104
- " <td>130</td>\n",
105
- " <td>5.0</td>\n",
106
- " <td>97.0</td>\n",
107
- " <td>3149.664934</td>\n",
108
- " <td>17.8</td>\n",
109
- " <td>2007</td>\n",
110
- " <td>USA</td>\n",
111
- " <td>Gasoline</td>\n",
112
- " <td>Front-wheel drive</td>\n",
113
- " <td>0.0</td>\n",
114
- " <td>13.688217</td>\n",
115
- " </tr>\n",
116
- " <tr>\n",
117
- " <th>2</th>\n",
118
- " <td>170</td>\n",
119
- " <td>NaN</td>\n",
120
- " <td>78.0</td>\n",
121
- " <td>3079.038997</td>\n",
122
- " <td>15.1</td>\n",
123
- " <td>2018</td>\n",
124
- " <td>Europe</td>\n",
125
- " <td>Gasoline</td>\n",
126
- " <td>Front-wheel drive</td>\n",
127
- " <td>0.0</td>\n",
128
- " <td>14.246341</td>\n",
129
- " </tr>\n",
130
- " <tr>\n",
131
- " <th>3</th>\n",
132
- " <td>220</td>\n",
133
- " <td>4.0</td>\n",
134
- " <td>NaN</td>\n",
135
- " <td>2542.392402</td>\n",
136
- " <td>20.2</td>\n",
137
- " <td>2009</td>\n",
138
- " <td>USA</td>\n",
139
- " <td>Diesel</td>\n",
140
- " <td>All-wheel drive</td>\n",
141
- " <td>2.0</td>\n",
142
- " <td>16.912736</td>\n",
143
- " </tr>\n",
144
- " <tr>\n",
145
- " <th>4</th>\n",
146
- " <td>210</td>\n",
147
- " <td>1.0</td>\n",
148
- " <td>140.0</td>\n",
149
- " <td>3460.870990</td>\n",
150
- " <td>14.4</td>\n",
151
- " <td>2009</td>\n",
152
- " <td>Europe</td>\n",
153
- " <td>Gasoline</td>\n",
154
- " <td>All-wheel drive</td>\n",
155
- " <td>2.0</td>\n",
156
- " <td>12.488369</td>\n",
157
- " </tr>\n",
158
- " </tbody>\n",
159
- "</table>\n",
160
- "</div>"
161
- ],
162
- "text/plain": [
163
- " engine_displacement num_cylinders horsepower vehicle_weight \\\n",
164
- "0 170 3.0 159.0 3413.433759 \n",
165
- "1 130 5.0 97.0 3149.664934 \n",
166
- "2 170 NaN 78.0 3079.038997 \n",
167
- "3 220 4.0 NaN 2542.392402 \n",
168
- "4 210 1.0 140.0 3460.870990 \n",
169
- "\n",
170
- " acceleration model_year origin fuel_type drivetrain num_doors \\\n",
171
- "0 17.7 2003 Europe Gasoline All-wheel drive 0.0 \n",
172
- "1 17.8 2007 USA Gasoline Front-wheel drive 0.0 \n",
173
- "2 15.1 2018 Europe Gasoline Front-wheel drive 0.0 \n",
174
- "3 20.2 2009 USA Diesel All-wheel drive 2.0 \n",
175
- "4 14.4 2009 Europe Gasoline All-wheel drive 2.0 \n",
176
- "\n",
177
- " fuel_efficiency_mpg \n",
178
- "0 13.231729 \n",
179
- "1 13.688217 \n",
180
- "2 14.246341 \n",
181
- "3 16.912736 \n",
182
- "4 12.488369 "
183
- ]
184
- },
185
- "execution_count": 56,
186
- "metadata": {},
187
- "output_type": "execute_result"
188
- }
189
- ],
190
- "source": [
191
- "df.head()"
192
- ]
193
- },
194
- {
195
- "cell_type": "markdown",
196
- "id": "2b12116a-060a-48a5-afe9-fdde99e53fce",
197
- "metadata": {},
198
- "source": [
199
- "## 1. No. of Records"
200
- ]
201
- },
202
- {
203
- "cell_type": "code",
204
- "execution_count": 57,
205
- "id": "515593f3-b510-48c2-9067-d9f8ffec3062",
206
- "metadata": {},
207
- "outputs": [
208
- {
209
- "data": {
210
- "text/plain": [
211
- "(9704, 11)"
212
- ]
213
- },
214
- "execution_count": 57,
215
- "metadata": {},
216
- "output_type": "execute_result"
217
- }
218
- ],
219
- "source": [
220
- "df.shape"
221
- ]
222
- },
223
- {
224
- "cell_type": "markdown",
225
- "id": "2a91bf5f-4be4-49b1-bf26-299b71da934a",
226
- "metadata": {},
227
- "source": [
228
- "## 2. Distinct fuel types"
229
- ]
230
- },
231
- {
232
- "cell_type": "code",
233
- "execution_count": 58,
234
- "id": "28148b46-460f-4dc0-895e-9b445b2c5cca",
235
- "metadata": {},
236
- "outputs": [
237
- {
238
- "data": {
239
- "text/plain": [
240
- "0 Gasoline\n",
241
- "1 Gasoline\n",
242
- "2 Gasoline\n",
243
- "3 Diesel\n",
244
- "4 Gasoline\n",
245
- "Name: fuel_type, dtype: object"
246
- ]
247
- },
248
- "execution_count": 58,
249
- "metadata": {},
250
- "output_type": "execute_result"
251
- }
252
- ],
253
- "source": [
254
- "fuels = df['fuel_type']\n",
255
- "fuels.head()"
256
- ]
257
- },
258
- {
259
- "cell_type": "code",
260
- "execution_count": 59,
261
- "id": "a94560b0-3240-45c0-b075-28873370b87f",
262
- "metadata": {},
263
- "outputs": [
264
- {
265
- "data": {
266
- "text/plain": [
267
- "2"
268
- ]
269
- },
270
- "execution_count": 59,
271
- "metadata": {},
272
- "output_type": "execute_result"
273
- }
274
- ],
275
- "source": [
276
- "fuels.nunique()"
277
- ]
278
- },
279
- {
280
- "cell_type": "markdown",
281
- "id": "48bb4b87-2a21-408e-866c-8d4bacc57caa",
282
- "metadata": {},
283
- "source": [
284
- "## 3. Null Values"
285
- ]
286
- },
287
- {
288
- "cell_type": "code",
289
- "execution_count": 60,
290
- "id": "7f2d85e3-672f-4e09-a48c-1f26a5627c2d",
291
- "metadata": {},
292
- "outputs": [
293
- {
294
- "data": {
295
- "text/plain": [
296
- "engine_displacement 0\n",
297
- "num_cylinders 482\n",
298
- "horsepower 708\n",
299
- "vehicle_weight 0\n",
300
- "acceleration 930\n",
301
- "model_year 0\n",
302
- "origin 0\n",
303
- "fuel_type 0\n",
304
- "drivetrain 0\n",
305
- "num_doors 502\n",
306
- "fuel_efficiency_mpg 0\n",
307
- "dtype: int64"
308
- ]
309
- },
310
- "execution_count": 60,
311
- "metadata": {},
312
- "output_type": "execute_result"
313
- }
314
- ],
315
- "source": [
316
- "df.isnull().sum()"
317
- ]
318
- },
319
- {
320
- "cell_type": "markdown",
321
- "id": "98f06e86-38d3-4441-bafe-22f9089f6ee2",
322
- "metadata": {},
323
- "source": [
324
- "Clearly, there are no missing values in the fuel_type column."
325
- ]
326
- },
327
- {
328
- "cell_type": "markdown",
329
- "id": "f052a64b-db12-4342-96d0-3f5ca215cca0",
330
- "metadata": {},
331
- "source": [
332
- "## 4. Max fuel efficiency of cars from Asia"
333
- ]
334
- },
335
- {
336
- "cell_type": "code",
337
- "execution_count": 61,
338
- "id": "de01f839-411a-4ed1-bc49-f479174cd8b3",
339
- "metadata": {},
340
- "outputs": [
341
- {
342
- "data": {
343
- "text/plain": [
344
- "Index(['engine_displacement', 'num_cylinders', 'horsepower', 'vehicle_weight',\n",
345
- " 'acceleration', 'model_year', 'origin', 'fuel_type', 'drivetrain',\n",
346
- " 'num_doors', 'fuel_efficiency_mpg'],\n",
347
- " dtype='object')"
348
- ]
349
- },
350
- "execution_count": 61,
351
- "metadata": {},
352
- "output_type": "execute_result"
353
- }
354
- ],
355
- "source": [
356
- "df.columns"
357
- ]
358
- },
359
- {
360
- "cell_type": "code",
361
- "execution_count": 62,
362
- "id": "98a48cd5-38df-4736-b873-1d78c1546bde",
363
- "metadata": {
364
- "scrolled": true
365
- },
366
- "outputs": [
367
- {
368
- "data": {
369
- "text/html": [
370
- "<div>\n",
371
- "<style scoped>\n",
372
- " .dataframe tbody tr th:only-of-type {\n",
373
- " vertical-align: middle;\n",
374
- " }\n",
375
- "\n",
376
- " .dataframe tbody tr th {\n",
377
- " vertical-align: top;\n",
378
- " }\n",
379
- "\n",
380
- " .dataframe thead th {\n",
381
- " text-align: right;\n",
382
- " }\n",
383
- "</style>\n",
384
- "<table border=\"1\" class=\"dataframe\">\n",
385
- " <thead>\n",
386
- " <tr style=\"text-align: right;\">\n",
387
- " <th></th>\n",
388
- " <th>engine_displacement</th>\n",
389
- " <th>num_cylinders</th>\n",
390
- " <th>horsepower</th>\n",
391
- " <th>vehicle_weight</th>\n",
392
- " <th>acceleration</th>\n",
393
- " <th>model_year</th>\n",
394
- " <th>origin</th>\n",
395
- " <th>fuel_type</th>\n",
396
- " <th>drivetrain</th>\n",
397
- " <th>num_doors</th>\n",
398
- " <th>fuel_efficiency_mpg</th>\n",
399
- " </tr>\n",
400
- " </thead>\n",
401
- " <tbody>\n",
402
- " <tr>\n",
403
- " <th>0</th>\n",
404
- " <td>170</td>\n",
405
- " <td>3.0</td>\n",
406
- " <td>159.0</td>\n",
407
- " <td>3413.433759</td>\n",
408
- " <td>17.7</td>\n",
409
- " <td>2003</td>\n",
410
- " <td>Europe</td>\n",
411
- " <td>Gasoline</td>\n",
412
- " <td>All-wheel drive</td>\n",
413
- " <td>0.0</td>\n",
414
- " <td>13.231729</td>\n",
415
- " </tr>\n",
416
- " <tr>\n",
417
- " <th>1</th>\n",
418
- " <td>130</td>\n",
419
- " <td>5.0</td>\n",
420
- " <td>97.0</td>\n",
421
- " <td>3149.664934</td>\n",
422
- " <td>17.8</td>\n",
423
- " <td>2007</td>\n",
424
- " <td>USA</td>\n",
425
- " <td>Gasoline</td>\n",
426
- " <td>Front-wheel drive</td>\n",
427
- " <td>0.0</td>\n",
428
- " <td>13.688217</td>\n",
429
- " </tr>\n",
430
- " <tr>\n",
431
- " <th>2</th>\n",
432
- " <td>170</td>\n",
433
- " <td>NaN</td>\n",
434
- " <td>78.0</td>\n",
435
- " <td>3079.038997</td>\n",
436
- " <td>15.1</td>\n",
437
- " <td>2018</td>\n",
438
- " <td>Europe</td>\n",
439
- " <td>Gasoline</td>\n",
440
- " <td>Front-wheel drive</td>\n",
441
- " <td>0.0</td>\n",
442
- " <td>14.246341</td>\n",
443
- " </tr>\n",
444
- " <tr>\n",
445
- " <th>3</th>\n",
446
- " <td>220</td>\n",
447
- " <td>4.0</td>\n",
448
- " <td>NaN</td>\n",
449
- " <td>2542.392402</td>\n",
450
- " <td>20.2</td>\n",
451
- " <td>2009</td>\n",
452
- " <td>USA</td>\n",
453
- " <td>Diesel</td>\n",
454
- " <td>All-wheel drive</td>\n",
455
- " <td>2.0</td>\n",
456
- " <td>16.912736</td>\n",
457
- " </tr>\n",
458
- " <tr>\n",
459
- " <th>4</th>\n",
460
- " <td>210</td>\n",
461
- " <td>1.0</td>\n",
462
- " <td>140.0</td>\n",
463
- " <td>3460.870990</td>\n",
464
- " <td>14.4</td>\n",
465
- " <td>2009</td>\n",
466
- " <td>Europe</td>\n",
467
- " <td>Gasoline</td>\n",
468
- " <td>All-wheel drive</td>\n",
469
- " <td>2.0</td>\n",
470
- " <td>12.488369</td>\n",
471
- " </tr>\n",
472
- " </tbody>\n",
473
- "</table>\n",
474
- "</div>"
475
- ],
476
- "text/plain": [
477
- " engine_displacement num_cylinders horsepower vehicle_weight \\\n",
478
- "0 170 3.0 159.0 3413.433759 \n",
479
- "1 130 5.0 97.0 3149.664934 \n",
480
- "2 170 NaN 78.0 3079.038997 \n",
481
- "3 220 4.0 NaN 2542.392402 \n",
482
- "4 210 1.0 140.0 3460.870990 \n",
483
- "\n",
484
- " acceleration model_year origin fuel_type drivetrain num_doors \\\n",
485
- "0 17.7 2003 Europe Gasoline All-wheel drive 0.0 \n",
486
- "1 17.8 2007 USA Gasoline Front-wheel drive 0.0 \n",
487
- "2 15.1 2018 Europe Gasoline Front-wheel drive 0.0 \n",
488
- "3 20.2 2009 USA Diesel All-wheel drive 2.0 \n",
489
- "4 14.4 2009 Europe Gasoline All-wheel drive 2.0 \n",
490
- "\n",
491
- " fuel_efficiency_mpg \n",
492
- "0 13.231729 \n",
493
- "1 13.688217 \n",
494
- "2 14.246341 \n",
495
- "3 16.912736 \n",
496
- "4 12.488369 "
497
- ]
498
- },
499
- "execution_count": 62,
500
- "metadata": {},
501
- "output_type": "execute_result"
502
- }
503
- ],
504
- "source": [
505
- "df.head()"
506
- ]
507
- },
508
- {
509
- "cell_type": "code",
510
- "execution_count": 63,
511
- "id": "25faf234-3f06-4f9b-bd07-0f25de03ee1c",
512
- "metadata": {},
513
- "outputs": [
514
- {
515
- "data": {
516
- "text/html": [
517
- "<div>\n",
518
- "<style scoped>\n",
519
- " .dataframe tbody tr th:only-of-type {\n",
520
- " vertical-align: middle;\n",
521
- " }\n",
522
- "\n",
523
- " .dataframe tbody tr th {\n",
524
- " vertical-align: top;\n",
525
- " }\n",
526
- "\n",
527
- " .dataframe thead th {\n",
528
- " text-align: right;\n",
529
- " }\n",
530
- "</style>\n",
531
- "<table border=\"1\" class=\"dataframe\">\n",
532
- " <thead>\n",
533
- " <tr style=\"text-align: right;\">\n",
534
- " <th></th>\n",
535
- " <th>engine_displacement</th>\n",
536
- " <th>num_cylinders</th>\n",
537
- " <th>horsepower</th>\n",
538
- " <th>vehicle_weight</th>\n",
539
- " <th>acceleration</th>\n",
540
- " <th>model_year</th>\n",
541
- " <th>origin</th>\n",
542
- " <th>fuel_type</th>\n",
543
- " <th>drivetrain</th>\n",
544
- " <th>num_doors</th>\n",
545
- " <th>fuel_efficiency_mpg</th>\n",
546
- " </tr>\n",
547
- " </thead>\n",
548
- " <tbody>\n",
549
- " <tr>\n",
550
- " <th>8</th>\n",
551
- " <td>250</td>\n",
552
- " <td>1.0</td>\n",
553
- " <td>174.0</td>\n",
554
- " <td>2714.219310</td>\n",
555
- " <td>10.3</td>\n",
556
- " <td>2016</td>\n",
557
- " <td>Asia</td>\n",
558
- " <td>Diesel</td>\n",
559
- " <td>Front-wheel drive</td>\n",
560
- " <td>-1.0</td>\n",
561
- " <td>16.823554</td>\n",
562
- " </tr>\n",
563
- " <tr>\n",
564
- " <th>12</th>\n",
565
- " <td>320</td>\n",
566
- " <td>5.0</td>\n",
567
- " <td>145.0</td>\n",
568
- " <td>2783.868974</td>\n",
569
- " <td>15.1</td>\n",
570
- " <td>2010</td>\n",
571
- " <td>Asia</td>\n",
572
- " <td>Diesel</td>\n",
573
- " <td>All-wheel drive</td>\n",
574
- " <td>1.0</td>\n",
575
- " <td>16.175820</td>\n",
576
- " </tr>\n",
577
- " <tr>\n",
578
- " <th>14</th>\n",
579
- " <td>200</td>\n",
580
- " <td>6.0</td>\n",
581
- " <td>160.0</td>\n",
582
- " <td>3582.687368</td>\n",
583
- " <td>14.9</td>\n",
584
- " <td>2007</td>\n",
585
- " <td>Asia</td>\n",
586
- " <td>Diesel</td>\n",
587
- " <td>All-wheel drive</td>\n",
588
- " <td>0.0</td>\n",
589
- " <td>11.871091</td>\n",
590
- " </tr>\n",
591
- " <tr>\n",
592
- " <th>20</th>\n",
593
- " <td>150</td>\n",
594
- " <td>3.0</td>\n",
595
- " <td>197.0</td>\n",
596
- " <td>2231.808142</td>\n",
597
- " <td>18.7</td>\n",
598
- " <td>2011</td>\n",
599
- " <td>Asia</td>\n",
600
- " <td>Gasoline</td>\n",
601
- " <td>Front-wheel drive</td>\n",
602
- " <td>1.0</td>\n",
603
- " <td>18.889083</td>\n",
604
- " </tr>\n",
605
- " <tr>\n",
606
- " <th>21</th>\n",
607
- " <td>160</td>\n",
608
- " <td>4.0</td>\n",
609
- " <td>133.0</td>\n",
610
- " <td>2659.431451</td>\n",
611
- " <td>NaN</td>\n",
612
- " <td>2016</td>\n",
613
- " <td>Asia</td>\n",
614
- " <td>Gasoline</td>\n",
615
- " <td>Front-wheel drive</td>\n",
616
- " <td>-1.0</td>\n",
617
- " <td>16.077730</td>\n",
618
- " </tr>\n",
619
- " </tbody>\n",
620
- "</table>\n",
621
- "</div>"
622
- ],
623
- "text/plain": [
624
- " engine_displacement num_cylinders horsepower vehicle_weight \\\n",
625
- "8 250 1.0 174.0 2714.219310 \n",
626
- "12 320 5.0 145.0 2783.868974 \n",
627
- "14 200 6.0 160.0 3582.687368 \n",
628
- "20 150 3.0 197.0 2231.808142 \n",
629
- "21 160 4.0 133.0 2659.431451 \n",
630
- "\n",
631
- " acceleration model_year origin fuel_type drivetrain num_doors \\\n",
632
- "8 10.3 2016 Asia Diesel Front-wheel drive -1.0 \n",
633
- "12 15.1 2010 Asia Diesel All-wheel drive 1.0 \n",
634
- "14 14.9 2007 Asia Diesel All-wheel drive 0.0 \n",
635
- "20 18.7 2011 Asia Gasoline Front-wheel drive 1.0 \n",
636
- "21 NaN 2016 Asia Gasoline Front-wheel drive -1.0 \n",
637
- "\n",
638
- " fuel_efficiency_mpg \n",
639
- "8 16.823554 \n",
640
- "12 16.175820 \n",
641
- "14 11.871091 \n",
642
- "20 18.889083 \n",
643
- "21 16.077730 "
644
- ]
645
- },
646
- "execution_count": 63,
647
- "metadata": {},
648
- "output_type": "execute_result"
649
- }
650
- ],
651
- "source": [
652
- "mask_asia = df['origin'] == 'Asia'\n",
653
- "eff = df[mask_asia]\n",
654
- "eff.head()"
655
- ]
656
- },
657
- {
658
- "cell_type": "code",
659
- "execution_count": 64,
660
- "id": "5d1f7efd-77f9-4568-a012-5cd1ba753fd6",
661
- "metadata": {},
662
- "outputs": [
663
- {
664
- "data": {
665
- "text/plain": [
666
- "23.759122836520497"
667
- ]
668
- },
669
- "execution_count": 64,
670
- "metadata": {},
671
- "output_type": "execute_result"
672
- }
673
- ],
674
- "source": [
675
- "max_eff = max(eff['fuel_efficiency_mpg'])\n",
676
- "max_eff"
677
- ]
678
- },
679
- {
680
- "cell_type": "markdown",
681
- "id": "cda34c7c-f2c0-497d-b419-dae670db022b",
682
- "metadata": {},
683
- "source": [
684
- "## 5. Median value of horsepower"
685
- ]
686
- },
687
- {
688
- "cell_type": "code",
689
- "execution_count": 65,
690
- "id": "e8328da7-f04f-41bd-94f5-b534aa00f2c1",
691
- "metadata": {},
692
- "outputs": [
693
- {
694
- "data": {
695
- "text/plain": [
696
- "engine_displacement 0\n",
697
- "num_cylinders 482\n",
698
- "horsepower 708\n",
699
- "vehicle_weight 0\n",
700
- "acceleration 930\n",
701
- "model_year 0\n",
702
- "origin 0\n",
703
- "fuel_type 0\n",
704
- "drivetrain 0\n",
705
- "num_doors 502\n",
706
- "fuel_efficiency_mpg 0\n",
707
- "dtype: int64"
708
- ]
709
- },
710
- "execution_count": 65,
711
- "metadata": {},
712
- "output_type": "execute_result"
713
- }
714
- ],
715
- "source": [
716
- "df.isnull().sum()"
717
- ]
718
- },
719
- {
720
- "cell_type": "code",
721
- "execution_count": 66,
722
- "id": "6eaafaf8-6674-443d-b26c-6d8212d91754",
723
- "metadata": {},
724
- "outputs": [
725
- {
726
- "data": {
727
- "text/plain": [
728
- "149.0"
729
- ]
730
- },
731
- "execution_count": 66,
732
- "metadata": {},
733
- "output_type": "execute_result"
734
- }
735
- ],
736
- "source": [
737
- "# median of the horsepower col\n",
738
- "df['horsepower'].median()"
739
- ]
740
- },
741
- {
742
- "cell_type": "code",
743
- "execution_count": 67,
744
- "id": "9b785320-6b9a-41c0-bb27-c0f126145177",
745
- "metadata": {},
746
- "outputs": [
747
- {
748
- "data": {
749
- "text/plain": [
750
- "horsepower\n",
751
- "152.0 142\n",
752
- "145.0 141\n",
753
- "151.0 134\n",
754
- "148.0 130\n",
755
- "141.0 130\n",
756
- " ... \n",
757
- "40.0 1\n",
758
- "57.0 1\n",
759
- "245.0 1\n",
760
- "252.0 1\n",
761
- "61.0 1\n",
762
- "Name: count, Length: 192, dtype: int64"
763
- ]
764
- },
765
- "execution_count": 67,
766
- "metadata": {},
767
- "output_type": "execute_result"
768
- }
769
- ],
770
- "source": [
771
- "# frequency of each horsepower value, most frequent first\n",
772
- "df['horsepower'].value_counts()"
773
- ]
774
- },
775
- {
776
- "cell_type": "code",
777
- "execution_count": 74,
778
- "id": "126b6df6-515f-463e-83f3-10abbf2c25e2",
779
- "metadata": {},
780
- "outputs": [
781
- {
782
- "data": {
783
- "text/plain": [
784
- "np.float64(152.0)"
785
- ]
786
- },
787
- "execution_count": 74,
788
- "metadata": {},
789
- "output_type": "execute_result"
790
- }
791
- ],
792
- "source": [
793
- "# alternatively, mode() returns the most frequent value directly ([0] picks the first one if there are ties)\n",
794
- "mode_horsepower = df['horsepower'].mode()[0]\n",
795
- "mode_horsepower"
796
- ]
797
- },
798
- {
799
- "cell_type": "code",
800
- "execution_count": 80,
801
- "id": "bd17e63f-c5c1-4d8a-8ba5-1b8e106175fc",
802
- "metadata": {},
803
- "outputs": [],
804
- "source": [
805
- "# fill the missing values in the col with mode\n",
806
- "df['horsepower'].fillna(mode_horsepower, inplace=True)"
807
- ]
808
- },
809
- {
810
- "cell_type": "code",
811
- "execution_count": 81,
812
- "id": "e7dc6b1a-323a-4f88-b475-f76059759e66",
813
- "metadata": {},
814
- "outputs": [
815
- {
816
- "data": {
817
- "text/plain": [
818
- "engine_displacement 0\n",
819
- "num_cylinders 482\n",
820
- "horsepower 0\n",
821
- "vehicle_weight 0\n",
822
- "acceleration 930\n",
823
- "model_year 0\n",
824
- "origin 0\n",
825
- "fuel_type 0\n",
826
- "drivetrain 0\n",
827
- "num_doors 502\n",
828
- "fuel_efficiency_mpg 0\n",
829
- "dtype: int64"
830
- ]
831
- },
832
- "execution_count": 81,
833
- "metadata": {},
834
- "output_type": "execute_result"
835
- }
836
- ],
837
- "source": [
838
- "# check that horsepower no longer has any null values\n",
839
- "df.isnull().sum()"
840
- ]
841
- },
842
- {
843
- "cell_type": "markdown",
844
- "id": "3eaf5439-3f99-4506-bdd8-ca35e03c18bf",
845
- "metadata": {},
846
- "source": [
847
- "Clearly the null values have been imputed"
848
- ]
849
- },
850
- {
851
- "cell_type": "code",
852
- "execution_count": 82,
853
- "id": "94dc61c5-bbcb-47f4-9370-f3238c26e2a2",
854
- "metadata": {},
855
- "outputs": [
856
- {
857
- "data": {
858
- "text/plain": [
859
- "152.0"
860
- ]
861
- },
862
- "execution_count": 82,
863
- "metadata": {},
864
- "output_type": "execute_result"
865
- }
866
- ],
867
- "source": [
868
- "# now recalculate the median\n",
869
- "df['horsepower'].median()"
870
- ]
871
- },
872
- {
873
- "cell_type": "markdown",
874
- "id": "32337e0a-dbfc-4e96-9fab-15723b3a5166",
875
- "metadata": {},
876
- "source": [
877
- "## 6. Model building"
878
- ]
879
- },
880
- {
881
- "cell_type": "code",
882
- "execution_count": 84,
883
- "id": "a28d7bfb-3f4d-4018-8881-b39bf43d4089",
884
- "metadata": {},
885
- "outputs": [
886
- {
887
- "data": {
888
- "text/html": [
889
- "<div>\n",
890
- "<style scoped>\n",
891
- " .dataframe tbody tr th:only-of-type {\n",
892
- " vertical-align: middle;\n",
893
- " }\n",
894
- "\n",
895
- " .dataframe tbody tr th {\n",
896
- " vertical-align: top;\n",
897
- " }\n",
898
- "\n",
899
- " .dataframe thead th {\n",
900
- " text-align: right;\n",
901
- " }\n",
902
- "</style>\n",
903
- "<table border=\"1\" class=\"dataframe\">\n",
904
- " <thead>\n",
905
- " <tr style=\"text-align: right;\">\n",
906
- " <th></th>\n",
907
- " <th>engine_displacement</th>\n",
908
- " <th>num_cylinders</th>\n",
909
- " <th>horsepower</th>\n",
910
- " <th>vehicle_weight</th>\n",
911
- " <th>acceleration</th>\n",
912
- " <th>model_year</th>\n",
913
- " <th>origin</th>\n",
914
- " <th>fuel_type</th>\n",
915
- " <th>drivetrain</th>\n",
916
- " <th>num_doors</th>\n",
917
- " <th>fuel_efficiency_mpg</th>\n",
918
- " </tr>\n",
919
- " </thead>\n",
920
- " <tbody>\n",
921
- " <tr>\n",
922
- " <th>0</th>\n",
923
- " <td>170</td>\n",
924
- " <td>3.0</td>\n",
925
- " <td>159.0</td>\n",
926
- " <td>3413.433759</td>\n",
927
- " <td>17.7</td>\n",
928
- " <td>2003</td>\n",
929
- " <td>Europe</td>\n",
930
- " <td>Gasoline</td>\n",
931
- " <td>All-wheel drive</td>\n",
932
- " <td>0.0</td>\n",
933
- " <td>13.231729</td>\n",
934
- " </tr>\n",
935
- " <tr>\n",
936
- " <th>1</th>\n",
937
- " <td>130</td>\n",
938
- " <td>5.0</td>\n",
939
- " <td>97.0</td>\n",
940
- " <td>3149.664934</td>\n",
941
- " <td>17.8</td>\n",
942
- " <td>2007</td>\n",
943
- " <td>USA</td>\n",
944
- " <td>Gasoline</td>\n",
945
- " <td>Front-wheel drive</td>\n",
946
- " <td>0.0</td>\n",
947
- " <td>13.688217</td>\n",
948
- " </tr>\n",
949
- " <tr>\n",
950
- " <th>2</th>\n",
951
- " <td>170</td>\n",
952
- " <td>NaN</td>\n",
953
- " <td>78.0</td>\n",
954
- " <td>3079.038997</td>\n",
955
- " <td>15.1</td>\n",
956
- " <td>2018</td>\n",
957
- " <td>Europe</td>\n",
958
- " <td>Gasoline</td>\n",
959
- " <td>Front-wheel drive</td>\n",
960
- " <td>0.0</td>\n",
961
- " <td>14.246341</td>\n",
962
- " </tr>\n",
963
- " <tr>\n",
964
- " <th>3</th>\n",
965
- " <td>220</td>\n",
966
- " <td>4.0</td>\n",
967
- " <td>152.0</td>\n",
968
- " <td>2542.392402</td>\n",
969
- " <td>20.2</td>\n",
970
- " <td>2009</td>\n",
971
- " <td>USA</td>\n",
972
- " <td>Diesel</td>\n",
973
- " <td>All-wheel drive</td>\n",
974
- " <td>2.0</td>\n",
975
- " <td>16.912736</td>\n",
976
- " </tr>\n",
977
- " <tr>\n",
978
- " <th>4</th>\n",
979
- " <td>210</td>\n",
980
- " <td>1.0</td>\n",
981
- " <td>140.0</td>\n",
982
- " <td>3460.870990</td>\n",
983
- " <td>14.4</td>\n",
984
- " <td>2009</td>\n",
985
- " <td>Europe</td>\n",
986
- " <td>Gasoline</td>\n",
987
- " <td>All-wheel drive</td>\n",
988
- " <td>2.0</td>\n",
989
- " <td>12.488369</td>\n",
990
- " </tr>\n",
991
- " </tbody>\n",
992
- "</table>\n",
993
- "</div>"
994
- ],
995
- "text/plain": [
996
- " engine_displacement num_cylinders horsepower vehicle_weight \\\n",
997
- "0 170 3.0 159.0 3413.433759 \n",
998
- "1 130 5.0 97.0 3149.664934 \n",
999
- "2 170 NaN 78.0 3079.038997 \n",
1000
- "3 220 4.0 152.0 2542.392402 \n",
1001
- "4 210 1.0 140.0 3460.870990 \n",
1002
- "\n",
1003
- " acceleration model_year origin fuel_type drivetrain num_doors \\\n",
1004
- "0 17.7 2003 Europe Gasoline All-wheel drive 0.0 \n",
1005
- "1 17.8 2007 USA Gasoline Front-wheel drive 0.0 \n",
1006
- "2 15.1 2018 Europe Gasoline Front-wheel drive 0.0 \n",
1007
- "3 20.2 2009 USA Diesel All-wheel drive 2.0 \n",
1008
- "4 14.4 2009 Europe Gasoline All-wheel drive 2.0 \n",
1009
- "\n",
1010
- " fuel_efficiency_mpg \n",
1011
- "0 13.231729 \n",
1012
- "1 13.688217 \n",
1013
- "2 14.246341 \n",
1014
- "3 16.912736 \n",
1015
- "4 12.488369 "
1016
- ]
1017
- },
1018
- "execution_count": 84,
1019
- "metadata": {},
1020
- "output_type": "execute_result"
1021
- }
1022
- ],
1023
- "source": [
1024
- "df.head()"
1025
- ]
1026
- },
1027
- {
1028
- "cell_type": "code",
1029
- "execution_count": 83,
1030
- "id": "30057fab-fad4-44ae-9b9b-2aae11614f84",
1031
- "metadata": {},
1032
- "outputs": [
1033
- {
1034
- "data": {
1035
- "text/plain": [
1036
- "0 False\n",
1037
- "1 False\n",
1038
- "2 False\n",
1039
- "3 False\n",
1040
- "4 False\n",
1041
- "Name: origin, dtype: bool"
1042
- ]
1043
- },
1044
- "execution_count": 83,
1045
- "metadata": {},
1046
- "output_type": "execute_result"
1047
- }
1048
- ],
1049
- "source": [
1050
- "mask_asia.head()"
1051
- ]
1052
- },
1053
- {
1054
- "cell_type": "code",
1055
- "execution_count": 88,
1056
- "id": "dbaa1132-9a2f-411a-9668-b5110109e3aa",
1057
- "metadata": {},
1058
- "outputs": [],
1059
- "source": [
1060
- "columns_to_keep = ['vehicle_weight', 'model_year']"
1061
- ]
1062
- },
1063
- {
1064
- "cell_type": "code",
1065
- "execution_count": 94,
1066
- "id": "c37eb7f0-4e38-4a8d-b5a0-f54ba43ef6c7",
1067
- "metadata": {},
1068
- "outputs": [
1069
- {
1070
- "data": {
1071
- "text/html": [
1072
- "<div>\n",
1073
- "<style scoped>\n",
1074
- " .dataframe tbody tr th:only-of-type {\n",
1075
- " vertical-align: middle;\n",
1076
- " }\n",
1077
- "\n",
1078
- " .dataframe tbody tr th {\n",
1079
- " vertical-align: top;\n",
1080
- " }\n",
1081
- "\n",
1082
- " .dataframe thead th {\n",
1083
- " text-align: right;\n",
1084
- " }\n",
1085
- "</style>\n",
1086
- "<table border=\"1\" class=\"dataframe\">\n",
1087
- " <thead>\n",
1088
- " <tr style=\"text-align: right;\">\n",
1089
- " <th></th>\n",
1090
- " <th>vehicle_weight</th>\n",
1091
- " <th>model_year</th>\n",
1092
- " </tr>\n",
1093
- " </thead>\n",
1094
- " <tbody>\n",
1095
- " <tr>\n",
1096
- " <th>8</th>\n",
1097
- " <td>2714.219310</td>\n",
1098
- " <td>2016</td>\n",
1099
- " </tr>\n",
1100
- " <tr>\n",
1101
- " <th>12</th>\n",
1102
- " <td>2783.868974</td>\n",
1103
- " <td>2010</td>\n",
1104
- " </tr>\n",
1105
- " <tr>\n",
1106
- " <th>14</th>\n",
1107
- " <td>3582.687368</td>\n",
1108
- " <td>2007</td>\n",
1109
- " </tr>\n",
1110
- " <tr>\n",
1111
- " <th>20</th>\n",
1112
- " <td>2231.808142</td>\n",
1113
- " <td>2011</td>\n",
1114
- " </tr>\n",
1115
- " <tr>\n",
1116
- " <th>21</th>\n",
1117
- " <td>2659.431451</td>\n",
1118
- " <td>2016</td>\n",
1119
- " </tr>\n",
1120
- " <tr>\n",
1121
- " <th>34</th>\n",
1122
- " <td>2844.227534</td>\n",
1123
- " <td>2014</td>\n",
1124
- " </tr>\n",
1125
- " <tr>\n",
1126
- " <th>38</th>\n",
1127
- " <td>3761.994038</td>\n",
1128
- " <td>2019</td>\n",
1129
- " </tr>\n",
1130
- " </tbody>\n",
1131
- "</table>\n",
1132
- "</div>"
1133
- ],
1134
- "text/plain": [
1135
- " vehicle_weight model_year\n",
1136
- "8 2714.219310 2016\n",
1137
- "12 2783.868974 2010\n",
1138
- "14 3582.687368 2007\n",
1139
- "20 2231.808142 2011\n",
1140
- "21 2659.431451 2016\n",
1141
- "34 2844.227534 2014\n",
1142
- "38 3761.994038 2019"
1143
- ]
1144
- },
1145
- "execution_count": 94,
1146
- "metadata": {},
1147
- "output_type": "execute_result"
1148
- }
1149
- ],
1150
- "source": [
1151
- "# select the cars from Asia, keep only the chosen columns, and take the first 7 rows\n",
1152
- "df_asia = df[mask_asia]\n",
1153
- "df_asia_final = df_asia[columns_to_keep].head(7)\n",
1154
- "df_asia_final"
1155
- ]
1156
- },
1157
- {
1158
- "cell_type": "code",
1159
- "execution_count": 100,
1160
- "id": "89abd22c-7cc2-49b9-8afd-4e824f4360c7",
1161
- "metadata": {},
1162
- "outputs": [
1163
- {
1164
- "data": {
1165
- "text/plain": [
1166
- "(7, 2)"
1167
- ]
1168
- },
1169
- "execution_count": 100,
1170
- "metadata": {},
1171
- "output_type": "execute_result"
1172
- }
1173
- ],
1174
- "source": [
1175
- "# get the underlying numpy array\n",
1176
- "X = np.array(df_asia_final)\n",
1177
- "X.shape"
1178
- ]
1179
- },
1180
- {
1181
- "cell_type": "code",
1182
- "execution_count": 110,
1183
- "id": "252a6e2f-c7f9-4c30-b74a-8b4e1ea876ab",
1184
- "metadata": {},
1185
- "outputs": [
1186
- {
1187
- "data": {
1188
- "text/plain": [
1189
- "(2, 2)"
1190
- ]
1191
- },
1192
- "execution_count": 110,
1193
- "metadata": {},
1194
- "output_type": "execute_result"
1195
- }
1196
- ],
1197
- "source": [
1198
- "# multiply the transpose of X by X: (2,7) @ (7,2) -> (2,2)\n",
1199
- "XTX = X.T @ X\n",
1200
- "XTX.shape"
1201
- ]
1202
- },
1203
- {
1204
- "cell_type": "code",
1205
- "execution_count": 111,
1206
- "id": "63342692-a307-48cd-a6cf-bdfc8e1985c1",
1207
- "metadata": {},
1208
- "outputs": [
1209
- {
1210
- "data": {
1211
- "text/plain": [
1212
- "(2, 2)"
1213
- ]
1214
- },
1215
- "execution_count": 111,
1216
- "metadata": {},
1217
- "output_type": "execute_result"
1218
- }
1219
- ],
1220
- "source": [
1221
- "XTX_inv = np.linalg.inv(XTX)\n",
1222
- "XTX_inv.shape"
1223
- ]
1224
- },
1225
- {
1226
- "cell_type": "code",
1227
- "execution_count": 112,
1228
- "id": "e4b0a33e-ee66-48d3-82d4-7953e0a64461",
1229
- "metadata": {},
1230
- "outputs": [
1231
- {
1232
- "data": {
1233
- "text/plain": [
1234
- "array([1100, 1300, 800, 900, 1000, 1100, 1200])"
1235
- ]
1236
- },
1237
- "execution_count": 112,
1238
- "metadata": {},
1239
- "output_type": "execute_result"
1240
- }
1241
- ],
1242
- "source": [
1243
- "# Create an array y with values \n",
1244
- "y = np.array([1100, 1300, 800, 900, 1000, 1100, 1200])\n",
1245
- "y "
1246
- ]
1247
- },
1248
- {
1249
- "cell_type": "code",
1250
- "execution_count": 114,
1251
- "id": "e5b0bf7e-9e5d-46c8-9d47-4555f05bfc6f",
1252
- "metadata": {},
1253
- "outputs": [],
1254
- "source": [
1255
- "# Multiply the inverse of XTX with the transpose of X, and then multiply the result by y. Call the result w\n",
1256
- "step = XTX_inv @ X.T\n",
1257
- "w = step @ y"
1258
- ]
1259
- },
1260
- {
1261
- "cell_type": "code",
1262
- "execution_count": 115,
1263
- "id": "1ddb3d1f-b877-4c66-9245-098cd63b850a",
1264
- "metadata": {},
1265
- "outputs": [
1266
- {
1267
- "data": {
1268
- "text/plain": [
1269
- "np.float64(0.5187709081074016)"
1270
- ]
1271
- },
1272
- "execution_count": 115,
1273
- "metadata": {},
1274
- "output_type": "execute_result"
1275
- }
1276
- ],
1277
- "source": [
1278
- "# sum of all the elements of the result (the weights w)\n",
1279
- "np.sum(w)"
1280
- ]
1281
- },
1282
- {
1283
- "cell_type": "markdown",
1284
- "id": "5cad1468-2329-4fa3-9b91-0dc30dffafbc",
1285
- "metadata": {},
1286
- "source": [
1287
- "## End of Week 1"
1288
- ]
1289
- },
1290
- {
1291
- "cell_type": "code",
1292
- "execution_count": null,
1293
- "id": "2bbec182-f585-43fa-9960-ca979139c0e2",
1294
- "metadata": {},
1295
- "outputs": [],
1296
- "source": []
1297
- }
1298
- ],
1299
- "metadata": {
1300
- "kernelspec": {
1301
- "display_name": "Python 3 (ipykernel)",
1302
- "language": "python",
1303
- "name": "python3"
1304
- },
1305
- "language_info": {
1306
- "codemirror_mode": {
1307
- "name": "ipython",
1308
- "version": 3
1309
- },
1310
- "file_extension": ".py",
1311
- "mimetype": "text/x-python",
1312
- "name": "python",
1313
- "nbconvert_exporter": "python",
1314
- "pygments_lexer": "ipython3",
1315
- "version": "3.11.13"
1316
- }
1317
- },
1318
- "nbformat": 4,
1319
- "nbformat_minor": 5
1320
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Week 1/car_fuel_efficiency.csv DELETED
The diff for this file is too large to render. See raw diff
 
Week 1/readme.md DELETED
@@ -1,83 +0,0 @@
1
- # Machine Learning Zoomcamp – Week 1: Linear Algebra Foundations
2
-
3
- [![Python](https://img.shields.io/badge/Python-3.11-blue)](https://www.python.org/)
4
- [![Jupyter Notebook](https://img.shields.io/badge/Jupyter-Notebook-orange)](https://jupyter.org/)
5
- [![NumPy](https://img.shields.io/badge/NumPy-2.x-blue)](https://numpy.org/)
6
-
7
- This repository documents my journey through **Week 1** of the **Machine Learning Zoomcamp**, a comprehensive 4-month course offered by **DataTalksClub**. Week 1 focuses on building the **mathematical foundation** required for machine learning, including linear algebra and matrix operations.
8
-
9
- ---
10
-
11
- ## 📘 Week 1 Overview
12
-
13
- The goal of this week was to understand the mathematical underpinnings of machine learning algorithms. Key topics included:
14
-
15
- - **Matrix Operations**: Matrix multiplication, transposition, and inversion.
16
- - **Linear Algebra Fundamentals**: Dot products, matrix shapes, and their relevance in ML.
17
- - **Practical Applications**: Implementing linear algebra concepts using Python and NumPy.
18
-
19
- ---
20
-
21
- ## 🔧 Exercises and Implementations
22
-
23
- The exercises involved:
24
-
25
- - Computing the transpose of a matrix `X` and performing `X.T @ X`.
26
- - Inverting the resulting matrix to obtain `(X.T @ X)^(-1)`.
27
- - Using the inverse to solve linear equations, a fundamental step in linear regression.
28
-
29
- ---
30
-
31
- ## 🧪 Example Problem
32
-
33
- One of the exercises included:
34
-
35
- 1. Creating a dataset:
36
-
37
- ```python
38
- y = [1100, 1300, 800, 900, 1000, 1100, 1200]
39
- ```
40
-
41
- 2. Computing `X.T @ X`, inverting it, multiplying by `X.T`, and then multiplying by `y` to get the weight vector `w`.
42
-
43
- ```python
44
- import numpy as np
45
-
46
- # Example steps
47
- XTX = X.T @ X
48
- XTX_inv = np.linalg.inv(XTX)
49
- w = XTX_inv @ X.T @ y
50
- ```
51
-
52
- 3. Summing all elements of `w` to analyze the result:
53
-
54
- ```python
55
- total_weight = np.sum(w)
56
- print("Sum of weights:", total_weight)
57
- ```
58
-
59
- ---
60
-
61
- ## 🛠️ Technologies Used
62
-
63
- * **Python** – Programming language for implementation
64
- * **NumPy** – Efficient numerical computations and linear algebra
65
- * **Jupyter Notebooks** – Interactive environment for running exercises
66
-
67
- ---
68
-
69
- ## 📌 Key Takeaways
70
-
71
- * Mastering linear algebra is essential for understanding machine learning algorithms.
72
- * Operations like matrix multiplication and inversion form the core of regression and many ML models.
73
- * Hands-on exercises help translate theoretical concepts into practical applications.
74
-
75
- ---
76
-
77
- ## 🔗 Resources
78
-
79
- * [Machine Learning Zoomcamp](https://github.com/DataTalksClub/mlzoomcamp) – Official course repository
80
- * [NumPy Documentation](https://numpy.org/doc/) – For matrix operations and linear algebra
81
- * [Jupyter Notebooks](https://jupyter.org/) – Interactive coding environment
82
-
83
-