MsSaidat25 committed on
Commit
73047cf
·
1 Parent(s): ee6a74b

Created using Colab

Browse files
Files changed (1) hide show
  1. Untitled1.ipynb +1230 -0
Untitled1.ipynb ADDED
@@ -0,0 +1,1230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "authorship_tag": "ABX9TyM6lcWDIRzwQ5fcw7a7TiiZ",
8
+ "include_colab_link": true
9
+ },
10
+ "kernelspec": {
11
+ "name": "python3",
12
+ "display_name": "Python 3"
13
+ },
14
+ "language_info": {
15
+ "name": "python"
16
+ }
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "markdown",
21
+ "metadata": {
22
+ "id": "view-in-github",
23
+ "colab_type": "text"
24
+ },
25
+ "source": [
26
+ "<a href=\"https://colab.research.google.com/github/MsSaidat25/AI-Engineer-Projects/blob/main/Untitled1.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "metadata": {
33
+ "colab": {
34
+ "base_uri": "https://localhost:8080/"
35
+ },
36
+ "id": "cMIjQEwLJPKQ",
37
+ "outputId": "c3c64dff-48e9-4dab-e007-fc9cf25753e9"
38
+ },
39
+ "outputs": [
40
+ {
41
+ "output_type": "stream",
42
+ "name": "stdout",
43
+ "text": [
44
+ "Cloning into 'The-Machine-Learning-Workshop'...\n",
45
+ "remote: Enumerating objects: 805, done.\u001b[K\n",
46
+ "remote: Counting objects: 100% (23/23), done.\u001b[K\n",
47
+ "remote: Compressing objects: 100% (15/15), done.\u001b[K\n",
48
+ "remote: Total 805 (delta 15), reused 8 (delta 8), pack-reused 782 (from 1)\u001b[K\n",
49
+ "Receiving objects: 100% (805/805), 10.36 MiB | 9.64 MiB/s, done.\n",
50
+ "Resolving deltas: 100% (293/293), done.\n"
51
+ ]
52
+ }
53
+ ],
54
+ "source": [
55
+ "!git clone https://github.com/MsSaidat25/The-Machine-Learning-Workshop.git"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "source": [
61
+ "import os\n",
62
+ "os.chdir('/content/The-Machine-Learning-Workshop')\n",
63
+ "!ls # see all folders/files"
64
+ ],
65
+ "metadata": {
66
+ "colab": {
67
+ "base_uri": "https://localhost:8080/"
68
+ },
69
+ "id": "fkumve32Jj-w",
70
+ "outputId": "f5f0c473-b4c3-4e25-b4e9-23db465582c1"
71
+ },
72
+ "execution_count": null,
73
+ "outputs": [
74
+ {
75
+ "output_type": "stream",
76
+ "name": "stdout",
77
+ "text": [
78
+ "Chapter01 Chapter03 Chapter05 Graphics README.md\n",
79
+ "Chapter02 Chapter04 Chapter06 LICENSE requirements.txt\n"
80
+ ]
81
+ }
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "code",
86
+ "metadata": {
87
+ "colab": {
88
+ "base_uri": "https://localhost:8080/"
89
+ },
90
+ "id": "8a5d3702",
91
+ "outputId": "312c313b-b695-48a4-e9f6-cad0ad63f4f9"
92
+ },
93
+ "source": [
94
+ "import os\n",
95
+ "os.chdir('/content/The-Machine-Learning-Workshop/Chapter01')\n",
96
+ "!ls"
97
+ ],
98
+ "execution_count": null,
99
+ "outputs": [
100
+ {
101
+ "output_type": "stream",
102
+ "name": "stdout",
103
+ "text": [
104
+ "Activity1.01 Exercise1.01 Exercise1.03\n",
105
+ "Activity1.02 Exercise1.02 Exercise1.04\n"
106
+ ]
107
+ }
108
+ ]
109
+ },
110
+ {
111
+ "cell_type": "code",
112
+ "metadata": {
113
+ "colab": {
114
+ "base_uri": "https://localhost:8080/"
115
+ },
116
+ "id": "99014702",
117
+ "outputId": "a7833725-7b7b-4655-8a28-5f0898678ede"
118
+ },
119
+ "source": [
120
+ "import json\n",
121
+ "\n",
122
+ "notebook_path = '/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01/Activity1_01.ipynb'\n",
123
+ "\n",
124
+ "with open(notebook_path, 'r') as f:\n",
125
+ " notebook_content = json.load(f)\n",
126
+ "\n",
127
+ "cells_to_generate = []\n",
128
+ "for cell in notebook_content['cells']:\n",
129
+ " if cell['cell_type'] == 'code':\n",
130
+ " cells_to_generate.append({'cell_type': 'python', 'code': ''.join(cell['source'])})\n",
131
+ " elif cell['cell_type'] == 'markdown':\n",
132
+ " cells_to_generate.append({'cell_type': 'markdown', 'code': ''.join(cell['source'])})\n",
133
+ "\n",
134
+ "# This list will be used by the next command to generate the actual cells.\n",
135
+ "# For now, I will just print the first few cells to confirm the parsing.\n",
136
+ "print(f\"Found {len(cells_to_generate)} cells in the notebook. Preview of the first cell:\\n{cells_to_generate[0]['code'] if cells_to_generate else 'No cells found.'}\")"
137
+ ],
138
+ "execution_count": null,
139
+ "outputs": [
140
+ {
141
+ "output_type": "stream",
142
+ "name": "stdout",
143
+ "text": [
144
+ "Found 5 cells in the notebook. Preview of the first cell:\n",
145
+ "import seaborn as sns\n",
146
+ "titanic = sns.load_dataset('titanic')\n",
147
+ "titanic.head(10)\n"
148
+ ]
149
+ }
150
+ ]
151
+ },
152
+ {
153
+ "cell_type": "code",
154
+ "metadata": {
155
+ "colab": {
156
+ "base_uri": "https://localhost:8080/"
157
+ },
158
+ "id": "12666191",
159
+ "outputId": "cb746e2f-c18d-4518-88c9-d6e3a401b318"
160
+ },
161
+ "source": [
162
+ "import os\n",
163
+ "os.chdir('/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01')\n",
164
+ "!ls"
165
+ ],
166
+ "execution_count": null,
167
+ "outputs": [
168
+ {
169
+ "output_type": "stream",
170
+ "name": "stdout",
171
+ "text": [
172
+ "Activity1_01.ipynb titanic.csv unit_test_activity1_01.ipynb\n"
173
+ ]
174
+ }
175
+ ]
176
+ },
177
+ {
178
+ "cell_type": "code",
179
+ "metadata": {
180
+ "colab": {
181
+ "base_uri": "https://localhost:8080/",
182
+ "height": 383
183
+ },
184
+ "id": "1b2938e2",
185
+ "outputId": "50844bb9-e0d9-48bf-a6ae-79e9aa0a0c8c"
186
+ },
187
+ "source": [
188
+ "import seaborn as sns\n",
189
+ "titanic = sns.load_dataset('titanic')\n",
190
+ "titanic.head(10)"
191
+ ],
192
+ "execution_count": null,
193
+ "outputs": [
194
+ {
195
+ "output_type": "execute_result",
196
+ "data": {
197
+ "text/plain": [
198
+ " survived pclass sex age sibsp parch fare embarked class \\\n",
199
+ "0 0 3 male 22.0 1 0 7.2500 S Third \n",
200
+ "1 1 1 female 38.0 1 0 71.2833 C First \n",
201
+ "2 1 3 female 26.0 0 0 7.9250 S Third \n",
202
+ "3 1 1 female 35.0 1 0 53.1000 S First \n",
203
+ "4 0 3 male 35.0 0 0 8.0500 S Third \n",
204
+ "5 0 3 male NaN 0 0 8.4583 Q Third \n",
205
+ "6 0 1 male 54.0 0 0 51.8625 S First \n",
206
+ "7 0 3 male 2.0 3 1 21.0750 S Third \n",
207
+ "8 1 3 female 27.0 0 2 11.1333 S Third \n",
208
+ "9 1 2 female 14.0 1 0 30.0708 C Second \n",
209
+ "\n",
210
+ " who adult_male deck embark_town alive alone \n",
211
+ "0 man True NaN Southampton no False \n",
212
+ "1 woman False C Cherbourg yes False \n",
213
+ "2 woman False NaN Southampton yes True \n",
214
+ "3 woman False C Southampton yes False \n",
215
+ "4 man True NaN Southampton no True \n",
216
+ "5 man True NaN Queenstown no True \n",
217
+ "6 man True E Southampton no True \n",
218
+ "7 child False NaN Southampton no False \n",
219
+ "8 woman False NaN Southampton yes False \n",
220
+ "9 child False NaN Cherbourg yes False "
221
+ ],
222
+ "text/html": [
223
+ "\n",
224
+ " <div id=\"df-624c1dc8-4758-4bc3-9fe9-1528c5e244ca\" class=\"colab-df-container\">\n",
225
+ " <div>\n",
226
+ "<style scoped>\n",
227
+ " .dataframe tbody tr th:only-of-type {\n",
228
+ " vertical-align: middle;\n",
229
+ " }\n",
230
+ "\n",
231
+ " .dataframe tbody tr th {\n",
232
+ " vertical-align: top;\n",
233
+ " }\n",
234
+ "\n",
235
+ " .dataframe thead th {\n",
236
+ " text-align: right;\n",
237
+ " }\n",
238
+ "</style>\n",
239
+ "<table border=\"1\" class=\"dataframe\">\n",
240
+ " <thead>\n",
241
+ " <tr style=\"text-align: right;\">\n",
242
+ " <th></th>\n",
243
+ " <th>survived</th>\n",
244
+ " <th>pclass</th>\n",
245
+ " <th>sex</th>\n",
246
+ " <th>age</th>\n",
247
+ " <th>sibsp</th>\n",
248
+ " <th>parch</th>\n",
249
+ " <th>fare</th>\n",
250
+ " <th>embarked</th>\n",
251
+ " <th>class</th>\n",
252
+ " <th>who</th>\n",
253
+ " <th>adult_male</th>\n",
254
+ " <th>deck</th>\n",
255
+ " <th>embark_town</th>\n",
256
+ " <th>alive</th>\n",
257
+ " <th>alone</th>\n",
258
+ " </tr>\n",
259
+ " </thead>\n",
260
+ " <tbody>\n",
261
+ " <tr>\n",
262
+ " <th>0</th>\n",
263
+ " <td>0</td>\n",
264
+ " <td>3</td>\n",
265
+ " <td>male</td>\n",
266
+ " <td>22.0</td>\n",
267
+ " <td>1</td>\n",
268
+ " <td>0</td>\n",
269
+ " <td>7.2500</td>\n",
270
+ " <td>S</td>\n",
271
+ " <td>Third</td>\n",
272
+ " <td>man</td>\n",
273
+ " <td>True</td>\n",
274
+ " <td>NaN</td>\n",
275
+ " <td>Southampton</td>\n",
276
+ " <td>no</td>\n",
277
+ " <td>False</td>\n",
278
+ " </tr>\n",
279
+ " <tr>\n",
280
+ " <th>1</th>\n",
281
+ " <td>1</td>\n",
282
+ " <td>1</td>\n",
283
+ " <td>female</td>\n",
284
+ " <td>38.0</td>\n",
285
+ " <td>1</td>\n",
286
+ " <td>0</td>\n",
287
+ " <td>71.2833</td>\n",
288
+ " <td>C</td>\n",
289
+ " <td>First</td>\n",
290
+ " <td>woman</td>\n",
291
+ " <td>False</td>\n",
292
+ " <td>C</td>\n",
293
+ " <td>Cherbourg</td>\n",
294
+ " <td>yes</td>\n",
295
+ " <td>False</td>\n",
296
+ " </tr>\n",
297
+ " <tr>\n",
298
+ " <th>2</th>\n",
299
+ " <td>1</td>\n",
300
+ " <td>3</td>\n",
301
+ " <td>female</td>\n",
302
+ " <td>26.0</td>\n",
303
+ " <td>0</td>\n",
304
+ " <td>0</td>\n",
305
+ " <td>7.9250</td>\n",
306
+ " <td>S</td>\n",
307
+ " <td>Third</td>\n",
308
+ " <td>woman</td>\n",
309
+ " <td>False</td>\n",
310
+ " <td>NaN</td>\n",
311
+ " <td>Southampton</td>\n",
312
+ " <td>yes</td>\n",
313
+ " <td>True</td>\n",
314
+ " </tr>\n",
315
+ " <tr>\n",
316
+ " <th>3</th>\n",
317
+ " <td>1</td>\n",
318
+ " <td>1</td>\n",
319
+ " <td>female</td>\n",
320
+ " <td>35.0</td>\n",
321
+ " <td>1</td>\n",
322
+ " <td>0</td>\n",
323
+ " <td>53.1000</td>\n",
324
+ " <td>S</td>\n",
325
+ " <td>First</td>\n",
326
+ " <td>woman</td>\n",
327
+ " <td>False</td>\n",
328
+ " <td>C</td>\n",
329
+ " <td>Southampton</td>\n",
330
+ " <td>yes</td>\n",
331
+ " <td>False</td>\n",
332
+ " </tr>\n",
333
+ " <tr>\n",
334
+ " <th>4</th>\n",
335
+ " <td>0</td>\n",
336
+ " <td>3</td>\n",
337
+ " <td>male</td>\n",
338
+ " <td>35.0</td>\n",
339
+ " <td>0</td>\n",
340
+ " <td>0</td>\n",
341
+ " <td>8.0500</td>\n",
342
+ " <td>S</td>\n",
343
+ " <td>Third</td>\n",
344
+ " <td>man</td>\n",
345
+ " <td>True</td>\n",
346
+ " <td>NaN</td>\n",
347
+ " <td>Southampton</td>\n",
348
+ " <td>no</td>\n",
349
+ " <td>True</td>\n",
350
+ " </tr>\n",
351
+ " <tr>\n",
352
+ " <th>5</th>\n",
353
+ " <td>0</td>\n",
354
+ " <td>3</td>\n",
355
+ " <td>male</td>\n",
356
+ " <td>NaN</td>\n",
357
+ " <td>0</td>\n",
358
+ " <td>0</td>\n",
359
+ " <td>8.4583</td>\n",
360
+ " <td>Q</td>\n",
361
+ " <td>Third</td>\n",
362
+ " <td>man</td>\n",
363
+ " <td>True</td>\n",
364
+ " <td>NaN</td>\n",
365
+ " <td>Queenstown</td>\n",
366
+ " <td>no</td>\n",
367
+ " <td>True</td>\n",
368
+ " </tr>\n",
369
+ " <tr>\n",
370
+ " <th>6</th>\n",
371
+ " <td>0</td>\n",
372
+ " <td>1</td>\n",
373
+ " <td>male</td>\n",
374
+ " <td>54.0</td>\n",
375
+ " <td>0</td>\n",
376
+ " <td>0</td>\n",
377
+ " <td>51.8625</td>\n",
378
+ " <td>S</td>\n",
379
+ " <td>First</td>\n",
380
+ " <td>man</td>\n",
381
+ " <td>True</td>\n",
382
+ " <td>E</td>\n",
383
+ " <td>Southampton</td>\n",
384
+ " <td>no</td>\n",
385
+ " <td>True</td>\n",
386
+ " </tr>\n",
387
+ " <tr>\n",
388
+ " <th>7</th>\n",
389
+ " <td>0</td>\n",
390
+ " <td>3</td>\n",
391
+ " <td>male</td>\n",
392
+ " <td>2.0</td>\n",
393
+ " <td>3</td>\n",
394
+ " <td>1</td>\n",
395
+ " <td>21.0750</td>\n",
396
+ " <td>S</td>\n",
397
+ " <td>Third</td>\n",
398
+ " <td>child</td>\n",
399
+ " <td>False</td>\n",
400
+ " <td>NaN</td>\n",
401
+ " <td>Southampton</td>\n",
402
+ " <td>no</td>\n",
403
+ " <td>False</td>\n",
404
+ " </tr>\n",
405
+ " <tr>\n",
406
+ " <th>8</th>\n",
407
+ " <td>1</td>\n",
408
+ " <td>3</td>\n",
409
+ " <td>female</td>\n",
410
+ " <td>27.0</td>\n",
411
+ " <td>0</td>\n",
412
+ " <td>2</td>\n",
413
+ " <td>11.1333</td>\n",
414
+ " <td>S</td>\n",
415
+ " <td>Third</td>\n",
416
+ " <td>woman</td>\n",
417
+ " <td>False</td>\n",
418
+ " <td>NaN</td>\n",
419
+ " <td>Southampton</td>\n",
420
+ " <td>yes</td>\n",
421
+ " <td>False</td>\n",
422
+ " </tr>\n",
423
+ " <tr>\n",
424
+ " <th>9</th>\n",
425
+ " <td>1</td>\n",
426
+ " <td>2</td>\n",
427
+ " <td>female</td>\n",
428
+ " <td>14.0</td>\n",
429
+ " <td>1</td>\n",
430
+ " <td>0</td>\n",
431
+ " <td>30.0708</td>\n",
432
+ " <td>C</td>\n",
433
+ " <td>Second</td>\n",
434
+ " <td>child</td>\n",
435
+ " <td>False</td>\n",
436
+ " <td>NaN</td>\n",
437
+ " <td>Cherbourg</td>\n",
438
+ " <td>yes</td>\n",
439
+ " <td>False</td>\n",
440
+ " </tr>\n",
441
+ " </tbody>\n",
442
+ "</table>\n",
443
+ "</div>\n",
444
+ " <div class=\"colab-df-buttons\">\n",
445
+ "\n",
446
+ " <div class=\"colab-df-container\">\n",
447
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-624c1dc8-4758-4bc3-9fe9-1528c5e244ca')\"\n",
448
+ " title=\"Convert this dataframe to an interactive table.\"\n",
449
+ " style=\"display:none;\">\n",
450
+ "\n",
451
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
452
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
453
+ " </svg>\n",
454
+ " </button>\n",
455
+ "\n",
456
+ " <style>\n",
457
+ " .colab-df-container {\n",
458
+ " display:flex;\n",
459
+ " gap: 12px;\n",
460
+ " }\n",
461
+ "\n",
462
+ " .colab-df-convert {\n",
463
+ " background-color: #E8F0FE;\n",
464
+ " border: none;\n",
465
+ " border-radius: 50%;\n",
466
+ " cursor: pointer;\n",
467
+ " display: none;\n",
468
+ " fill: #1967D2;\n",
469
+ " height: 32px;\n",
470
+ " padding: 0 0 0 0;\n",
471
+ " width: 32px;\n",
472
+ " }\n",
473
+ "\n",
474
+ " .colab-df-convert:hover {\n",
475
+ " background-color: #E2EBFA;\n",
476
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
477
+ " fill: #174EA6;\n",
478
+ " }\n",
479
+ "\n",
480
+ " .colab-df-buttons div {\n",
481
+ " margin-bottom: 4px;\n",
482
+ " }\n",
483
+ "\n",
484
+ " [theme=dark] .colab-df-convert {\n",
485
+ " background-color: #3B4455;\n",
486
+ " fill: #D2E3FC;\n",
487
+ " }\n",
488
+ "\n",
489
+ " [theme=dark] .colab-df-convert:hover {\n",
490
+ " background-color: #434B5C;\n",
491
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
492
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
493
+ " fill: #FFFFFF;\n",
494
+ " }\n",
495
+ " </style>\n",
496
+ "\n",
497
+ " <script>\n",
498
+ " const buttonEl =\n",
499
+ " document.querySelector('#df-624c1dc8-4758-4bc3-9fe9-1528c5e244ca button.colab-df-convert');\n",
500
+ " buttonEl.style.display =\n",
501
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
502
+ "\n",
503
+ " async function convertToInteractive(key) {\n",
504
+ " const element = document.querySelector('#df-624c1dc8-4758-4bc3-9fe9-1528c5e244ca');\n",
505
+ " const dataTable =\n",
506
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
507
+ " [key], {});\n",
508
+ " if (!dataTable) return;\n",
509
+ "\n",
510
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
511
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
512
+ " + ' to learn more about interactive tables.';\n",
513
+ " element.innerHTML = '';\n",
514
+ " dataTable['output_type'] = 'display_data';\n",
515
+ " await google.colab.output.renderOutput(dataTable, element);\n",
516
+ " const docLink = document.createElement('div');\n",
517
+ " docLink.innerHTML = docLinkHtml;\n",
518
+ " element.appendChild(docLink);\n",
519
+ " }\n",
520
+ " </script>\n",
521
+ " </div>\n",
522
+ "\n",
523
+ "\n",
524
+ " </div>\n",
525
+ " </div>\n"
526
+ ],
527
+ "application/vnd.google.colaboratory.intrinsic+json": {
528
+ "type": "dataframe",
529
+ "variable_name": "titanic",
530
+ "summary": "{\n \"name\": \"titanic\",\n \"rows\": 891,\n \"fields\": [\n {\n \"column\": \"survived\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pclass\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"female\",\n \"male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.526497332334044,\n \"min\": 0.42,\n \"max\": 80.0,\n \"num_unique_values\": 88,\n \"samples\": [\n 0.75,\n 22.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sibsp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 8,\n \"num_unique_values\": 7,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"parch\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 6,\n \"num_unique_values\": 7,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 49.693428597180905,\n \"min\": 0.0,\n \"max\": 512.3292,\n \"num_unique_values\": 248,\n \"samples\": [\n 11.2417,\n 51.8625\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embarked\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"S\",\n \"C\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"class\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Third\",\n \"First\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"who\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"man\",\n \"woman\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"adult_male\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n false,\n true\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"deck\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"C\",\n \"E\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embark_town\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Southampton\",\n \"Cherbourg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alive\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"yes\",\n \"no\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alone\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n true,\n false\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
531
+ }
532
+ },
533
+ "metadata": {},
534
+ "execution_count": 9
535
+ }
536
+ ]
537
+ },
538
+ {
539
+ "cell_type": "code",
540
+ "metadata": {
541
+ "colab": {
542
+ "base_uri": "https://localhost:8080/",
543
+ "height": 206
544
+ },
545
+ "id": "bd9b06d2",
546
+ "outputId": "b51231af-c764-4e73-bcc1-56a31c16da8b"
547
+ },
548
+ "source": [
549
+ "X = titanic[['sex', 'age', 'fare', 'class', 'embark_town', 'alone']]\n",
550
+ "display(X.head())"
551
+ ],
552
+ "execution_count": null,
553
+ "outputs": [
554
+ {
555
+ "output_type": "display_data",
556
+ "data": {
557
+ "text/plain": [
558
+ " sex age fare class embark_town alone\n",
559
+ "0 male 22.0 7.2500 Third Southampton False\n",
560
+ "1 female 38.0 71.2833 First Cherbourg False\n",
561
+ "2 female 26.0 7.9250 Third Southampton True\n",
562
+ "3 female 35.0 53.1000 First Southampton False\n",
563
+ "4 male 35.0 8.0500 Third Southampton True"
564
+ ],
565
+ "text/html": [
566
+ "\n",
567
+ " <div id=\"df-1bc86b89-9055-4685-a83b-a89268104b28\" class=\"colab-df-container\">\n",
568
+ " <div>\n",
569
+ "<style scoped>\n",
570
+ " .dataframe tbody tr th:only-of-type {\n",
571
+ " vertical-align: middle;\n",
572
+ " }\n",
573
+ "\n",
574
+ " .dataframe tbody tr th {\n",
575
+ " vertical-align: top;\n",
576
+ " }\n",
577
+ "\n",
578
+ " .dataframe thead th {\n",
579
+ " text-align: right;\n",
580
+ " }\n",
581
+ "</style>\n",
582
+ "<table border=\"1\" class=\"dataframe\">\n",
583
+ " <thead>\n",
584
+ " <tr style=\"text-align: right;\">\n",
585
+ " <th></th>\n",
586
+ " <th>sex</th>\n",
587
+ " <th>age</th>\n",
588
+ " <th>fare</th>\n",
589
+ " <th>class</th>\n",
590
+ " <th>embark_town</th>\n",
591
+ " <th>alone</th>\n",
592
+ " </tr>\n",
593
+ " </thead>\n",
594
+ " <tbody>\n",
595
+ " <tr>\n",
596
+ " <th>0</th>\n",
597
+ " <td>male</td>\n",
598
+ " <td>22.0</td>\n",
599
+ " <td>7.2500</td>\n",
600
+ " <td>Third</td>\n",
601
+ " <td>Southampton</td>\n",
602
+ " <td>False</td>\n",
603
+ " </tr>\n",
604
+ " <tr>\n",
605
+ " <th>1</th>\n",
606
+ " <td>female</td>\n",
607
+ " <td>38.0</td>\n",
608
+ " <td>71.2833</td>\n",
609
+ " <td>First</td>\n",
610
+ " <td>Cherbourg</td>\n",
611
+ " <td>False</td>\n",
612
+ " </tr>\n",
613
+ " <tr>\n",
614
+ " <th>2</th>\n",
615
+ " <td>female</td>\n",
616
+ " <td>26.0</td>\n",
617
+ " <td>7.9250</td>\n",
618
+ " <td>Third</td>\n",
619
+ " <td>Southampton</td>\n",
620
+ " <td>True</td>\n",
621
+ " </tr>\n",
622
+ " <tr>\n",
623
+ " <th>3</th>\n",
624
+ " <td>female</td>\n",
625
+ " <td>35.0</td>\n",
626
+ " <td>53.1000</td>\n",
627
+ " <td>First</td>\n",
628
+ " <td>Southampton</td>\n",
629
+ " <td>False</td>\n",
630
+ " </tr>\n",
631
+ " <tr>\n",
632
+ " <th>4</th>\n",
633
+ " <td>male</td>\n",
634
+ " <td>35.0</td>\n",
635
+ " <td>8.0500</td>\n",
636
+ " <td>Third</td>\n",
637
+ " <td>Southampton</td>\n",
638
+ " <td>True</td>\n",
639
+ " </tr>\n",
640
+ " </tbody>\n",
641
+ "</table>\n",
642
+ "</div>\n",
643
+ " <div class=\"colab-df-buttons\">\n",
644
+ "\n",
645
+ " <div class=\"colab-df-container\">\n",
646
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1bc86b89-9055-4685-a83b-a89268104b28')\"\n",
647
+ " title=\"Convert this dataframe to an interactive table.\"\n",
648
+ " style=\"display:none;\">\n",
649
+ "\n",
650
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
651
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
652
+ " </svg>\n",
653
+ " </button>\n",
654
+ "\n",
655
+ " <style>\n",
656
+ " .colab-df-container {\n",
657
+ " display:flex;\n",
658
+ " gap: 12px;\n",
659
+ " }\n",
660
+ "\n",
661
+ " .colab-df-convert {\n",
662
+ " background-color: #E8F0FE;\n",
663
+ " border: none;\n",
664
+ " border-radius: 50%;\n",
665
+ " cursor: pointer;\n",
666
+ " display: none;\n",
667
+ " fill: #1967D2;\n",
668
+ " height: 32px;\n",
669
+ " padding: 0 0 0 0;\n",
670
+ " width: 32px;\n",
671
+ " }\n",
672
+ "\n",
673
+ " .colab-df-convert:hover {\n",
674
+ " background-color: #E2EBFA;\n",
675
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
676
+ " fill: #174EA6;\n",
677
+ " }\n",
678
+ "\n",
679
+ " .colab-df-buttons div {\n",
680
+ " margin-bottom: 4px;\n",
681
+ " }\n",
682
+ "\n",
683
+ " [theme=dark] .colab-df-convert {\n",
684
+ " background-color: #3B4455;\n",
685
+ " fill: #D2E3FC;\n",
686
+ " }\n",
687
+ "\n",
688
+ " [theme=dark] .colab-df-convert:hover {\n",
689
+ " background-color: #434B5C;\n",
690
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
691
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
692
+ " fill: #FFFFFF;\n",
693
+ " }\n",
694
+ " </style>\n",
695
+ "\n",
696
+ " <script>\n",
697
+ " const buttonEl =\n",
698
+ " document.querySelector('#df-1bc86b89-9055-4685-a83b-a89268104b28 button.colab-df-convert');\n",
699
+ " buttonEl.style.display =\n",
700
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
701
+ "\n",
702
+ " async function convertToInteractive(key) {\n",
703
+ " const element = document.querySelector('#df-1bc86b89-9055-4685-a83b-a89268104b28');\n",
704
+ " const dataTable =\n",
705
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
706
+ " [key], {});\n",
707
+ " if (!dataTable) return;\n",
708
+ "\n",
709
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
710
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
711
+ " + ' to learn more about interactive tables.';\n",
712
+ " element.innerHTML = '';\n",
713
+ " dataTable['output_type'] = 'display_data';\n",
714
+ " await google.colab.output.renderOutput(dataTable, element);\n",
715
+ " const docLink = document.createElement('div');\n",
716
+ " docLink.innerHTML = docLinkHtml;\n",
717
+ " element.appendChild(docLink);\n",
718
+ " }\n",
719
+ " </script>\n",
720
+ " </div>\n",
721
+ "\n",
722
+ "\n",
723
+ " </div>\n",
724
+ " </div>\n"
725
+ ],
726
+ "application/vnd.google.colaboratory.intrinsic+json": {
727
+ "type": "dataframe",
728
+ "summary": "{\n \"name\": \"display(X\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"female\",\n \"male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6.833739825307955,\n \"min\": 22.0,\n \"max\": 38.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 38.0,\n 35.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 30.5100288352535,\n \"min\": 7.25,\n \"max\": 71.2833,\n \"num_unique_values\": 5,\n \"samples\": [\n 71.2833,\n 8.05\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"class\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"First\",\n \"Third\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embark_town\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Cherbourg\",\n \"Southampton\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alone\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n true,\n false\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
729
+ }
730
+ },
731
+ "metadata": {}
732
+ }
733
+ ]
734
+ },
735
+ {
736
+ "cell_type": "code",
737
+ "metadata": {
738
+ "colab": {
739
+ "base_uri": "https://localhost:8080/"
740
+ },
741
+ "id": "15cb730c",
742
+ "outputId": "8f852295-a151-4942-b1d0-c163351a517f"
743
+ },
744
+ "source": [
745
+ "X.shape"
746
+ ],
747
+ "execution_count": null,
748
+ "outputs": [
749
+ {
750
+ "output_type": "execute_result",
751
+ "data": {
752
+ "text/plain": [
753
+ "(891, 6)"
754
+ ]
755
+ },
756
+ "metadata": {},
757
+ "execution_count": 34
758
+ }
759
+ ]
760
+ },
761
+ {
762
+ "cell_type": "code",
763
+ "metadata": {
764
+ "colab": {
765
+ "base_uri": "https://localhost:8080/"
766
+ },
767
+ "id": "42d82e7e",
768
+ "outputId": "81ccfc58-d19f-4c0d-b329-de45f0706430"
769
+ },
770
+ "source": [
771
+ "Y.shape"
772
+ ],
773
+ "execution_count": null,
774
+ "outputs": [
775
+ {
776
+ "output_type": "execute_result",
777
+ "data": {
778
+ "text/plain": [
779
+ "(891,)"
780
+ ]
781
+ },
782
+ "metadata": {},
783
+ "execution_count": 35
784
+ }
785
+ ]
786
+ },
787
+ {
788
+ "cell_type": "markdown",
789
+ "source": [
790
+ "Dealing with messy data"
791
+ ],
792
+ "metadata": {
793
+ "id": "BN_Y-5xcReHs"
794
+ }
795
+ },
796
+ {
797
+ "cell_type": "code",
798
+ "source": [
799
+ "import seaborn as sns\n",
800
+ "import numpy as np\n",
801
+ "import matplotlib.pyplot as plt\n",
802
+ "tips = sns.load_dataset('titanic')"
803
+ ],
804
+ "metadata": {
805
+ "id": "6V7NzckGO9cR"
806
+ },
807
+ "execution_count": null,
808
+ "outputs": []
809
+ },
810
+ {
811
+ "cell_type": "markdown",
812
+ "metadata": {
813
+ "id": "c79dedfd"
814
+ },
815
+ "source": [
816
+ "# Task\n",
817
+ "Check for and handle missing values and outliers in the features matrix `X`. Then, summarize the findings and the methods used to address them."
818
+ ]
819
+ },
820
+ {
821
+ "cell_type": "markdown",
822
+ "metadata": {
823
+ "id": "a33d522d"
824
+ },
825
+ "source": [
826
+ "## Check Missing Values\n",
827
+ "\n",
828
+ "### Subtask:\n",
829
+ "Identify and count the number of missing values in each column of the features matrix `X`. This will help us understand the extent of missing data.\n"
830
+ ]
831
+ },
832
+ {
833
+ "cell_type": "markdown",
834
+ "metadata": {
835
+ "id": "ea922db0"
836
+ },
837
+ "source": [
838
+ "**Reasoning**:\n",
839
+ "To identify and count missing values in each column of the features matrix `X`, I will use the `.isnull()` method followed by `.sum()` to get the total count for each column. This directly addresses the subtask's instructions.\n",
840
+ "\n"
841
+ ]
842
+ },
843
+ {
844
+ "cell_type": "code",
845
+ "metadata": {
846
+ "id": "d43f1b5d",
847
+ "colab": {
848
+ "base_uri": "https://localhost:8080/"
849
+ },
850
+ "outputId": "b6879fd0-16bf-4fc1-c951-b2cedc66202b"
851
+ },
852
+ "source": [
853
+ "missing_values = X.isnull().sum()\n",
854
+ "print(\"Missing values per column in X:\")\n",
855
+ "print(missing_values)"
856
+ ],
857
+ "execution_count": 37,
858
+ "outputs": [
859
+ {
860
+ "output_type": "stream",
861
+ "name": "stdout",
862
+ "text": [
863
+ "Missing values per column in X:\n",
864
+ "sex 0\n",
865
+ "age 177\n",
866
+ "fare 0\n",
867
+ "class 0\n",
868
+ "embark_town 2\n",
869
+ "alone 0\n",
870
+ "dtype: int64\n"
871
+ ]
872
+ }
873
+ ]
874
+ },
875
+ {
876
+ "cell_type": "markdown",
877
+ "metadata": {
878
+ "id": "9dfd8c70"
879
+ },
880
+ "source": [
881
+ "# Task\n",
882
+ "Impute missing values in the 'age' column of DataFrame `X` using the median of the 'age' column."
883
+ ]
884
+ },
885
+ {
886
+ "cell_type": "markdown",
887
+ "metadata": {
888
+ "id": "26083663"
889
+ },
890
+ "source": [
891
+ "## Handle Missing Values in 'age'\n",
892
+ "\n",
893
+ "### Subtask:\n",
894
+ "Impute missing values in the 'age' column of DataFrame `X` using the median of the 'age' column.\n"
895
+ ]
896
+ },
897
+ {
898
+ "cell_type": "markdown",
899
+ "metadata": {
900
+ "id": "6d688010"
901
+ },
902
+ "source": [
903
+ "**Reasoning**:\n",
904
+ "To impute the missing values in the 'age' column, I will first calculate its median as specified in the instructions.\n",
905
+ "\n"
906
+ ]
907
+ },
908
+ {
909
+ "cell_type": "code",
910
+ "metadata": {
911
+ "colab": {
912
+ "base_uri": "https://localhost:8080/"
913
+ },
914
+ "id": "c8c06424",
915
+ "outputId": "0b48740b-a9d0-4593-a370-c99d172892fc"
916
+ },
917
+ "source": [
918
+ "median_age = X['age'].median()\n",
919
+ "print(f\"Median age: {median_age}\")"
920
+ ],
921
+ "execution_count": 40,
922
+ "outputs": [
923
+ {
924
+ "output_type": "stream",
925
+ "name": "stdout",
926
+ "text": [
927
+ "Median age: 28.0\n"
928
+ ]
929
+ }
930
+ ]
931
+ },
932
+ {
933
+ "cell_type": "markdown",
934
+ "metadata": {
935
+ "id": "40c208e7"
936
+ },
937
+ "source": [
938
+ "**Reasoning**:\n",
939
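+ "Now that the median age has been calculated, it is to be used to fill the missing values in the 'age' column of DataFrame `X`, after which the imputation is verified by checking for remaining missing values. Note: the code cell that follows does not contain this fill step; it filters out rows whose 'age' or 'fare' lies more than three standard deviations from the column mean.\n",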
940
+ "\n"
941
+ ]
942
+ },
943
+ {
944
+ "cell_type": "code",
945
+ "metadata": {
946
+ "colab": {
947
+ "base_uri": "https://localhost:8080/"
948
+ },
949
+ "id": "77b9c2d7",
950
+ "outputId": "c63b0165-7ffb-43ca-879f-979f7724b4cc"
951
+ },
952
+ "source": [
953
+ "features = [\"age\", \"fare\"]\n",
954
+ "for feature in features:\n",
955
+ " min_ = X[feature].mean() - (3 * X[feature].std())\n",
956
+ " max_ = X[feature].mean() + (3 * X[feature].std())\n",
957
+ " X = X[X[feature] <= max_]\n",
958
+ " X = X[X[feature] >= min_]\n",
959
+ " print(feature, \":\", X.shape)"
960
+ ],
961
+ "execution_count": 46,
962
+ "outputs": [
963
+ {
964
+ "output_type": "stream",
965
+ "name": "stdout",
966
+ "text": [
967
+ "age : (884, 6)\n",
968
+ "fare : (864, 6)\n"
969
+ ]
970
+ }
971
+ ]
972
+ },
973
+ {
974
+ "cell_type": "code",
975
+ "source": [
976
+ "features = [\"sex\", \"class\", \"embark_town\", \"alone\"]\n",
977
+ "for feature in features:\n",
978
+ " count_ = X[feature].value_counts()\n",
979
+ " print(feature)\n",
980
+ " print(count_, \"\\n\")"
981
+ ],
982
+ "metadata": {
983
+ "colab": {
984
+ "base_uri": "https://localhost:8080/"
985
+ },
986
+ "id": "_PFKkCUKW1JE",
987
+ "outputId": "4a4c1e72-57a0-4a02-a591-d5a9d8781c33"
988
+ },
989
+ "execution_count": 47,
990
+ "outputs": [
991
+ {
992
+ "output_type": "stream",
993
+ "name": "stdout",
994
+ "text": [
995
+ "sex\n",
996
+ "sex\n",
997
+ "male 562\n",
998
+ "female 302\n",
999
+ "Name: count, dtype: int64 \n",
1000
+ "\n",
1001
+ "class\n",
1002
+ "class\n",
1003
+ "Third 489\n",
1004
+ "First 192\n",
1005
+ "Second 183\n",
1006
+ "Name: count, dtype: int64 \n",
1007
+ "\n",
1008
+ "embark_town\n",
1009
+ "embark_town\n",
1010
+ "Southampton 632\n",
1011
+ "Cherbourg 154\n",
1012
+ "Queenstown 76\n",
1013
+ "Name: count, dtype: int64 \n",
1014
+ "\n",
1015
+ "alone\n",
1016
+ "alone\n",
1017
+ "True 524\n",
1018
+ "False 340\n",
1019
+ "Name: count, dtype: int64 \n",
1020
+ "\n"
1021
+ ]
1022
+ }
1023
+ ]
1024
+ },
1025
+ {
1026
+ "cell_type": "code",
1027
+ "source": [
1028
+ "enc = LabelEncoder()\n",
1029
+ "X[\"sex\"] = enc.fit_transform(X['sex'].astype('str'))\n",
1030
+ "X[\"class\"] = enc.fit_transform(X['class'].astype('str'))\n",
1031
+ "X[\"embark_town\"] = enc.fit_transform(X['embark_town'].\\\n",
1032
+ " astype('str'))\n",
1033
+ "X[\"alone\"] = enc.fit_transform(X['alone'].astype('str'))\n",
1034
+ "X.head()"
1035
+ ],
1036
+ "metadata": {
1037
+ "colab": {
1038
+ "base_uri": "https://localhost:8080/",
1039
+ "height": 206
1040
+ },
1041
+ "id": "AwsqNomXW45N",
1042
+ "outputId": "990b54f7-d636-423f-8522-73af7a5b9cca"
1043
+ },
1044
+ "execution_count": 49,
1045
+ "outputs": [
1046
+ {
1047
+ "output_type": "execute_result",
1048
+ "data": {
1049
+ "text/plain": [
1050
+ " sex age fare class embark_town alone\n",
1051
+ "0 1 22.0 7.2500 2 2 0\n",
1052
+ "1 0 38.0 71.2833 0 0 0\n",
1053
+ "2 0 26.0 7.9250 2 2 1\n",
1054
+ "3 0 35.0 53.1000 0 2 0\n",
1055
+ "4 1 35.0 8.0500 2 2 1"
1056
+ ],
1057
+ "text/html": [
1058
+ "\n",
1059
+ " <div id=\"df-ee9f1e2a-0d9f-4f33-bd20-8443e95695f8\" class=\"colab-df-container\">\n",
1060
+ " <div>\n",
1061
+ "<style scoped>\n",
1062
+ " .dataframe tbody tr th:only-of-type {\n",
1063
+ " vertical-align: middle;\n",
1064
+ " }\n",
1065
+ "\n",
1066
+ " .dataframe tbody tr th {\n",
1067
+ " vertical-align: top;\n",
1068
+ " }\n",
1069
+ "\n",
1070
+ " .dataframe thead th {\n",
1071
+ " text-align: right;\n",
1072
+ " }\n",
1073
+ "</style>\n",
1074
+ "<table border=\"1\" class=\"dataframe\">\n",
1075
+ " <thead>\n",
1076
+ " <tr style=\"text-align: right;\">\n",
1077
+ " <th></th>\n",
1078
+ " <th>sex</th>\n",
1079
+ " <th>age</th>\n",
1080
+ " <th>fare</th>\n",
1081
+ " <th>class</th>\n",
1082
+ " <th>embark_town</th>\n",
1083
+ " <th>alone</th>\n",
1084
+ " </tr>\n",
1085
+ " </thead>\n",
1086
+ " <tbody>\n",
1087
+ " <tr>\n",
1088
+ " <th>0</th>\n",
1089
+ " <td>1</td>\n",
1090
+ " <td>22.0</td>\n",
1091
+ " <td>7.2500</td>\n",
1092
+ " <td>2</td>\n",
1093
+ " <td>2</td>\n",
1094
+ " <td>0</td>\n",
1095
+ " </tr>\n",
1096
+ " <tr>\n",
1097
+ " <th>1</th>\n",
1098
+ " <td>0</td>\n",
1099
+ " <td>38.0</td>\n",
1100
+ " <td>71.2833</td>\n",
1101
+ " <td>0</td>\n",
1102
+ " <td>0</td>\n",
1103
+ " <td>0</td>\n",
1104
+ " </tr>\n",
1105
+ " <tr>\n",
1106
+ " <th>2</th>\n",
1107
+ " <td>0</td>\n",
1108
+ " <td>26.0</td>\n",
1109
+ " <td>7.9250</td>\n",
1110
+ " <td>2</td>\n",
1111
+ " <td>2</td>\n",
1112
+ " <td>1</td>\n",
1113
+ " </tr>\n",
1114
+ " <tr>\n",
1115
+ " <th>3</th>\n",
1116
+ " <td>0</td>\n",
1117
+ " <td>35.0</td>\n",
1118
+ " <td>53.1000</td>\n",
1119
+ " <td>0</td>\n",
1120
+ " <td>2</td>\n",
1121
+ " <td>0</td>\n",
1122
+ " </tr>\n",
1123
+ " <tr>\n",
1124
+ " <th>4</th>\n",
1125
+ " <td>1</td>\n",
1126
+ " <td>35.0</td>\n",
1127
+ " <td>8.0500</td>\n",
1128
+ " <td>2</td>\n",
1129
+ " <td>2</td>\n",
1130
+ " <td>1</td>\n",
1131
+ " </tr>\n",
1132
+ " </tbody>\n",
1133
+ "</table>\n",
1134
+ "</div>\n",
1135
+ " <div class=\"colab-df-buttons\">\n",
1136
+ "\n",
1137
+ " <div class=\"colab-df-container\">\n",
1138
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ee9f1e2a-0d9f-4f33-bd20-8443e95695f8')\"\n",
1139
+ " title=\"Convert this dataframe to an interactive table.\"\n",
1140
+ " style=\"display:none;\">\n",
1141
+ "\n",
1142
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
1143
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
1144
+ " </svg>\n",
1145
+ " </button>\n",
1146
+ "\n",
1147
+ " <style>\n",
1148
+ " .colab-df-container {\n",
1149
+ " display:flex;\n",
1150
+ " gap: 12px;\n",
1151
+ " }\n",
1152
+ "\n",
1153
+ " .colab-df-convert {\n",
1154
+ " background-color: #E8F0FE;\n",
1155
+ " border: none;\n",
1156
+ " border-radius: 50%;\n",
1157
+ " cursor: pointer;\n",
1158
+ " display: none;\n",
1159
+ " fill: #1967D2;\n",
1160
+ " height: 32px;\n",
1161
+ " padding: 0 0 0 0;\n",
1162
+ " width: 32px;\n",
1163
+ " }\n",
1164
+ "\n",
1165
+ " .colab-df-convert:hover {\n",
1166
+ " background-color: #E2EBFA;\n",
1167
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
1168
+ " fill: #174EA6;\n",
1169
+ " }\n",
1170
+ "\n",
1171
+ " .colab-df-buttons div {\n",
1172
+ " margin-bottom: 4px;\n",
1173
+ " }\n",
1174
+ "\n",
1175
+ " [theme=dark] .colab-df-convert {\n",
1176
+ " background-color: #3B4455;\n",
1177
+ " fill: #D2E3FC;\n",
1178
+ " }\n",
1179
+ "\n",
1180
+ " [theme=dark] .colab-df-convert:hover {\n",
1181
+ " background-color: #434B5C;\n",
1182
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
1183
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
1184
+ " fill: #FFFFFF;\n",
1185
+ " }\n",
1186
+ " </style>\n",
1187
+ "\n",
1188
+ " <script>\n",
1189
+ " const buttonEl =\n",
1190
+ " document.querySelector('#df-ee9f1e2a-0d9f-4f33-bd20-8443e95695f8 button.colab-df-convert');\n",
1191
+ " buttonEl.style.display =\n",
1192
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1193
+ "\n",
1194
+ " async function convertToInteractive(key) {\n",
1195
+ " const element = document.querySelector('#df-ee9f1e2a-0d9f-4f33-bd20-8443e95695f8');\n",
1196
+ " const dataTable =\n",
1197
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
1198
+ " [key], {});\n",
1199
+ " if (!dataTable) return;\n",
1200
+ "\n",
1201
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
1202
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
1203
+ " + ' to learn more about interactive tables.';\n",
1204
+ " element.innerHTML = '';\n",
1205
+ " dataTable['output_type'] = 'display_data';\n",
1206
+ " await google.colab.output.renderOutput(dataTable, element);\n",
1207
+ " const docLink = document.createElement('div');\n",
1208
+ " docLink.innerHTML = docLinkHtml;\n",
1209
+ " element.appendChild(docLink);\n",
1210
+ " }\n",
1211
+ " </script>\n",
1212
+ " </div>\n",
1213
+ "\n",
1214
+ "\n",
1215
+ " </div>\n",
1216
+ " </div>\n"
1217
+ ],
1218
+ "application/vnd.google.colaboratory.intrinsic+json": {
1219
+ "type": "dataframe",
1220
+ "variable_name": "X",
1221
+ "summary": "{\n \"name\": \"X\",\n \"rows\": 864,\n \"fields\": [\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12.498758947613258,\n \"min\": 0.42,\n \"max\": 66.0,\n \"num_unique_values\": 83,\n \"samples\": [\n 5.0,\n 22.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29.400192357023762,\n \"min\": 0.0,\n \"max\": 164.8667,\n \"num_unique_values\": 239,\n \"samples\": [\n 7.8958,\n 51.8625\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"class\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embark_town\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 0,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alone\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
1222
+ }
1223
+ },
1224
+ "metadata": {},
1225
+ "execution_count": 49
1226
+ }
1227
+ ]
1228
+ }
1229
+ ]
1230
+ }