{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyOanFyooUUWZniR03cv72o+",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"
"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "cMIjQEwLJPKQ",
"outputId": "c3c64dff-48e9-4dab-e007-fc9cf25753e9"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Cloning into 'The-Machine-Learning-Workshop'...\n",
"remote: Enumerating objects: 805, done.\u001b[K\n",
"remote: Counting objects: 100% (23/23), done.\u001b[K\n",
"remote: Compressing objects: 100% (15/15), done.\u001b[K\n",
"remote: Total 805 (delta 15), reused 8 (delta 8), pack-reused 782 (from 1)\u001b[K\n",
"Receiving objects: 100% (805/805), 10.36 MiB | 9.64 MiB/s, done.\n",
"Resolving deltas: 100% (293/293), done.\n"
]
}
],
"source": [
"# Clone only when the checkout is missing, and into an explicit destination, so the\n",
"# cell can be re-run safely regardless of the current working directory.\n",
"![ -d /content/The-Machine-Learning-Workshop ] || git clone https://github.com/MsSaidat25/The-Machine-Learning-Workshop.git /content/The-Machine-Learning-Workshop"
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"os.chdir('/content/The-Machine-Learning-Workshop')\n",
"!ls # see all folders/files"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fkumve32Jj-w",
"outputId": "f5f0c473-b4c3-4e25-b4e9-23db465582c1"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Chapter01 Chapter03 Chapter05 Graphics README.md\n",
"Chapter02 Chapter04 Chapter06 LICENSE requirements.txt\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8a5d3702",
"outputId": "312c313b-b695-48a4-e9f6-cad0ad63f4f9"
},
"source": [
"import os\n",
"os.chdir('/content/The-Machine-Learning-Workshop/Chapter01')\n",
"!ls"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Activity1.01 Exercise1.01 Exercise1.03\n",
"Activity1.02 Exercise1.02 Exercise1.04\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "99014702",
"outputId": "a7833725-7b7b-4655-8a28-5f0898678ede"
},
"source": [
"import json\n",
"\n",
"notebook_path = '/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01/Activity1_01.ipynb'\n",
"\n",
"# Notebook files are JSON documents; read them with an explicit UTF-8 encoding so the\n",
"# result does not depend on the platform's default locale.\n",
"with open(notebook_path, 'r', encoding='utf-8') as f:\n",
"    notebook_content = json.load(f)\n",
"\n",
"# Flatten every code/markdown cell into a {'cell_type', 'code'} record. Code cells are\n",
"# labelled 'python'; cells of any other type are skipped.\n",
"cells_to_generate = []\n",
"for cell in notebook_content['cells']:\n",
"    if cell['cell_type'] == 'code':\n",
"        cells_to_generate.append({'cell_type': 'python', 'code': ''.join(cell['source'])})\n",
"    elif cell['cell_type'] == 'markdown':\n",
"        cells_to_generate.append({'cell_type': 'markdown', 'code': ''.join(cell['source'])})\n",
"\n",
"# Report how many cells were parsed and preview the first one to confirm the parsing.\n",
"print(f\"Found {len(cells_to_generate)} cells in the notebook. Preview of the first cell:\\n{cells_to_generate[0]['code'] if cells_to_generate else 'No cells found.'}\")"
],
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Found 5 cells in the notebook. Preview of the first cell:\n",
"import seaborn as sns\n",
"titanic = sns.load_dataset('titanic')\n",
"titanic.head(10)\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "12666191",
"outputId": "cb746e2f-c18d-4518-88c9-d6e3a401b318"
},
"source": [
"import os\n",
"os.chdir('/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01')\n",
"!ls"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Activity1_01.ipynb titanic.csv unit_test_activity1_01.ipynb\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "17425fb5",
"outputId": "082e27d0-240f-4fd8-da28-828aeda873a8"
},
"source": [
"!cat '/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01/Activity1_01.ipynb'"
],
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"cells\": [\n",
" {\n",
" \"cell_type\": \"code\",\n",
" \"execution_count\": 4,\n",
" \"metadata\": {},\n",
" \"outputs\": [\n",
" {\n",
" \"data\": {\n",
" \"text/html\": [\n",
" \"
\\n\",\n",
" \"\\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" \\n\",\n",
" \" | \\n\",\n",
" \" survived | \\n\",\n",
" \" pclass | \\n\",\n",
" \" sex | \\n\",\n",
" \" age | \\n\",\n",
" \" sibsp | \\n\",\n",
" \" parch | \\n\",\n",
" \" fare | \\n\",\n",
" \" embarked | \\n\",\n",
" \" class | \\n\",\n",
" \" who | \\n\",\n",
" \" adult_male | \\n\",\n",
" \" deck | \\n\",\n",
" \" embark_town | \\n\",\n",
" \" alive | \\n\",\n",
" \" alone | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" \\n\",\n",
" \" \\n\",\n",
" \" | 0 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 3 | \\n\",\n",
" \" male | \\n\",\n",
" \" 22.0 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 7.2500 | \\n\",\n",
" \" S | \\n\",\n",
" \" Third | \\n\",\n",
" \" man | \\n\",\n",
" \" True | \\n\",\n",
" \" NaN | \\n\",\n",
" \" Southampton | \\n\",\n",
" \" no | \\n\",\n",
" \" False | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" | 1 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" female | \\n\",\n",
" \" 38.0 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 71.2833 | \\n\",\n",
" \" C | \\n\",\n",
" \" First | \\n\",\n",
" \" woman | \\n\",\n",
" \" False | \\n\",\n",
" \" C | \\n\",\n",
" \" Cherbourg | \\n\",\n",
" \" yes | \\n\",\n",
" \" False | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" | 2 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" 3 | \\n\",\n",
" \" female | \\n\",\n",
" \" 26.0 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 7.9250 | \\n\",\n",
" \" S | \\n\",\n",
" \" Third | \\n\",\n",
" \" woman | \\n\",\n",
" \" False | \\n\",\n",
" \" NaN | \\n\",\n",
" \" Southampton | \\n\",\n",
" \" yes | \\n\",\n",
" \" True | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" | 3 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" female | \\n\",\n",
" \" 35.0 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 53.1000 | \\n\",\n",
" \" S | \\n\",\n",
" \" First | \\n\",\n",
" \" woman | \\n\",\n",
" \" False | \\n\",\n",
" \" C | \\n\",\n",
" \" Southampton | \\n\",\n",
" \" yes | \\n\",\n",
" \" False | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" | 4 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 3 | \\n\",\n",
" \" male | \\n\",\n",
" \" 35.0 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 8.0500 | \\n\",\n",
" \" S | \\n\",\n",
" \" Third | \\n\",\n",
" \" man | \\n\",\n",
" \" True | \\n\",\n",
" \" NaN | \\n\",\n",
" \" Southampton | \\n\",\n",
" \" no | \\n\",\n",
" \" True | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" | 5 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 3 | \\n\",\n",
" \" male | \\n\",\n",
" \" NaN | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 8.4583 | \\n\",\n",
" \" Q | \\n\",\n",
" \" Third | \\n\",\n",
" \" man | \\n\",\n",
" \" True | \\n\",\n",
" \" NaN | \\n\",\n",
" \" Queenstown | \\n\",\n",
" \" no | \\n\",\n",
" \" True | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" | 6 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" male | \\n\",\n",
" \" 54.0 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 51.8625 | \\n\",\n",
" \" S | \\n\",\n",
" \" First | \\n\",\n",
" \" man | \\n\",\n",
" \" True | \\n\",\n",
" \" E | \\n\",\n",
" \" Southampton | \\n\",\n",
" \" no | \\n\",\n",
" \" True | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" | 7 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 3 | \\n\",\n",
" \" male | \\n\",\n",
" \" 2.0 | \\n\",\n",
" \" 3 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" 21.0750 | \\n\",\n",
" \" S | \\n\",\n",
" \" Third | \\n\",\n",
" \" child | \\n\",\n",
" \" False | \\n\",\n",
" \" NaN | \\n\",\n",
" \" Southampton | \\n\",\n",
" \" no | \\n\",\n",
" \" False | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" | 8 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" 3 | \\n\",\n",
" \" female | \\n\",\n",
" \" 27.0 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 2 | \\n\",\n",
" \" 11.1333 | \\n\",\n",
" \" S | \\n\",\n",
" \" Third | \\n\",\n",
" \" woman | \\n\",\n",
" \" False | \\n\",\n",
" \" NaN | \\n\",\n",
" \" Southampton | \\n\",\n",
" \" yes | \\n\",\n",
" \" False | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \" | 9 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" 2 | \\n\",\n",
" \" female | \\n\",\n",
" \" 14.0 | \\n\",\n",
" \" 1 | \\n\",\n",
" \" 0 | \\n\",\n",
" \" 30.0708 | \\n\",\n",
" \" C | \\n\",\n",
" \" Second | \\n\",\n",
" \" child | \\n\",\n",
" \" False | \\n\",\n",
" \" NaN | \\n\",\n",
" \" Cherbourg | \\n\",\n",
" \" yes | \\n\",\n",
" \" False | \\n\",\n",
" \"
\\n\",\n",
" \" \\n\",\n",
" \"
\\n\",\n",
" \"
\"\n",
" ],\n",
" \"text/plain\": [\n",
" \" survived pclass sex age sibsp parch fare embarked class \\\\\\n\",\n",
" \"0 0 3 male 22.0 1 0 7.2500 S Third \\n\",\n",
" \"1 1 1 female 38.0 1 0 71.2833 C First \\n\",\n",
" \"2 1 3 female 26.0 0 0 7.9250 S Third \\n\",\n",
" \"3 1 1 female 35.0 1 0 53.1000 S First \\n\",\n",
" \"4 0 3 male 35.0 0 0 8.0500 S Third \\n\",\n",
" \"5 0 3 male NaN 0 0 8.4583 Q Third \\n\",\n",
" \"6 0 1 male 54.0 0 0 51.8625 S First \\n\",\n",
" \"7 0 3 male 2.0 3 1 21.0750 S Third \\n\",\n",
" \"8 1 3 female 27.0 0 2 11.1333 S Third \\n\",\n",
" \"9 1 2 female 14.0 1 0 30.0708 C Second \\n\",\n",
" \"\\n\",\n",
" \" who adult_male deck embark_town alive alone \\n\",\n",
" \"0 man True NaN Southampton no False \\n\",\n",
" \"1 woman False C Cherbourg yes False \\n\",\n",
" \"2 woman False NaN Southampton yes True \\n\",\n",
" \"3 woman False C Southampton yes False \\n\",\n",
" \"4 man True NaN Southampton no True \\n\",\n",
" \"5 man True NaN Queenstown no True \\n\",\n",
" \"6 man True E Southampton no True \\n\",\n",
" \"7 child False NaN Southampton no False \\n\",\n",
" \"8 woman False NaN Southampton yes False \\n\",\n",
" \"9 child False NaN Cherbourg yes False \"\n",
" ]\n",
" },\n",
" \"execution_count\": 4,\n",
" \"metadata\": {},\n",
" \"output_type\": \"execute_result\"\n",
" }\n",
" ],\n",
" \"source\": [\n",
" \"import seaborn as sns\\n\",\n",
" \"titanic = sns.load_dataset('titanic')\\n\",\n",
" \"titanic.head(10)\"\n",
" ]\n",
" },\n",
" {\n",
" \"cell_type\": \"code\",\n",
" \"execution_count\": 5,\n",
" \"metadata\": {},\n",
" \"outputs\": [],\n",
" \"source\": [\n",
" \"X = titanic.drop('survived',axis = 1)\\n\",\n",
" \"Y = titanic['survived']\"\n",
" ]\n",
" },\n",
" {\n",
" \"cell_type\": \"code\",\n",
" \"execution_count\": 6,\n",
" \"metadata\": {},\n",
" \"outputs\": [\n",
" {\n",
" \"data\": {\n",
" \"text/plain\": [\n",
" \"(891, 14)\"\n",
" ]\n",
" },\n",
" \"execution_count\": 6,\n",
" \"metadata\": {},\n",
" \"output_type\": \"execute_result\"\n",
" }\n",
" ],\n",
" \"source\": [\n",
" \"X.shape\"\n",
" ]\n",
" },\n",
" {\n",
" \"cell_type\": \"code\",\n",
" \"execution_count\": 7,\n",
" \"metadata\": {},\n",
" \"outputs\": [\n",
" {\n",
" \"data\": {\n",
" \"text/plain\": [\n",
" \"(891,)\"\n",
" ]\n",
" },\n",
" \"execution_count\": 7,\n",
" \"metadata\": {},\n",
" \"output_type\": \"execute_result\"\n",
" }\n",
" ],\n",
" \"source\": [\n",
" \"Y.shape\"\n",
" ]\n",
" },\n",
" {\n",
" \"cell_type\": \"code\",\n",
" \"execution_count\": null,\n",
" \"metadata\": {},\n",
" \"outputs\": [],\n",
" \"source\": []\n",
" }\n",
" ],\n",
" \"metadata\": {\n",
" \"kernelspec\": {\n",
" \"display_name\": \"Python 3\",\n",
" \"language\": \"python\",\n",
" \"name\": \"python3\"\n",
" },\n",
" \"language_info\": {\n",
" \"codemirror_mode\": {\n",
" \"name\": \"ipython\",\n",
" \"version\": 3\n",
" },\n",
" \"file_extension\": \".py\",\n",
" \"mimetype\": \"text/x-python\",\n",
" \"name\": \"python\",\n",
" \"nbconvert_exporter\": \"python\",\n",
" \"pygments_lexer\": \"ipython3\",\n",
" \"version\": \"3.6.7\"\n",
" }\n",
" },\n",
" \"nbformat\": 4,\n",
" \"nbformat_minor\": 2\n",
"}\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 383
},
"id": "1b2938e2",
"outputId": "50844bb9-e0d9-48bf-a6ae-79e9aa0a0c8c"
},
"source": [
"import seaborn as sns\n",
"titanic = sns.load_dataset('titanic')\n",
"titanic.head(10)"
],
"execution_count": 9,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" survived pclass sex age sibsp parch fare embarked class \\\n",
"0 0 3 male 22.0 1 0 7.2500 S Third \n",
"1 1 1 female 38.0 1 0 71.2833 C First \n",
"2 1 3 female 26.0 0 0 7.9250 S Third \n",
"3 1 1 female 35.0 1 0 53.1000 S First \n",
"4 0 3 male 35.0 0 0 8.0500 S Third \n",
"5 0 3 male NaN 0 0 8.4583 Q Third \n",
"6 0 1 male 54.0 0 0 51.8625 S First \n",
"7 0 3 male 2.0 3 1 21.0750 S Third \n",
"8 1 3 female 27.0 0 2 11.1333 S Third \n",
"9 1 2 female 14.0 1 0 30.0708 C Second \n",
"\n",
" who adult_male deck embark_town alive alone \n",
"0 man True NaN Southampton no False \n",
"1 woman False C Cherbourg yes False \n",
"2 woman False NaN Southampton yes True \n",
"3 woman False C Southampton yes False \n",
"4 man True NaN Southampton no True \n",
"5 man True NaN Queenstown no True \n",
"6 man True E Southampton no True \n",
"7 child False NaN Southampton no False \n",
"8 woman False NaN Southampton yes False \n",
"9 child False NaN Cherbourg yes False "
],
"text/html": [
"\n",
" \n",
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" survived | \n",
" pclass | \n",
" sex | \n",
" age | \n",
" sibsp | \n",
" parch | \n",
" fare | \n",
" embarked | \n",
" class | \n",
" who | \n",
" adult_male | \n",
" deck | \n",
" embark_town | \n",
" alive | \n",
" alone | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" 3 | \n",
" male | \n",
" 22.0 | \n",
" 1 | \n",
" 0 | \n",
" 7.2500 | \n",
" S | \n",
" Third | \n",
" man | \n",
" True | \n",
" NaN | \n",
" Southampton | \n",
" no | \n",
" False | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" 1 | \n",
" female | \n",
" 38.0 | \n",
" 1 | \n",
" 0 | \n",
" 71.2833 | \n",
" C | \n",
" First | \n",
" woman | \n",
" False | \n",
" C | \n",
" Cherbourg | \n",
" yes | \n",
" False | \n",
"
\n",
" \n",
" | 2 | \n",
" 1 | \n",
" 3 | \n",
" female | \n",
" 26.0 | \n",
" 0 | \n",
" 0 | \n",
" 7.9250 | \n",
" S | \n",
" Third | \n",
" woman | \n",
" False | \n",
" NaN | \n",
" Southampton | \n",
" yes | \n",
" True | \n",
"
\n",
" \n",
" | 3 | \n",
" 1 | \n",
" 1 | \n",
" female | \n",
" 35.0 | \n",
" 1 | \n",
" 0 | \n",
" 53.1000 | \n",
" S | \n",
" First | \n",
" woman | \n",
" False | \n",
" C | \n",
" Southampton | \n",
" yes | \n",
" False | \n",
"
\n",
" \n",
" | 4 | \n",
" 0 | \n",
" 3 | \n",
" male | \n",
" 35.0 | \n",
" 0 | \n",
" 0 | \n",
" 8.0500 | \n",
" S | \n",
" Third | \n",
" man | \n",
" True | \n",
" NaN | \n",
" Southampton | \n",
" no | \n",
" True | \n",
"
\n",
" \n",
" | 5 | \n",
" 0 | \n",
" 3 | \n",
" male | \n",
" NaN | \n",
" 0 | \n",
" 0 | \n",
" 8.4583 | \n",
" Q | \n",
" Third | \n",
" man | \n",
" True | \n",
" NaN | \n",
" Queenstown | \n",
" no | \n",
" True | \n",
"
\n",
" \n",
" | 6 | \n",
" 0 | \n",
" 1 | \n",
" male | \n",
" 54.0 | \n",
" 0 | \n",
" 0 | \n",
" 51.8625 | \n",
" S | \n",
" First | \n",
" man | \n",
" True | \n",
" E | \n",
" Southampton | \n",
" no | \n",
" True | \n",
"
\n",
" \n",
" | 7 | \n",
" 0 | \n",
" 3 | \n",
" male | \n",
" 2.0 | \n",
" 3 | \n",
" 1 | \n",
" 21.0750 | \n",
" S | \n",
" Third | \n",
" child | \n",
" False | \n",
" NaN | \n",
" Southampton | \n",
" no | \n",
" False | \n",
"
\n",
" \n",
" | 8 | \n",
" 1 | \n",
" 3 | \n",
" female | \n",
" 27.0 | \n",
" 0 | \n",
" 2 | \n",
" 11.1333 | \n",
" S | \n",
" Third | \n",
" woman | \n",
" False | \n",
" NaN | \n",
" Southampton | \n",
" yes | \n",
" False | \n",
"
\n",
" \n",
" | 9 | \n",
" 1 | \n",
" 2 | \n",
" female | \n",
" 14.0 | \n",
" 1 | \n",
" 0 | \n",
" 30.0708 | \n",
" C | \n",
" Second | \n",
" child | \n",
" False | \n",
" NaN | \n",
" Cherbourg | \n",
" yes | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
\n",
"
\n",
"
\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "titanic",
"summary": "{\n \"name\": \"titanic\",\n \"rows\": 891,\n \"fields\": [\n {\n \"column\": \"survived\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pclass\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"female\",\n \"male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.526497332334044,\n \"min\": 0.42,\n \"max\": 80.0,\n \"num_unique_values\": 88,\n \"samples\": [\n 0.75,\n 22.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sibsp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 8,\n \"num_unique_values\": 7,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"parch\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 6,\n \"num_unique_values\": 7,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 49.693428597180905,\n \"min\": 0.0,\n \"max\": 512.3292,\n \"num_unique_values\": 248,\n \"samples\": [\n 11.2417,\n 51.8625\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embarked\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"S\",\n \"C\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"class\",\n 
\"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Third\",\n \"First\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"who\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"man\",\n \"woman\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"adult_male\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n false,\n true\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"deck\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"C\",\n \"E\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embark_town\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Southampton\",\n \"Cherbourg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alive\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"yes\",\n \"no\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alone\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n true,\n false\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 9
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "f5507dec"
},
"source": [
"# Separate the target column from the remaining feature columns.\n",
"Y = titanic['survived']\n",
"X = titanic.drop(columns='survived')"
],
"execution_count": 10,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "15cb730c",
"outputId": "013935cc-3485-4464-92f7-a1d35610398e"
},
"source": [
"X.shape"
],
"execution_count": 11,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(891, 14)"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "42d82e7e",
"outputId": "569c12a3-aa63-412e-dbda-9e218ac0fcb2"
},
"source": [
"Y.shape"
],
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(891,)"
]
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "markdown",
"source": [
"## Dealing with messy data"
],
"metadata": {
"id": "BN_Y-5xcReHs"
}
},
{
"cell_type": "code",
"source": [
"import seaborn as sns\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"tips = sns.load_dataset('tips')"
],
"metadata": {
"id": "6V7NzckGO9cR"
},
"execution_count": 13,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Copy the column first: the NaN injection below then acts on an independent\n",
"# Series, which avoids pandas' SettingWithCopyWarning and leaves `tips` unchanged.\n",
"size = tips[\"size\"].copy()\n",
"# .loc slicing is label-based and end-inclusive, so rows 0 through 15 become NaN.\n",
"size.loc[:15] = np.nan\n",
"size.head(20)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 818
},
"id": "P_r3-rSpPMoe",
"outputId": "31862675-e3c0-4921-e31d-a1db2a8ff466"
},
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/tmp/ipykernel_250/2240814414.py:2: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" size.loc[:15] = np.nan\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 NaN\n",
"1 NaN\n",
"2 NaN\n",
"3 NaN\n",
"4 NaN\n",
"5 NaN\n",
"6 NaN\n",
"7 NaN\n",
"8 NaN\n",
"9 NaN\n",
"10 NaN\n",
"11 NaN\n",
"12 NaN\n",
"13 NaN\n",
"14 NaN\n",
"15 NaN\n",
"16 3.0\n",
"17 3.0\n",
"18 3.0\n",
"19 3.0\n",
"Name: size, dtype: float64"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" size | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" NaN | \n",
"
\n",
" \n",
" | 1 | \n",
" NaN | \n",
"
\n",
" \n",
" | 2 | \n",
" NaN | \n",
"
\n",
" \n",
" | 3 | \n",
" NaN | \n",
"
\n",
" \n",
" | 4 | \n",
" NaN | \n",
"
\n",
" \n",
" | 5 | \n",
" NaN | \n",
"
\n",
" \n",
" | 6 | \n",
" NaN | \n",
"
\n",
" \n",
" | 7 | \n",
" NaN | \n",
"
\n",
" \n",
" | 8 | \n",
" NaN | \n",
"
\n",
" \n",
" | 9 | \n",
" NaN | \n",
"
\n",
" \n",
" | 10 | \n",
" NaN | \n",
"
\n",
" \n",
" | 11 | \n",
" NaN | \n",
"
\n",
" \n",
" | 12 | \n",
" NaN | \n",
"
\n",
" \n",
" | 13 | \n",
" NaN | \n",
"
\n",
" \n",
" | 14 | \n",
" NaN | \n",
"
\n",
" \n",
" | 15 | \n",
" NaN | \n",
"
\n",
" \n",
" | 16 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 17 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 18 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 19 | \n",
" 3.0 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"metadata": {},
"execution_count": 14
}
]
},
{
"cell_type": "code",
"source": [
"size.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rnPfIPArQOFB",
"outputId": "354e9712-9be9-4027-9f1f-5f1a10c0af5c"
},
"execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(244,)"
]
},
"metadata": {},
"execution_count": 15
}
]
},
{
"cell_type": "code",
"source": [
"size.isnull().sum()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iqtgBQ89QU81",
"outputId": "7f2d365d-32c8-4a27-c954-53974aef2182"
},
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"np.int64(16)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"source": [
"# Rounded mean of the observed values (Series.mean skips NaN by default).\n",
"mean = round(size.mean())\n",
"print(mean)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9LL1usX3QfWL",
"outputId": "bb5afd03-3ca1-4153-cbd8-c5cbd21834f3"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"3\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Impute the missing entries with the rounded mean. Rebinding the result instead of\n",
"# using inplace=True keeps the cell safe to re-run and free of in-place mutation.\n",
"size = size.fillna(mean)\n",
"size.head(20)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 711
},
"id": "ghShyvScQigg",
"outputId": "35e4f709-e614-40b9-ba4a-e092d874cdce"
},
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 3.0\n",
"1 3.0\n",
"2 3.0\n",
"3 3.0\n",
"4 3.0\n",
"5 3.0\n",
"6 3.0\n",
"7 3.0\n",
"8 3.0\n",
"9 3.0\n",
"10 3.0\n",
"11 3.0\n",
"12 3.0\n",
"13 3.0\n",
"14 3.0\n",
"15 3.0\n",
"16 3.0\n",
"17 3.0\n",
"18 3.0\n",
"19 3.0\n",
"Name: size, dtype: float64"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" size | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 5 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 6 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 7 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 8 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 9 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 10 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 11 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 12 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 13 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 14 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 15 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 16 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 17 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 18 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 19 | \n",
" 3.0 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "code",
"source": [
"# NOTE(review): this cell repeats the imputation of the preceding cell; fillna changes\n",
"# nothing once no NaN values remain, so the cell looks like a candidate for removal.\n",
"size = size.fillna(mean)\n",
"size.head(20)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 711
},
"id": "_mQUexwyQm1L",
"outputId": "cf4b0447-603a-4236-80e6-0579ed7af235"
},
"execution_count": 20,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"0 3.0\n",
"1 3.0\n",
"2 3.0\n",
"3 3.0\n",
"4 3.0\n",
"5 3.0\n",
"6 3.0\n",
"7 3.0\n",
"8 3.0\n",
"9 3.0\n",
"10 3.0\n",
"11 3.0\n",
"12 3.0\n",
"13 3.0\n",
"14 3.0\n",
"15 3.0\n",
"16 3.0\n",
"17 3.0\n",
"18 3.0\n",
"19 3.0\n",
"Name: size, dtype: float64"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" size | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 1 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 5 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 6 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 7 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 8 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 9 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 10 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 11 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 12 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 13 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 14 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 15 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 16 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 17 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 18 | \n",
" 3.0 | \n",
"
\n",
" \n",
" | 19 | \n",
" 3.0 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"metadata": {},
"execution_count": 20
}
]
},
{
"cell_type": "code",
"source": [
"plt.hist(size)\n",
"plt.show()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 430
},
"id": "5-eF8tU2QqML",
"outputId": "90cc9e79-d455-4bd3-81dc-df23ae164e78"
},
"execution_count": 21,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAH+9JREFUeJzt3XtwVPX9//HXhiWXYnZj0iabLQmkLTXcjURjhFqUjBiYFEaq4kSaIgO9JCikVUhHUFokwHhBaEyEsaAzUKqdgopjLA1K6hhCCKUjSBEqQipu0g5ml8QhRnJ+f7Tu77uSqqGbnM+uz8fMmXHPOXvy3jM6eXr2bNZhWZYlAAAAg8TYPQAAAMCnESgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjOO0e4BL0dPTozNnzigxMVEOh8PucQAAwBdgWZbOnTsnr9ermJjPvkYSkYFy5swZZWRk2D0GAAC4BC0tLRo6dOhn7hORgZKYmCjp3y/Q5XLZPA0AAPgiAoGAMjIygr/HP0tEBsonb+u4XC4CBQCACPNFbs/gJlkAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABjHafcA+PIavvQlu0fos3dXT7d7BAD4UuAKCgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDh9DpT6+noVFRXJ6/XK4XBo586d/3XfH//4x3I4HFq3bl3I+rNnz6q4uFgul0tJSUmaN2+eOjo6+joKAACIUn0OlM7OTo0fP15VVVWfud+OHTu0b98+eb3ei7YVFxfryJEj2r17t3bt2qX6+notWLCgr6MAAIAo5ezrEwoLC1VYWPiZ+7z33ntauHChXnnlFU2fHvr19EePHlVtba2ampqUm5srSdqwYYOmTZumhx9+uNegAQAAXy5hvwelp6dHc+bM0b333qvRo0dftL2hoUFJSUnBOJGkgoICxcTEqLGxsddjdnV1KRAIhCwAACB6hT1Q1qxZI6fTqbvvvrvX7T6fT6mpqSHrnE6nkpOT5fP5en1OZWWl3G53cMnIyAj32AAAwCBhDZTm5mY9/vjj2rJlixwOR9iOW1FRIb/fH1xaWlrCdmwAAGCesAbKn//8Z7W1tSkzM1NOp1NOp1OnTp3Sz372Mw0fPlyS5PF41NbWFvK8jz/+WGfPnpXH4+n1uHFxcXK5XCELAACIXn2+SfazzJkzRwUFBSHrpk6dqjlz5mju3LmSpPz8fLW3t6u5uVkTJkyQJO3Zs0c9PT3Ky8sL5zgAACBC9TlQOjo6dOLEieDjkydP6tChQ0pOTlZmZqZSUlJC9h88eLA8Ho+uuOIKSdLIkSN18803a/78+aqpqVF3d7fKyso0e/ZsPsEDAAAkXcJbPAcOHFBOTo5ycnIkSeXl5crJydHy5cu/8DG2bt2q7OxsTZkyRdOmTdOkSZO0cePGvo4CAACiVJ+voEyePFmWZX3h/d99992L1iUnJ2vbtm19/dEAAOBLgu/iAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxulzoNTX16uoqEher1cOh0M7d+4Mbuvu7taSJUs0duxYDRkyRF6vVz/4wQ905syZkGOcPXtWxcXFcr
lcSkpK0rx589TR0fE/vxgAABAd+hwonZ2dGj9+vKqqqi7a9uGHH+rgwYNatmyZDh48qD/84Q86duyYvve974XsV1xcrCNHjmj37t3atWuX6uvrtWDBgkt/FQAAIKo4LMuyLvnJDod27NihmTNn/td9mpqadM011+jUqVPKzMzU0aNHNWrUKDU1NSk3N1eSVFtbq2nTpukf//iHvF7v5/7cQCAgt9stv98vl8t1qePDZsOXvmT3CH327urpdo8AABGrL7+/+/0eFL/fL4fDoaSkJElSQ0ODkpKSgnEiSQUFBYqJiVFjY2Ovx+jq6lIgEAhZAABA9OrXQDl//ryWLFmiO+64I1hKPp9PqampIfs5nU4lJyfL5/P1epzKykq53e7gkpGR0Z9jAwAAm/VboHR3d+u2226TZVmqrq7+n45VUVEhv98fXFpaWsI0JQAAMJGzPw76SZycOnVKe/bsCXmfyePxqK2tLWT/jz/+WGfPnpXH4+n1eHFxcYqLi+uPUQEAgIHCfgXlkzg5fvy4/vSnPyklJSVke35+vtrb29Xc3Bxct2fPHvX09CgvLy/c4wAAgAjU5ysoHR0dOnHiRPDxyZMndejQISUnJys9PV3f//73dfDgQe3atUsXLlwI3leSnJys2NhYjRw5UjfffLPmz5+vmpoadXd3q6ysTLNnz/5Cn+ABAADRr8+BcuDAAd1www3Bx+Xl5ZKkkpISPfjgg3rhhRckSVdeeWXI81599VVNnjxZkrR161aVlZVpypQpiomJ0axZs7R+/fpLfAkAACDa9DlQJk+erM/60ylf5M+qJCcna9u2bX390QAA4EuC7+IBAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADG6XOg1NfXq6ioSF6vVw6HQzt37gzZblmWli9frvT0dCUkJKigoEDHjx8P2efs2bMqLi6Wy+VSUlKS5s2bp46Ojv/phQAAgOjR50Dp7OzU+PHjVVVV1ev2tWvXav369aqpqVFjY6OGDBmiqVOn6vz588F9iouLdeTIEe3evVu7du1SfX29FixYcOmvAgAARBVnX59QWFiowsLCXrdZlqV169bp/vvv14wZMyRJzzzzjNLS0rRz507Nnj1bR48eVW1trZqampSbmytJ2rBhg6ZNm6aHH35YXq/3f3g5AAAgGoT1HpSTJ0/K5/OpoKAguM7tdisvL08NDQ2SpIaGBiUlJQXjRJIKCgoUExOjxsbGXo/b1dWlQCAQsgAAgOgV1kDx+XySpLS0tJD1aWlpwW0+n0+pqakh251Op5KTk4P7fFplZaXcbndwycjICOfYAADAMBHxKZ6Kigr5/f7g0tLSYvdIAACgH4U1UDwejySptbU1ZH1ra2twm8fjUVtbW8j2jz/+WGfPng3u82lxcXFyuVwhCwAAiF5hDZSsrCx5PB7V1dUF1wUCATU2Nio/P1+SlJ+fr/b2djU3Nwf32bNnj3p6epSXlxfOcQAAQITq86d4Ojo6dOLEieDjkydP6tChQ0pOTlZmZqYWLVqklStXasSIEcrKytKyZcvk9Xo1c+ZMSdLIkSN18803a/78+aqpqVF3d7fKyso0e/ZsPsEDAAAkXUKgHDhwQDfccEPwcXl5uSSppKREW7Zs0X333afOzk4tWLBA7e3tmjRpkmpraxUfHx98ztatW1VWVqYpU6YoJiZGs2bN0vr168PwcgAAQDRwWJZl2T1EXwUCAbndbvn9fu5HiWDDl75k9wh99u7q6XaPAA
ARqy+/vyPiUzwAAODLhUABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYJ+yBcuHCBS1btkxZWVlKSEjQN7/5Tf3qV7+SZVnBfSzL0vLly5Wenq6EhAQVFBTo+PHj4R4FAABEqLAHypo1a1RdXa1f//rXOnr0qNasWaO1a9dqw4YNwX3Wrl2r9evXq6amRo2NjRoyZIimTp2q8+fPh3scAAAQgZzhPuAbb7yhGTNmaPr06ZKk4cOH67e//a32798v6d9XT9atW6f7779fM2bMkCQ988wzSktL086dOzV79uxwjwQAACJM2K+gXHfddaqrq9Pbb78tSfrrX/+q119/XYWFhZKkkydPyufzqaCgIPgct9utvLw8NTQ09HrMrq4uBQKBkAUAAESvsF9BWbp0qQKBgLKzszVo0CBduHBBDz30kIqLiyVJPp9PkpSWlhbyvLS0tOC2T6usrNSKFSvCPSoAADBU2K+gPPvss9q6dau2bdumgwcP6umnn9bDDz+sp59++pKPWVFRIb/fH1xaWlrCODEAADBN2K+g3HvvvVq6dGnwXpKxY8fq1KlTqqysVElJiTwejySptbVV6enpwee1trbqyiuv7PWYcXFxiouLC/eoAADAUGG/gvLhhx8qJib0sIMGDVJPT48kKSsrSx6PR3V1dcHtgUBAjY2Nys/PD/c4AAAgAoX9CkpRUZEeeughZWZmavTo0frLX/6iRx99VHfddZckyeFwaNGiRVq5cqVGjBihrKwsLVu2TF6vVzNnzgz3OAAAIAKFPVA2bNigZcuW6ac//ana2trk9Xr1ox/9SMuXLw/uc99996mzs1MLFixQe3u7Jk2apNraWsXHx4d7HAAAEIEc1v/9E68RIhAIyO12y+/3y+Vy2T0OLtHwpS/ZPUKfvbt6ut0jAEDE6svvb76LBwAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYJx+CZT33ntPd955p1JSUpSQkKCxY8fqwIEDwe2WZWn58uVKT09XQkKCCgoKdPz48f4YBQAARKCwB8oHH3ygiRMnavDgwXr55Zf11ltv6ZFHHtHll18e3Gft2rVav369ampq1NjYqCFDhmjq1Kk6f/58uMcBAAARyBnuA65Zs0YZGRnavHlzcF1WVlbwny3L0rp163T//fdrxowZkqRnnnlGaWlp2rlzp2bPnh3ukQAAQIQJ+xWUF154Qbm5ubr11luVmpqqnJwcbdq0Kbj95MmT8vl8KigoCK5zu93Ky8tTQ0NDuMcBAAARKOyB8s4776i6ulojRozQK6+8op/85Ce6++679fTTT0uSfD6fJCktLS3keWlpacFtn9bV1aVAIBCyAACA6BX2t3h6enqUm5urVatWSZJycnJ0+PBh1dTUqKSk5JKOWVlZqRUrVoRzTAAAYLCwX0FJT0/XqFGjQtaNHDlSp0+fliR5PB5JUmtra8g+ra2twW2fVlFRIb/fH1xaWlrCPTYAADBI2ANl4sSJOnbsWMi6t99+W8OGDZP07xtmPR
6P6urqgtsDgYAaGxuVn5/f6zHj4uLkcrlCFgAAEL3C/hbP4sWLdd1112nVqlW67bbbtH//fm3cuFEbN26UJDkcDi1atEgrV67UiBEjlJWVpWXLlsnr9WrmzJnhHgcAAESgsAfK1VdfrR07dqiiokK//OUvlZWVpXXr1qm4uDi4z3333afOzk4tWLBA7e3tmjRpkmpraxUfHx/ucQAAQARyWJZl2T1EXwUCAbndbvn9ft7uiWDDl75k9wh99u7q6XaPAAARqy+/v/kuHgAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYByn3QMA6F98azSASMQVFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMbp90BZvXq1HA6HFi1aFFx3/vx5lZaWKiUlRZdddplmzZql1tbW/h4FAABEiH4NlKamJj355JMaN25cyPrFixfrxRdf1HPPPae9e/fqzJkzuuWWW/pzFAAAEEH6LVA6OjpUXFysTZs26fLLLw+u9/v9euqpp/Too4/qxhtv1IQJE7R582a98cYb2rdvX3+NAwAAIki/BUppaammT5+ugoKCkPXNzc3q7u4OWZ+dna3MzEw1NDT0eqyuri4FAoGQBQAARC9nfxx0+/btOnjwoJqami7a5vP5FBsbq6SkpJD1aWlp8vl8vR6vsrJSK1as6I9RAQCAgcJ+BaWlpUX33HOPtm7dqvj4+LAcs6KiQn6/P7i0tLSE5bgAAMBMYQ+U5uZmtbW16aqrrpLT6ZTT6dTevXu1fv16OZ1OpaWl6aOPPlJ7e3vI81pbW+XxeHo9ZlxcnFwuV8gCAACiV9jf4pkyZYrefPPNkHVz585Vdna2lixZooyMDA0ePFh1dXWaNWuWJOnYsWM6ffq08vPzwz0OAACIQGEPlMTERI0ZMyZk3ZAhQ5SSkhJcP2/ePJWXlys5OVkul0sLFy5Ufn6+rr322nCPAwAAIlC/3CT7eR577DHFxMRo1qxZ6urq0tSpU/XEE0/YMQoAADDQgATKa6+9FvI4Pj5eVVVVqqqqGogfDwAAIgzfxQMAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIwzIN9mDADRbvjSl+weoc/eXT3d7hGA/4orKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIwT9kCprKzU1VdfrcTERKWmpmrmzJk6duxYyD7nz59XaWmpUlJSdNlll2nWrFlqbW0N9ygAACBChT1Q9u7dq9LSUu3bt0+7d+9Wd3e3brrpJnV2dgb3Wbx4sV588UU999xz2rt3r86cOaNbbrkl3KMAAIAI5Qz3AWtra0Meb9myRampqWpubtb1118vv9+vp556Stu2bdONN94oSdq8ebNGjhypffv26dprrw33SAAAIML0+z0ofr9fkpScnCxJam5uVnd3twoKCoL7ZGdnKzMzUw0NDb0eo6urS4FAIGQBAADRq18DpaenR4sWLdLEiRM1ZswYSZLP51NsbKySkpJC9k1LS5PP5+v1OJWVlXK73cElIyOjP8cGAAA269dAKS0t1eHDh7V9+/b/6TgVFRXy+/3BpaWlJUwTAgAAE4X9HpRPlJWVadeuXaqvr9
fQoUOD6z0ejz766CO1t7eHXEVpbW2Vx+Pp9VhxcXGKi4vrr1EBAIBhwn4FxbIslZWVaceOHdqzZ4+ysrJCtk+YMEGDBw9WXV1dcN2xY8d0+vRp5efnh3scAAAQgcJ+BaW0tFTbtm3T888/r8TExOB9JW63WwkJCXK73Zo3b57Ky8uVnJwsl8ulhQsXKj8/n0/wAAAASf0QKNXV1ZKkyZMnh6zfvHmzfvjDH0qSHnvsMcXExGjWrFnq6urS1KlT9cQTT4R7FAAAEKHCHiiWZX3uPvHx8aqqqlJVVVW4fzwAAIgCfBcPAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADCO0+4BAAD4ooYvfcnuEfrs3dXT7R4hInEFBQAAGIdAAQAAxuEtnl5wCREAEC6R+DtFsv/3CldQAACAcQgUAABgHAIFAAAYh0ABAADGsTVQqqqqNHz4cMXHxysvL0/79++3cxwAAGAI2wLld7/7ncrLy/XAAw/o4MGDGj9+vKZOnaq2tja7RgIAAIawLVAeffRRzZ8/X3PnztWoUaNUU1Ojr3zlK/rNb35j10gAAMAQtvwdlI8++kjNzc2qqKgIrouJiVFBQYEaGhou2r+rq0tdXV3Bx36/X5IUCAT6Zb6erg/75bj9qb/ORX/iPA8MzvPA4DwPjEg8z5GqP/79+OSYlmV97r62BMq//vUvXbhwQWlpaSHr09LS9Le//e2i/SsrK7VixYqL1mdkZPTbjJHGvc7uCb4cOM8Dg/M8MDjP+Cz9+e/HuXPn5Ha7P3OfiPhLshUVFSovLw8+7unp0dmzZ5WSkiKHwxHWnxUIBJSRkaGWlha5XK6wHhv/H+d5YHCeBwbneWBwngdOf51ry7J07tw5eb3ez93XlkD56le/qkGDBqm1tTVkfWtrqzwez0X7x8XFKS4uLmRdUlJSf44ol8vFfwADgPM8MDjPA4PzPDA4zwOnP8715105+YQtN8nGxsZqwoQJqqurC67r6elRXV2d8vPz7RgJAAAYxLa3eMrLy1VSUqLc3Fxdc801WrdunTo7OzV37ly7RgIAAIawLVBuv/12/fOf/9Ty5cvl8/l05ZVXqra29qIbZwdaXFycHnjggYveUkJ4cZ4HBud5YHCeBwbneeCYcK4d1hf5rA8AAMAA4rt4AACAcQgUAABgHAIFAAAYh0ABAADGIVD+o76+XkVFRfJ6vXI4HNq5c6fdI0WdyspKXX311UpMTFRqaqpmzpypY8eO2T1WVKqurta4ceOCf2QpPz9fL7/8st1jRb3Vq1fL4XBo0aJFdo8SVR588EE5HI6QJTs72+6xotJ7772nO++8UykpKUpISNDYsWN14MABW2YhUP6js7NT48ePV1VVld2jRK29e/eqtLRU+/bt0+7du9Xd3a2bbrpJnZ2ddo8WdYYOHarVq1erublZBw4c0I033qgZM2boyJEjdo8WtZqamvTkk09q3Lhxdo8SlUaPHq33338/uLz++ut2jxR1PvjgA02cOFGDBw/Wyy+/rLfeekuPPPKILr/8clvmiYjv4hkIhYWFKiwstHuMqFZbWxvyeMuWLUpNTVVzc7Ouv/56m6aKTkVFRSGPH3roIVVXV2vfvn0aPXq0TVNFr46ODhUXF2vTpk1auXKl3eNEJafT2etXoSB81qxZo4yMDG3evDm4Lisry7Z5uIIC2/j9fklScnKyzZNEtwsXLmj79u3q7OzkqyT6SWlpqaZPn66CggK7R4lax48fl9fr1Te+8Q0VFxfr9OnTdo8UdV544QXl5ubq1ltvVWpqqnJycrRp0ybb5uEKCmzR09
OjRYsWaeLEiRozZozd40SlN998U/n5+Tp//rwuu+wy7dixQ6NGjbJ7rKizfft2HTx4UE1NTXaPErXy8vK0ZcsWXXHFFXr//fe1YsUKfec739Hhw4eVmJho93hR45133lF1dbXKy8v1i1/8Qk1NTbr77rsVGxurkpKSAZ+HQIEtSktLdfjwYd5H7kdXXHGFDh06JL/fr9///vcqKSnR3r17iZQwamlp0T333KPdu3crPj7e7nGi1v99+33cuHHKy8vTsGHD9Oyzz2revHk2ThZdenp6lJubq1WrVkmScnJydPjwYdXU1NgSKLzFgwFXVlamXbt26dVXX9XQoUPtHidqxcbG6lvf+pYmTJigyspKjR8/Xo8//rjdY0WV5uZmtbW16aqrrpLT6ZTT6dTevXu1fv16OZ1OXbhwwe4Ro1JSUpK+/e1v68SJE3aPElXS09Mv+h+YkSNH2vZ2GldQMGAsy9LChQu1Y8cOvfbaa7befPVl1NPTo66uLrvHiCpTpkzRm2++GbJu7ty5ys7O1pIlSzRo0CCbJotuHR0d+vvf/645c+bYPUpUmThx4kV/+uHtt9/WsGHDbJmHQPmPjo6OkBo/efKkDh06pOTkZGVmZto4WfQoLS3Vtm3b9PzzzysxMVE+n0+S5Ha7lZCQYPN00aWiokKFhYXKzMzUuXPntG3bNr322mt65ZVX7B4tqiQmJl50D9WQIUOUkpLCvVVh9POf/1xFRUUaNmyYzpw5owceeECDBg3SHXfcYfdoUWXx4sW67rrrtGrVKt12223av3+/Nm7cqI0bN9ozkAXLsizr1VdftSRdtJSUlNg9WtTo7fxKsjZv3mz3aFHnrrvusoYNG2bFxsZaX/va16wpU6ZYf/zjH+0e60vhu9/9rnXPPffYPUZUuf3226309HQrNjbW+vrXv27dfvvt1okTJ+weKyq9+OKL1pgxY6y4uDgrOzvb2rhxo22zOCzLsuxJIwAAgN5xkywAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4/w8vLGekpacueAAAAABJRU5ErkJggg==\n"
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"min_val = size.mean() - (3 * size.std())\n",
"print(min_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_U-ot3whQzSA",
"outputId": "e43779ed-5fca-4010-d9a0-31a89568add0"
},
"execution_count": 22,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"-0.1974349065787404\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"max_val = size.mean() + (3 * size.std())\n",
"print(max_val)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jT4T3Z-eQ28V",
"outputId": "4260d170-e038-4610-eb87-0df7e8f21226"
},
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"5.369566054119724\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"outliers = size[size > max_val]\n",
"outliers.count()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DE1r88vFQ6ZR",
"outputId": "0797c360-89e3-45d9-a951-b64e6f1a1597"
},
"execution_count": 24,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"np.int64(4)"
]
},
"metadata": {},
"execution_count": 24
}
]
},
{
"cell_type": "code",
"source": [
"print(outliers)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PTWFJ0CaQ93p",
"outputId": "15664ebb-ac46-4edb-b311-903ad25739b6"
},
"execution_count": 25,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"125 6.0\n",
"141 6.0\n",
"143 6.0\n",
"156 6.0\n",
"Name: size, dtype: float64\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"age = size[size <= max_val]\n",
"age.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vDZ1IA-3RCHP",
"outputId": "9f770a1a-4439-4abf-a5a0-387566ae8d93"
},
"execution_count": 26,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(240,)"
]
},
"metadata": {},
"execution_count": 26
}
]
}
]
}