{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyOanFyooUUWZniR03cv72o+", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "cMIjQEwLJPKQ", "outputId": "c3c64dff-48e9-4dab-e007-fc9cf25753e9" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cloning into 'The-Machine-Learning-Workshop'...\n", "remote: Enumerating objects: 805, done.\u001b[K\n", "remote: Counting objects: 100% (23/23), done.\u001b[K\n", "remote: Compressing objects: 100% (15/15), done.\u001b[K\n", "remote: Total 805 (delta 15), reused 8 (delta 8), pack-reused 782 (from 1)\u001b[K\n", "Receiving objects: 100% (805/805), 10.36 MiB | 9.64 MiB/s, done.\n", "Resolving deltas: 100% (293/293), done.\n" ] } ], "source": [ "# Skip the clone when the checkout already exists so this cell can be re-run safely.\n", "![ -d The-Machine-Learning-Workshop ] || git clone https://github.com/MsSaidat25/The-Machine-Learning-Workshop.git" ] }, { "cell_type": "code", "source": [ "import os\n", "os.chdir('/content/The-Machine-Learning-Workshop')\n", "!ls # see all folders/files" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fkumve32Jj-w", "outputId": "f5f0c473-b4c3-4e25-b4e9-23db465582c1" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Chapter01 Chapter03 Chapter05 Graphics README.md\n", "Chapter02 Chapter04 Chapter06 LICENSE requirements.txt\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8a5d3702", "outputId": "312c313b-b695-48a4-e9f6-cad0ad63f4f9" }, "source": [ "import os\n", "os.chdir('/content/The-Machine-Learning-Workshop/Chapter01')\n", "!ls" ], "execution_count": 4, "outputs": [ { "output_type": "stream", 
"name": "stdout", "text": [ "Activity1.01 Exercise1.01 Exercise1.03\n", "Activity1.02 Exercise1.02 Exercise1.04\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "99014702", "outputId": "a7833725-7b7b-4655-8a28-5f0898678ede" }, "source": [ "import json\n", "\n", "notebook_path = '/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01/Activity1_01.ipynb'\n", "\n", "# Notebooks are stored as UTF-8 JSON; pass the encoding explicitly so parsing\n", "# does not depend on the runtime's locale default.\n", "with open(notebook_path, 'r', encoding='utf-8') as f:\n", "    notebook_content = json.load(f)\n", "\n", "# Flatten every code/markdown cell into {'cell_type', 'code'}; other cell types are skipped.\n", "cells_to_generate = []\n", "for cell in notebook_content['cells']:\n", "    if cell['cell_type'] == 'code':\n", "        cells_to_generate.append({'cell_type': 'python', 'code': ''.join(cell['source'])})\n", "    elif cell['cell_type'] == 'markdown':\n", "        cells_to_generate.append({'cell_type': 'markdown', 'code': ''.join(cell['source'])})\n", "\n", "# This list will be used by the next command to generate the actual cells.\n", "# For now, just print the cell count and a preview of the first cell to confirm the parsing.\n", "print(f\"Found {len(cells_to_generate)} cells in the notebook. Preview of the first cell:\\n{cells_to_generate[0]['code'] if cells_to_generate else 'No cells found.'}\")" ], "execution_count": 8, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Found 5 cells in the notebook. 
Preview of the first cell:\n", "import seaborn as sns\n", "titanic = sns.load_dataset('titanic')\n", "titanic.head(10)\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "12666191", "outputId": "cb746e2f-c18d-4518-88c9-d6e3a401b318" }, "source": [ "import os\n", "os.chdir('/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01')\n", "!ls" ], "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Activity1_01.ipynb titanic.csv unit_test_activity1_01.ipynb\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "17425fb5", "outputId": "082e27d0-240f-4fd8-da28-828aeda873a8" }, "source": [ "!cat '/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01/Activity1_01.ipynb'" ], "execution_count": 7, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "{\n", " \"cells\": [\n", " {\n", " \"cell_type\": \"code\",\n", " \"execution_count\": 4,\n", " \"metadata\": {},\n", " \"outputs\": [\n", " {\n", " \"data\": {\n", " \"text/html\": [\n", " \"
\\n\",\n", " \"\\n\",\n", " \"\\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" 
\\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \" \\n\",\n", " \"
survivedpclasssexagesibspparchfareembarkedclasswhoadult_maledeckembark_townalivealone
003male22.0107.2500SThirdmanTrueNaNSouthamptonnoFalse
111female38.01071.2833CFirstwomanFalseCCherbourgyesFalse
213female26.0007.9250SThirdwomanFalseNaNSouthamptonyesTrue
311female35.01053.1000SFirstwomanFalseCSouthamptonyesFalse
403male35.0008.0500SThirdmanTrueNaNSouthamptonnoTrue
503maleNaN008.4583QThirdmanTrueNaNQueenstownnoTrue
601male54.00051.8625SFirstmanTrueESouthamptonnoTrue
703male2.03121.0750SThirdchildFalseNaNSouthamptonnoFalse
813female27.00211.1333SThirdwomanFalseNaNSouthamptonyesFalse
912female14.01030.0708CSecondchildFalseNaNCherbourgyesFalse
\\n\",\n", " \"
\"\n", " ],\n", " \"text/plain\": [\n", " \" survived pclass sex age sibsp parch fare embarked class \\\\\\n\",\n", " \"0 0 3 male 22.0 1 0 7.2500 S Third \\n\",\n", " \"1 1 1 female 38.0 1 0 71.2833 C First \\n\",\n", " \"2 1 3 female 26.0 0 0 7.9250 S Third \\n\",\n", " \"3 1 1 female 35.0 1 0 53.1000 S First \\n\",\n", " \"4 0 3 male 35.0 0 0 8.0500 S Third \\n\",\n", " \"5 0 3 male NaN 0 0 8.4583 Q Third \\n\",\n", " \"6 0 1 male 54.0 0 0 51.8625 S First \\n\",\n", " \"7 0 3 male 2.0 3 1 21.0750 S Third \\n\",\n", " \"8 1 3 female 27.0 0 2 11.1333 S Third \\n\",\n", " \"9 1 2 female 14.0 1 0 30.0708 C Second \\n\",\n", " \"\\n\",\n", " \" who adult_male deck embark_town alive alone \\n\",\n", " \"0 man True NaN Southampton no False \\n\",\n", " \"1 woman False C Cherbourg yes False \\n\",\n", " \"2 woman False NaN Southampton yes True \\n\",\n", " \"3 woman False C Southampton yes False \\n\",\n", " \"4 man True NaN Southampton no True \\n\",\n", " \"5 man True NaN Queenstown no True \\n\",\n", " \"6 man True E Southampton no True \\n\",\n", " \"7 child False NaN Southampton no False \\n\",\n", " \"8 woman False NaN Southampton yes False \\n\",\n", " \"9 child False NaN Cherbourg yes False \"\n", " ]\n", " },\n", " \"execution_count\": 4,\n", " \"metadata\": {},\n", " \"output_type\": \"execute_result\"\n", " }\n", " ],\n", " \"source\": [\n", " \"import seaborn as sns\\n\",\n", " \"titanic = sns.load_dataset('titanic')\\n\",\n", " \"titanic.head(10)\"\n", " ]\n", " },\n", " {\n", " \"cell_type\": \"code\",\n", " \"execution_count\": 5,\n", " \"metadata\": {},\n", " \"outputs\": [],\n", " \"source\": [\n", " \"X = titanic.drop('survived',axis = 1)\\n\",\n", " \"Y = titanic['survived']\"\n", " ]\n", " },\n", " {\n", " \"cell_type\": \"code\",\n", " \"execution_count\": 6,\n", " \"metadata\": {},\n", " \"outputs\": [\n", " {\n", " \"data\": {\n", " \"text/plain\": [\n", " \"(891, 14)\"\n", " ]\n", " },\n", " \"execution_count\": 6,\n", " \"metadata\": {},\n", " 
\"output_type\": \"execute_result\"\n", " }\n", " ],\n", " \"source\": [\n", " \"X.shape\"\n", " ]\n", " },\n", " {\n", " \"cell_type\": \"code\",\n", " \"execution_count\": 7,\n", " \"metadata\": {},\n", " \"outputs\": [\n", " {\n", " \"data\": {\n", " \"text/plain\": [\n", " \"(891,)\"\n", " ]\n", " },\n", " \"execution_count\": 7,\n", " \"metadata\": {},\n", " \"output_type\": \"execute_result\"\n", " }\n", " ],\n", " \"source\": [\n", " \"Y.shape\"\n", " ]\n", " },\n", " {\n", " \"cell_type\": \"code\",\n", " \"execution_count\": null,\n", " \"metadata\": {},\n", " \"outputs\": [],\n", " \"source\": []\n", " }\n", " ],\n", " \"metadata\": {\n", " \"kernelspec\": {\n", " \"display_name\": \"Python 3\",\n", " \"language\": \"python\",\n", " \"name\": \"python3\"\n", " },\n", " \"language_info\": {\n", " \"codemirror_mode\": {\n", " \"name\": \"ipython\",\n", " \"version\": 3\n", " },\n", " \"file_extension\": \".py\",\n", " \"mimetype\": \"text/x-python\",\n", " \"name\": \"python\",\n", " \"nbconvert_exporter\": \"python\",\n", " \"pygments_lexer\": \"ipython3\",\n", " \"version\": \"3.6.7\"\n", " }\n", " },\n", " \"nbformat\": 4,\n", " \"nbformat_minor\": 2\n", "}\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 383 }, "id": "1b2938e2", "outputId": "50844bb9-e0d9-48bf-a6ae-79e9aa0a0c8c" }, "source": [ "import seaborn as sns\n", "titanic = sns.load_dataset('titanic')\n", "titanic.head(10)" ], "execution_count": 9, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " survived pclass sex age sibsp parch fare embarked class \\\n", "0 0 3 male 22.0 1 0 7.2500 S Third \n", "1 1 1 female 38.0 1 0 71.2833 C First \n", "2 1 3 female 26.0 0 0 7.9250 S Third \n", "3 1 1 female 35.0 1 0 53.1000 S First \n", "4 0 3 male 35.0 0 0 8.0500 S Third \n", "5 0 3 male NaN 0 0 8.4583 Q Third \n", "6 0 1 male 54.0 0 0 51.8625 S First \n", "7 0 3 male 2.0 3 1 21.0750 S Third \n", "8 1 3 female 27.0 0 2 
11.1333 S Third \n", "9 1 2 female 14.0 1 0 30.0708 C Second \n", "\n", " who adult_male deck embark_town alive alone \n", "0 man True NaN Southampton no False \n", "1 woman False C Cherbourg yes False \n", "2 woman False NaN Southampton yes True \n", "3 woman False C Southampton yes False \n", "4 man True NaN Southampton no True \n", "5 man True NaN Queenstown no True \n", "6 man True E Southampton no True \n", "7 child False NaN Southampton no False \n", "8 woman False NaN Southampton yes False \n", "9 child False NaN Cherbourg yes False " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivedpclasssexagesibspparchfareembarkedclasswhoadult_maledeckembark_townalivealone
003male22.0107.2500SThirdmanTrueNaNSouthamptonnoFalse
111female38.01071.2833CFirstwomanFalseCCherbourgyesFalse
213female26.0007.9250SThirdwomanFalseNaNSouthamptonyesTrue
311female35.01053.1000SFirstwomanFalseCSouthamptonyesFalse
403male35.0008.0500SThirdmanTrueNaNSouthamptonnoTrue
503maleNaN008.4583QThirdmanTrueNaNQueenstownnoTrue
601male54.00051.8625SFirstmanTrueESouthamptonnoTrue
703male2.03121.0750SThirdchildFalseNaNSouthamptonnoFalse
813female27.00211.1333SThirdwomanFalseNaNSouthamptonyesFalse
912female14.01030.0708CSecondchildFalseNaNCherbourgyesFalse
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "titanic", "summary": "{\n \"name\": \"titanic\",\n \"rows\": 891,\n \"fields\": [\n {\n \"column\": \"survived\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pclass\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"female\",\n \"male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.526497332334044,\n \"min\": 0.42,\n \"max\": 80.0,\n \"num_unique_values\": 88,\n \"samples\": [\n 0.75,\n 22.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sibsp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 8,\n \"num_unique_values\": 7,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"parch\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 6,\n \"num_unique_values\": 7,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 49.693428597180905,\n \"min\": 0.0,\n \"max\": 512.3292,\n \"num_unique_values\": 248,\n \"samples\": [\n 11.2417,\n 51.8625\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embarked\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n 
\"S\",\n \"C\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"class\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Third\",\n \"First\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"who\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"man\",\n \"woman\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"adult_male\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n false,\n true\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"deck\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"C\",\n \"E\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embark_town\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Southampton\",\n \"Cherbourg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alive\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"yes\",\n \"no\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alone\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n true,\n false\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "code", "metadata": { "id": "f5507dec" }, "source": [ "X = titanic.drop('survived',axis = 1)\n", "Y = titanic['survived']" ], "execution_count": 10, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "15cb730c", "outputId": "013935cc-3485-4464-92f7-a1d35610398e" }, "source": [ "X.shape" ], "execution_count": 11, "outputs": [ 
{ "output_type": "execute_result", "data": { "text/plain": [ "(891, 14)" ] }, "metadata": {}, "execution_count": 11 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "42d82e7e", "outputId": "569c12a3-aa63-412e-dbda-9e218ac0fcb2" }, "source": [ "Y.shape" ], "execution_count": 12, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(891,)" ] }, "metadata": {}, "execution_count": 12 } ] }, { "cell_type": "markdown", "source": [ "Dealing with messy data" ], "metadata": { "id": "BN_Y-5xcReHs" } }, { "cell_type": "code", "source": [ "import seaborn as sns\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "tips = sns.load_dataset('tips')" ], "metadata": { "id": "6V7NzckGO9cR" }, "execution_count": 13, "outputs": [] }, { "cell_type": "code", "source": [ "# astype() returns a new Series, so injecting NaN below cannot touch `tips` and no\n", "# longer triggers SettingWithCopyWarning; float dtype lets the column hold NaN.\n", "size = tips[\"size\"].astype(float)\n", "size.loc[:15] = np.nan  # label-based slice: rows 0..15 inclusive\n", "size.head(20)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 818 }, "id": "P_r3-rSpPMoe", "outputId": "31862675-e3c0-4921-e31d-a1db2a8ff466" }, "execution_count": 14, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 NaN\n", "1 NaN\n", "2 NaN\n", "3 NaN\n", "4 NaN\n", "5 NaN\n", "6 NaN\n", "7 NaN\n", "8 NaN\n", "9 NaN\n", "10 NaN\n", "11 NaN\n", "12 NaN\n", "13 NaN\n", "14 NaN\n", "15 NaN\n", "16 3.0\n", "17 3.0\n", "18 3.0\n", "19 3.0\n", "Name: size, dtype: float64" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
size
0NaN
1NaN
2NaN
3NaN
4NaN
5NaN
6NaN
7NaN
8NaN
9NaN
10NaN
11NaN
12NaN
13NaN
14NaN
15NaN
163.0
173.0
183.0
193.0
\n", "

" ] }, "metadata": {}, "execution_count": 14 } ] }, { "cell_type": "code", "source": [ "size.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rnPfIPArQOFB", "outputId": "354e9712-9be9-4027-9f1f-5f1a10c0af5c" }, "execution_count": 15, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(244,)" ] }, "metadata": {}, "execution_count": 15 } ] }, { "cell_type": "code", "source": [ "size.isnull().sum()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iqtgBQ89QU81", "outputId": "7f2d365d-32c8-4a27-c954-53974aef2182" }, "execution_count": 17, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "np.int64(16)" ] }, "metadata": {}, "execution_count": 17 } ] }, { "cell_type": "code", "source": [ "mean = size.mean()\n", "mean = round(mean)\n", "print(mean)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9LL1usX3QfWL", "outputId": "bb5afd03-3ca1-4153-cbd8-c5cbd21834f3" }, "execution_count": 18, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "3\n" ] } ] }, { "cell_type": "code", "source": [ "# fillna() returns a new Series; rebind `size` instead of mutating it with inplace=True.\n", "size = size.fillna(mean)\n", "size.head(20)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 711 }, "id": "ghShyvScQigg", "outputId": "35e4f709-e614-40b9-ba4a-e092d874cdce" }, "execution_count": 19, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 3.0\n", "1 3.0\n", "2 3.0\n", "3 3.0\n", "4 3.0\n", "5 3.0\n", "6 3.0\n", "7 3.0\n", "8 3.0\n", "9 3.0\n", "10 3.0\n", "11 3.0\n", "12 3.0\n", "13 3.0\n", "14 3.0\n", "15 3.0\n", "16 3.0\n", "17 3.0\n", "18 3.0\n", "19 3.0\n", "Name: size, dtype: float64" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
size
03.0
13.0
23.0
33.0
43.0
53.0
63.0
73.0
83.0
93.0
103.0
113.0
123.0
133.0
143.0
153.0
163.0
173.0
183.0
193.0
\n", "

" ] }, "metadata": {}, "execution_count": 19 } ] }, { "cell_type": "code", "source": [ "plt.hist(size)\n", "plt.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 430 }, "id": "5-eF8tU2QqML", "outputId": "90cc9e79-d455-4bd3-81dc-df23ae164e78" }, "execution_count": 21, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAH+9JREFUeJzt3XtwVPX9//HXhiWXYnZj0iabLQmkLTXcjURjhFqUjBiYFEaq4kSaIgO9JCikVUhHUFokwHhBaEyEsaAzUKqdgopjLA1K6hhCCKUjSBEqQipu0g5ml8QhRnJ+f7Tu77uSqqGbnM+uz8fMmXHPOXvy3jM6eXr2bNZhWZYlAAAAg8TYPQAAAMCnESgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjOO0e4BL0dPTozNnzigxMVEOh8PucQAAwBdgWZbOnTsnr9ermJjPvkYSkYFy5swZZWRk2D0GAAC4BC0tLRo6dOhn7hORgZKYmCjp3y/Q5XLZPA0AAPgiAoGAMjIygr/HP0tEBsonb+u4XC4CBQCACPNFbs/gJlkAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABjHafcA+PIavvQlu0fos3dXT7d7BAD4UuAKCgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDh9DpT6+noVFRXJ6/XK4XBo586d/3XfH//4x3I4HFq3bl3I+rNnz6q4uFgul0tJSUmaN2+eOjo6+joKAACIUn0OlM7OTo0fP15VVVWfud+OHTu0b98+eb3ei7YVFxfryJEj2r17t3bt2qX6+notWLCgr6MAAIAo5ezrEwoLC1VYWPiZ+7z33ntauHChXnnlFU2fHvr19EePHlVtba2ampqUm5srSdqwYYOmTZumhx9+uNegAQAAXy5hvwelp6dHc+bM0b333qvRo0dftL2hoUFJSUnBOJGkgoICxcTEqLGxsddjdnV1KRAIhCwAACB6hT1Q1qxZI6fTqbvvvrvX7T6fT6mpqSHrnE6nkpOT5fP5en1OZWWl3G53cMnIyAj32AAAwCBhDZTm5mY9/vjj2rJlixwOR9iOW1FRIb/fH1xaWlrCdmwAAGCesAbKn//8Z7W1tSkzM1NOp1NOp1OnTp3Sz372Mw0fPlyS5PF41NbWFvK8jz/+WGfPnpXH4+n1uHFxcXK5XCELAACIXn2+SfazzJkzRwUFBSHrpk6dqjlz5mju3LmSpPz8fLW3t6u5uVkTJkyQJO3Zs0c9PT3Ky8sL5zgAACBC9TlQOjo6dOLEieDjkydP6tChQ0pOTlZmZqZSUlJC9h88eLA8Ho+uuOIKSdLIkSN18803a/78+aqpqVF3d7fKyso0e/ZsPsEDAAAkXcJbPAcOHFBOTo5ycnIkSeXl5crJydHy5cu/8DG2bt2q7OxsTZkyRdOmTdOkSZO0cePGvo4CAACiVJ+voEyePFmWZX3h/d99992L1iUnJ2vbtm19/dEAAOBLgu/iAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxulzoNTX16uoqEher1cOh0M7d+4Mbuvu7taSJUs0duxYDRkyRF6vVz/4wQ905syZkGOcPXtWx
cXFcrlcSkpK0rx589TR0fE/vxgAABAd+hwonZ2dGj9+vKqqqi7a9uGHH+rgwYNatmyZDh48qD/84Q86duyYvve974XsV1xcrCNHjmj37t3atWuX6uvrtWDBgkt/FQAAIKo4LMuyLvnJDod27NihmTNn/td9mpqadM011+jUqVPKzMzU0aNHNWrUKDU1NSk3N1eSVFtbq2nTpukf//iHvF7v5/7cQCAgt9stv98vl8t1qePDZsOXvmT3CH327urpdo8AABGrL7+/+/0eFL/fL4fDoaSkJElSQ0ODkpKSgnEiSQUFBYqJiVFjY2Ovx+jq6lIgEAhZAABA9OrXQDl//ryWLFmiO+64I1hKPp9PqampIfs5nU4lJyfL5/P1epzKykq53e7gkpGR0Z9jAwAAm/VboHR3d+u2226TZVmqrq7+n45VUVEhv98fXFpaWsI0JQAAMJGzPw76SZycOnVKe/bsCXmfyePxqK2tLWT/jz/+WGfPnpXH4+n1eHFxcYqLi+uPUQEAgIHCfgXlkzg5fvy4/vSnPyklJSVke35+vtrb29Xc3Bxct2fPHvX09CgvLy/c4wAAgAjU5ysoHR0dOnHiRPDxyZMndejQISUnJys9PV3f//73dfDgQe3atUsXLlwI3leSnJys2NhYjRw5UjfffLPmz5+vmpoadXd3q6ysTLNnz/5Cn+ABAADRr8+BcuDAAd1www3Bx+Xl5ZKkkpISPfjgg3rhhRckSVdeeWXI81599VVNnjxZkrR161aVlZVpypQpiomJ0axZs7R+/fpLfAkAACDa9DlQJk+erM/60ylf5M+qJCcna9u2bX390QAA4EuC7+IBAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADG6XOg1NfXq6ioSF6vVw6HQzt37gzZblmWli9frvT0dCUkJKigoEDHjx8P2efs2bMqLi6Wy+VSUlKS5s2bp46Ojv/phQAAgOjR50Dp7OzU+PHjVVVV1ev2tWvXav369aqpqVFjY6OGDBmiqVOn6vz588F9iouLdeTIEe3evVu7du1SfX29FixYcOmvAgAARBVnX59QWFiowsLCXrdZlqV169bp/vvv14wZMyRJzzzzjNLS0rRz507Nnj1bR48eVW1trZqampSbmytJ2rBhg6ZNm6aHH35YXq/3f3g5AAAgGoT1HpSTJ0/K5/OpoKAguM7tdisvL08NDQ2SpIaGBiUlJQXjRJIKCgoUExOjxsbGXo/b1dWlQCAQsgAAgOgV1kDx+XySpLS0tJD1aWlpwW0+n0+pqakh251Op5KTk4P7fFplZaXcbndwycjICOfYAADAMBHxKZ6Kigr5/f7g0tLSYvdIAACgH4U1UDwejySptbU1ZH1ra2twm8fjUVtbW8j2jz/+WGfPng3u82lxcXFyuVwhCwAAiF5hDZSsrCx5PB7V1dUF1wUCATU2Nio/P1+SlJ+fr/b2djU3Nwf32bNnj3p6epSXlxfOcQAAQITq86d4Ojo6dOLEieDjkydP6tChQ0pOTlZmZqYWLVqklStXasSIEcrKytKyZcvk9Xo1c+ZMSdLIkSN18803a/78+aqpqVF3d7fKyso0e/ZsPsEDAAAkXUKgHDhwQDfccEPwcXl5uSSppKREW7Zs0X333afOzk4tWLBA7e3tmjRpkmpraxUfHx98ztatW1VWVqYpU6YoJiZGs2bN0vr168PwcgAAQDRwWJZl2T1EXwUCAbndbvn9fu5HiWDDl75k9wh99u7q6
XaPAAARqy+/vyPiUzwAAODLhUABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYJ+yBcuHCBS1btkxZWVlKSEjQN7/5Tf3qV7+SZVnBfSzL0vLly5Wenq6EhAQVFBTo+PHj4R4FAABEqLAHypo1a1RdXa1f//rXOnr0qNasWaO1a9dqw4YNwX3Wrl2r9evXq6amRo2NjRoyZIimTp2q8+fPh3scAAAQgZzhPuAbb7yhGTNmaPr06ZKk4cOH67e//a32798v6d9XT9atW6f7779fM2bMkCQ988wzSktL086dOzV79uxwjwQAACJM2K+gXHfddaqrq9Pbb78tSfrrX/+q119/XYWFhZKkkydPyufzqaCgIPgct9utvLw8NTQ09HrMrq4uBQKBkAUAAESvsF9BWbp0qQKBgLKzszVo0CBduHBBDz30kIqLiyVJPp9PkpSWlhbyvLS0tOC2T6usrNSKFSvCPSoAADBU2K+gPPvss9q6dau2bdumgwcP6umnn9bDDz+sp59++pKPWVFRIb/fH1xaWlrCODEAADBN2K+g3HvvvVq6dGnwXpKxY8fq1KlTqqysVElJiTwejySptbVV6enpwee1trbqyiuv7PWYcXFxiouLC/eoAADAUGG/gvLhhx8qJib0sIMGDVJPT48kKSsrSx6PR3V1dcHtgUBAjY2Nys/PD/c4AAAgAoX9CkpRUZEeeughZWZmavTo0frLX/6iRx99VHfddZckyeFwaNGiRVq5cqVGjBihrKwsLVu2TF6vVzNnzgz3OAAAIAKFPVA2bNigZcuW6ac//ana2trk9Xr1ox/9SMuXLw/uc99996mzs1MLFixQe3u7Jk2apNraWsXHx4d7HAAAEIEc1v/9E68RIhAIyO12y+/3y+Vy2T0OLtHwpS/ZPUKfvbt6ut0jAEDE6svvb76LBwAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYJx+CZT33ntPd955p1JSUpSQkKCxY8fqwIEDwe2WZWn58uVKT09XQkKCCgoKdPz48f4YBQAARKCwB8oHH3ygiRMnavDgwXr55Zf11ltv6ZFHHtHll18e3Gft2rVav369ampq1NjYqCFDhmjq1Kk6f/58uMcBAAARyBnuA65Zs0YZGRnavHlzcF1WVlbwny3L0rp163T//fdrxowZkqRnnnlGaWlp2rlzp2bPnh3ukQAAQIQJ+xWUF154Qbm5ubr11luVmpqqnJwcbdq0Kbj95MmT8vl8KigoCK5zu93Ky8tTQ0NDuMcBAAARKOyB8s4776i6ulojRozQK6+8op/85Ce6++679fTTT0uSfD6fJCktLS3keWlpacFtn9bV1aVAIBCyAACA6BX2t3h6enqUm5urVatWSZJycnJ0+PBh1dTUqKSk5JKOWVlZqRUrVoRzTAAAYLCwX0FJT0/XqFGjQtaNHDlSp0+fliR5PB5JUmtra8g+ra2twW2fVlFRIb/fH1xaWlrCPTYAADBI2ANl4sSJOnbsWMi6t99+W8OGDZP07
xtmPR6P6urqgtsDgYAaGxuVn5/f6zHj4uLkcrlCFgAAEL3C/hbP4sWLdd1112nVqlW67bbbtH//fm3cuFEbN26UJDkcDi1atEgrV67UiBEjlJWVpWXLlsnr9WrmzJnhHgcAAESgsAfK1VdfrR07dqiiokK//OUvlZWVpXXr1qm4uDi4z3333afOzk4tWLBA7e3tmjRpkmpraxUfHx/ucQAAQARyWJZl2T1EXwUCAbndbvn9ft7uiWDDl75k9wh99u7q6XaPAAARqy+/v/kuHgAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYByn3QMA6F98azSASMQVFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMbp90BZvXq1HA6HFi1aFFx3/vx5lZaWKiUlRZdddplmzZql1tbW/h4FAABEiH4NlKamJj355JMaN25cyPrFixfrxRdf1HPPPae9e/fqzJkzuuWWW/pzFAAAEEH6LVA6OjpUXFysTZs26fLLLw+u9/v9euqpp/Too4/qxhtv1IQJE7R582a98cYb2rdvX3+NAwAAIki/BUppaammT5+ugoKCkPXNzc3q7u4OWZ+dna3MzEw1NDT0eqyuri4FAoGQBQAARC9nfxx0+/btOnjwoJqami7a5vP5FBsbq6SkpJD1aWlp8vl8vR6vsrJSK1as6I9RAQCAgcJ+BaWlpUX33HOPtm7dqvj4+LAcs6KiQn6/P7i0tLSE5bgAAMBMYQ+U5uZmtbW16aqrrpLT6ZTT6dTevXu1fv16OZ1OpaWl6aOPPlJ7e3vI81pbW+XxeHo9ZlxcnFwuV8gCAACiV9jf4pkyZYrefPPNkHVz585Vdna2lixZooyMDA0ePFh1dXWaNWuWJOnYsWM6ffq08vPzwz0OAACIQGEPlMTERI0ZMyZk3ZAhQ5SSkhJcP2/ePJWXlys5OVkul0sLFy5Ufn6+rr322nCPAwAAIlC/3CT7eR577DHFxMRo1qxZ6urq0tSpU/XEE0/YMQoAADDQgATKa6+9FvI4Pj5eVVVVqqqqGogfDwAAIgzfxQMAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIwzIN9mDADRbvjSl+weoc/eXT3d7hGA/4orKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIwT9kCprKzU1VdfrcTERKWmpmrmzJk6duxYyD7nz59XaWmpUlJSdNlll2nWrFlqbW0N9ygAACBChT1Q9u7dq9LSUu3bt0+7d+9Wd3e3brrpJnV2dgb3Wbx4sV588UU999xz2rt3r86cOaNbbrkl3KMAAIAI5Qz3AWtra0Meb9myRampqWpubtb1118vv9+vp556Stu2bdONN94oSdq8ebNGjhypffv26dprrw33SAAAIML0+z0ofr9fkpScnCxJam5uVnd3twoKCoL7ZGdnKzMzUw0NDb0eo6urS4FAIGQBAADRq18DpaenR4sWLdLEiRM1ZswYSZLP51NsbKySkpJC9k1LS5PP5+v1OJWVlXK73cElIyOjP8cGAAA269dAKS0t1eHDh7V9+/b/6TgVFRXy+/3BpaWlJUwTAgAAE4X9HpRPlJWVadeuX
aqvr9fQoUOD6z0ejz766CO1t7eHXEVpbW2Vx+Pp9VhxcXGKi4vrr1EBAIBhwn4FxbIslZWVaceOHdqzZ4+ysrJCtk+YMEGDBw9WXV1dcN2xY8d0+vRp5efnh3scAAAQgcJ+BaW0tFTbtm3T888/r8TExOB9JW63WwkJCXK73Zo3b57Ky8uVnJwsl8ulhQsXKj8/n0/wAAAASf0QKNXV1ZKkyZMnh6zfvHmzfvjDH0qSHnvsMcXExGjWrFnq6urS1KlT9cQTT4R7FAAAEKHCHiiWZX3uPvHx8aqqqlJVVVW4fzwAAIgCfBcPAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADCO0+4BAAD4ooYvfcnuEfrs3dXT7R4hInEFBQAAGIdAAQAAxuEtnl5wCREAEC6R+DtFsv/3CldQAACAcQgUAABgHAIFAAAYh0ABAADGsTVQqqqqNHz4cMXHxysvL0/79++3cxwAAGAI2wLld7/7ncrLy/XAAw/o4MGDGj9+vKZOnaq2tja7RgIAAIawLVAeffRRzZ8/X3PnztWoUaNUU1Ojr3zlK/rNb35j10gAAMAQtvwdlI8++kjNzc2qqKgIrouJiVFBQYEaGhou2r+rq0tdXV3Bx36/X5IUCAT6Zb6erg/75bj9qb/ORX/iPA8MzvPA4DwPjEg8z5GqP/79+OSYlmV97r62BMq//vUvXbhwQWlpaSHr09LS9Le//e2i/SsrK7VixYqL1mdkZPTbjJHGvc7uCb4cOM8Dg/M8MDjP+Cz9+e/HuXPn5Ha7P3OfiPhLshUVFSovLw8+7unp0dmzZ5WSkiKHwxHWnxUIBJSRkaGWlha5XK6wHhv/H+d5YHCeBwbneWBwngdOf51ry7J07tw5eb3ez93XlkD56le/qkGDBqm1tTVkfWtrqzwez0X7x8XFKS4uLmRdUlJSf44ol8vFfwADgPM8MDjPA4PzPDA4zwOnP8715105+YQtN8nGxsZqwoQJqqurC67r6elRXV2d8vPz7RgJAAAYxLa3eMrLy1VSUqLc3Fxdc801WrdunTo7OzV37ly7RgIAAIawLVBuv/12/fOf/9Ty5cvl8/l05ZVXqra29qIbZwdaXFycHnjggYveUkJ4cZ4HBud5YHCeBwbneeCYcK4d1hf5rA8AAMAA4rt4AACAcQgUAABgHAIFAAAYh0ABAADGIVD+o76+XkVFRfJ6vXI4HNq5c6fdI0WdyspKXX311UpMTFRqaqpmzpypY8eO2T1WVKqurta4ceOCf2QpPz9fL7/8st1jRb3Vq1fL4XBo0aJFdo8SVR588EE5HI6QJTs72+6xotJ7772nO++8UykpKUpISNDYsWN14MABW2YhUP6js7NT48ePV1VVld2jRK29e/eqtLRU+/bt0+7du9Xd3a2bbrpJnZ2ddo8WdYYOHarVq1erublZBw4c0I033qgZM2boyJEjdo8WtZqamvTkk09q3Lhxdo8SlUaPHq33338/uLz++ut2jxR1PvjgA02cOFGDBw/Wyy+/rLfeekuPPPKILr/8clvmiYjv4hkIhYWFKiwstHuMqFZbWxvyeMuWLUpNTVVzc7Ouv/56m6aKTkVFRSGPH3roIVVXV2vfvn0aPXq0TVNFr46ODhUXF2vTpk1auXKl3eNEJafT2etXoSB81qxZo4yMDG3evDm4Lisry7Z5uIIC2/j9fklScnKyzZNEtwsXLmj79u3q7OzkqyT6SWlpqaZPn66CggK7R4lax48fl9fr1Te+8Q0VFxfr9OnTdo8UdV544QXl5ubq1ltvVWpqqnJycrRp0ybb5uEKC
mzR09OjRYsWaeLEiRozZozd40SlN998U/n5+Tp//rwuu+wy7dixQ6NGjbJ7rKizfft2HTx4UE1NTXaPErXy8vK0ZcsWXXHFFXr//fe1YsUKfec739Hhw4eVmJho93hR45133lF1dbXKy8v1i1/8Qk1NTbr77rsVGxurkpKSAZ+HQIEtSktLdfjwYd5H7kdXXHGFDh06JL/fr9///vcqKSnR3r17iZQwamlp0T333KPdu3crPj7e7nGi1v99+33cuHHKy8vTsGHD9Oyzz2revHk2ThZdenp6lJubq1WrVkmScnJydPjwYdXU1NgSKLzFgwFXVlamXbt26dVXX9XQoUPtHidqxcbG6lvf+pYmTJigyspKjR8/Xo8//rjdY0WV5uZmtbW16aqrrpLT6ZTT6dTevXu1fv16OZ1OXbhwwe4Ro1JSUpK+/e1v68SJE3aPElXS09Mv+h+YkSNH2vZ2GldQMGAsy9LChQu1Y8cOvfbaa7befPVl1NPTo66uLrvHiCpTpkzRm2++GbJu7ty5ys7O1pIlSzRo0CCbJotuHR0d+vvf/645c+bYPUpUmThx4kV/+uHtt9/WsGHDbJmHQPmPjo6OkBo/efKkDh06pOTkZGVmZto4WfQoLS3Vtm3b9PzzzysxMVE+n0+S5Ha7lZCQYPN00aWiokKFhYXKzMzUuXPntG3bNr322mt65ZVX7B4tqiQmJl50D9WQIUOUkpLCvVVh9POf/1xFRUUaNmyYzpw5owceeECDBg3SHXfcYfdoUWXx4sW67rrrtGrVKt12223av3+/Nm7cqI0bN9ozkAXLsizr1VdftSRdtJSUlNg9WtTo7fxKsjZv3mz3aFHnrrvusoYNG2bFxsZaX/va16wpU6ZYf/zjH+0e60vhu9/9rnXPPffYPUZUuf3226309HQrNjbW+vrXv27dfvvt1okTJ+weKyq9+OKL1pgxY6y4uDgrOzvb2rhxo22zOCzLsuxJIwAAgN5xkywAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4/w8vLGekpacueAAAAABJRU5ErkJggg==\n" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "min_val = size.mean() - (3 * size.std())\n", "print(min_val)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_U-ot3whQzSA", "outputId": "e43779ed-5fca-4010-d9a0-31a89568add0" }, "execution_count": 22, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "-0.1974349065787404\n" ] } ] }, { "cell_type": "code", "source": [ "max_val = size.mean() + (3 * size.std())\n", "print(max_val)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jT4T3Z-eQ28V", "outputId": "4260d170-e038-4610-eb87-0df7e8f21226" }, "execution_count": 23, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "5.369566054119724\n" ] } ] }, { "cell_type": "code", "source": [ "outliers = size[size > max_val]\n", "outliers.count()" ], "metadata": { "colab": { 
"base_uri": "https://localhost:8080/" }, "id": "DE1r88vFQ6ZR", "outputId": "0797c360-89e3-45d9-a951-b64e6f1a1597" }, "execution_count": 24, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "np.int64(4)" ] }, "metadata": {}, "execution_count": 24 } ] }, { "cell_type": "code", "source": [ "print(outliers)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PTWFJ0CaQ93p", "outputId": "15664ebb-ac46-4edb-b311-903ad25739b6" }, "execution_count": 25, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "125 6.0\n", "141 6.0\n", "143 6.0\n", "156 6.0\n", "Name: size, dtype: float64\n" ] } ] }, { "cell_type": "code", "source": [ "# Drop the upper-tail outliers: keep only `size` values at or below max_val (mean + 3 * std).\n", "size_no_outliers = size[size <= max_val]\n", "size_no_outliers.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vDZ1IA-3RCHP", "outputId": "9f770a1a-4439-4abf-a5a0-387566ae8d93" }, "execution_count": 26, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(240,)" ] }, "metadata": {}, "execution_count": 26 } ] } ] }