{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyM6lcWDIRzwQ5fcw7a7TiiZ", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "cMIjQEwLJPKQ", "outputId": "c3c64dff-48e9-4dab-e007-fc9cf25753e9" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Cloning into 'The-Machine-Learning-Workshop'...\n", "remote: Enumerating objects: 805, done.\u001b[K\n", "remote: Counting objects: 100% (23/23), done.\u001b[K\n", "remote: Compressing objects: 100% (15/15), done.\u001b[K\n", "remote: Total 805 (delta 15), reused 8 (delta 8), pack-reused 782 (from 1)\u001b[K\n", "Receiving objects: 100% (805/805), 10.36 MiB | 9.64 MiB/s, done.\n", "Resolving deltas: 100% (293/293), done.\n" ] } ], "source": [ "!git clone https://github.com/MsSaidat25/The-Machine-Learning-Workshop.git" ] }, { "cell_type": "code", "source": [ "import os\n", "os.chdir('/content/The-Machine-Learning-Workshop')\n", "!ls # see all folders/files" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fkumve32Jj-w", "outputId": "f5f0c473-b4c3-4e25-b4e9-23db465582c1" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Chapter01 Chapter03 Chapter05 Graphics README.md\n", "Chapter02 Chapter04 Chapter06 LICENSE requirements.txt\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8a5d3702", "outputId": "312c313b-b695-48a4-e9f6-cad0ad63f4f9" }, "source": [ "import os\n", "os.chdir('/content/The-Machine-Learning-Workshop/Chapter01')\n", "!ls" ], "execution_count": null, "outputs": [ { "output_type": 
"stream", "name": "stdout", "text": [ "Activity1.01 Exercise1.01 Exercise1.03\n", "Activity1.02 Exercise1.02 Exercise1.04\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "99014702", "outputId": "a7833725-7b7b-4655-8a28-5f0898678ede" }, "source": [ "import json\n", "\n", "notebook_path = '/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01/Activity1_01.ipynb'\n", "\n", "with open(notebook_path, 'r') as f:\n", " notebook_content = json.load(f)\n", "\n", "cells_to_generate = []\n", "for cell in notebook_content['cells']:\n", " if cell['cell_type'] == 'code':\n", " cells_to_generate.append({'cell_type': 'python', 'code': ''.join(cell['source'])})\n", " elif cell['cell_type'] == 'markdown':\n", " cells_to_generate.append({'cell_type': 'markdown', 'code': ''.join(cell['source'])})\n", "\n", "# This list will be used by the next command to generate the actual cells.\n", "# For now, I will just print the first few cells to confirm the parsing.\n", "print(f\"Found {len(cells_to_generate)} cells in the notebook. Preview of the first cell:\\n{cells_to_generate[0]['code'] if cells_to_generate else 'No cells found.'}\")" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Found 5 cells in the notebook. 
Preview of the first cell:\n", "import seaborn as sns\n", "titanic = sns.load_dataset('titanic')\n", "titanic.head(10)\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "12666191", "outputId": "cb746e2f-c18d-4518-88c9-d6e3a401b318" }, "source": [ "import os\n", "os.chdir('/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01')\n", "!ls" ], "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Activity1_01.ipynb titanic.csv unit_test_activity1_01.ipynb\n" ] } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 383 }, "id": "1b2938e2", "outputId": "50844bb9-e0d9-48bf-a6ae-79e9aa0a0c8c" }, "source": [ "import seaborn as sns\n", "titanic = sns.load_dataset('titanic')\n", "titanic.head(10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " survived pclass sex age sibsp parch fare embarked class \\\n", "0 0 3 male 22.0 1 0 7.2500 S Third \n", "1 1 1 female 38.0 1 0 71.2833 C First \n", "2 1 3 female 26.0 0 0 7.9250 S Third \n", "3 1 1 female 35.0 1 0 53.1000 S First \n", "4 0 3 male 35.0 0 0 8.0500 S Third \n", "5 0 3 male NaN 0 0 8.4583 Q Third \n", "6 0 1 male 54.0 0 0 51.8625 S First \n", "7 0 3 male 2.0 3 1 21.0750 S Third \n", "8 1 3 female 27.0 0 2 11.1333 S Third \n", "9 1 2 female 14.0 1 0 30.0708 C Second \n", "\n", " who adult_male deck embark_town alive alone \n", "0 man True NaN Southampton no False \n", "1 woman False C Cherbourg yes False \n", "2 woman False NaN Southampton yes True \n", "3 woman False C Southampton yes False \n", "4 man True NaN Southampton no True \n", "5 man True NaN Queenstown no True \n", "6 man True E Southampton no True \n", "7 child False NaN Southampton no False \n", "8 woman False NaN Southampton yes False \n", "9 child False NaN Cherbourg yes False " ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
survivedpclasssexagesibspparchfareembarkedclasswhoadult_maledeckembark_townalivealone
003male22.0107.2500SThirdmanTrueNaNSouthamptonnoFalse
111female38.01071.2833CFirstwomanFalseCCherbourgyesFalse
213female26.0007.9250SThirdwomanFalseNaNSouthamptonyesTrue
311female35.01053.1000SFirstwomanFalseCSouthamptonyesFalse
403male35.0008.0500SThirdmanTrueNaNSouthamptonnoTrue
503maleNaN008.4583QThirdmanTrueNaNQueenstownnoTrue
601male54.00051.8625SFirstmanTrueESouthamptonnoTrue
703male2.03121.0750SThirdchildFalseNaNSouthamptonnoFalse
813female27.00211.1333SThirdwomanFalseNaNSouthamptonyesFalse
912female14.01030.0708CSecondchildFalseNaNCherbourgyesFalse
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "titanic", "summary": "{\n \"name\": \"titanic\",\n \"rows\": 891,\n \"fields\": [\n {\n \"column\": \"survived\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pclass\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 3,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"female\",\n \"male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.526497332334044,\n \"min\": 0.42,\n \"max\": 80.0,\n \"num_unique_values\": 88,\n \"samples\": [\n 0.75,\n 22.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sibsp\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 8,\n \"num_unique_values\": 7,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"parch\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 6,\n \"num_unique_values\": 7,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 49.693428597180905,\n \"min\": 0.0,\n \"max\": 512.3292,\n \"num_unique_values\": 248,\n \"samples\": [\n 11.2417,\n 51.8625\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embarked\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n 
\"S\",\n \"C\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"class\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Third\",\n \"First\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"who\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"man\",\n \"woman\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"adult_male\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n false,\n true\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"deck\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"C\",\n \"E\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embark_town\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Southampton\",\n \"Cherbourg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alive\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"yes\",\n \"no\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alone\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n true,\n false\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 9 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "bd9b06d2", "outputId": "b51231af-c764-4e73-bcc1-56a31c16da8b" }, "source": [ "X = titanic[['sex', 'age', 'fare', 'class', 'embark_town', 'alone']]\n", "display(X.head())" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ " sex age fare class 
embark_town alone\n", "0 male 22.0 7.2500 Third Southampton False\n", "1 female 38.0 71.2833 First Cherbourg False\n", "2 female 26.0 7.9250 Third Southampton True\n", "3 female 35.0 53.1000 First Southampton False\n", "4 male 35.0 8.0500 Third Southampton True" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sexagefareclassembark_townalone
0male22.07.2500ThirdSouthamptonFalse
1female38.071.2833FirstCherbourgFalse
2female26.07.9250ThirdSouthamptonTrue
3female35.053.1000FirstSouthamptonFalse
4male35.08.0500ThirdSouthamptonTrue
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "summary": "{\n \"name\": \"display(X\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"female\",\n \"male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6.833739825307955,\n \"min\": 22.0,\n \"max\": 38.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 38.0,\n 35.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 30.5100288352535,\n \"min\": 7.25,\n \"max\": 71.2833,\n \"num_unique_values\": 5,\n \"samples\": [\n 71.2833,\n 8.05\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"class\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"First\",\n \"Third\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embark_town\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Cherbourg\",\n \"Southampton\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alone\",\n \"properties\": {\n \"dtype\": \"boolean\",\n \"num_unique_values\": 2,\n \"samples\": [\n true,\n false\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {} } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "15cb730c", "outputId": "8f852295-a151-4942-b1d0-c163351a517f" }, "source": [ "X.shape" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(891, 6)" ] }, "metadata": {}, "execution_count": 34 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" 
}, "id": "42d82e7e", "outputId": "81ccfc58-d19f-4c0d-b329-de45f0706430" }, "source": [ "# Target vector for the features matrix X.\n", "# NOTE(review): `Y` was never defined in the saved notebook, so this cell failed on a fresh kernel.\n", "# `survived` is assumed to be the intended target column - confirm.\n", "Y = titanic['survived']\n", "Y.shape" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(891,)" ] }, "metadata": {}, "execution_count": 35 } ] }, { "cell_type": "markdown", "source": [ "## Dealing with messy data" ], "metadata": { "id": "BN_Y-5xcReHs" } }, { "cell_type": "code", "source": [ "import seaborn as sns\n", "import numpy as np\n", "import matplotlib.pyplot as plt" ], "metadata": { "id": "6V7NzckGO9cR" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "c79dedfd" }, "source": [ "# Task\n", "Check for and handle missing values and outliers in the features matrix `X`. Then, summarize the findings and the methods used to address them." ] }, { "cell_type": "markdown", "metadata": { "id": "a33d522d" }, "source": [ "## Check Missing Values\n", "\n", "### Subtask:\n", "Identify and count the number of missing values in each column of the features matrix `X`. This will help us understand the extent of missing data.\n" ] }, { "cell_type": "markdown", "metadata": { "id": "ea922db0" }, "source": [ "**Reasoning**:\n", "To identify and count missing values in each column of the features matrix `X`, I will use the `.isnull()` method followed by `.sum()` to get the total count for each column.
This directly addresses the subtask's instructions.\n", "\n" ] }, { "cell_type": "code", "metadata": { "id": "d43f1b5d", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "b6879fd0-16bf-4fc1-c951-b2cedc66202b" }, "source": [ "missing_values = X.isnull().sum()\n", "print(\"Missing values per column in X:\")\n", "print(missing_values)" ], "execution_count": 37, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Missing values per column in X:\n", "sex 0\n", "age 177\n", "fare 0\n", "class 0\n", "embark_town 2\n", "alone 0\n", "dtype: int64\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "9dfd8c70" }, "source": [ "# Task\n", "Impute missing values in the 'age' column of DataFrame `X` using the median of the 'age' column." ] }, { "cell_type": "markdown", "metadata": { "id": "26083663" }, "source": [ "## Handle Missing Values in 'age'\n", "\n", "### Subtask:\n", "Impute missing values in the 'age' column of DataFrame `X` using the median of the 'age' column.\n" ] }, { "cell_type": "markdown", "metadata": { "id": "6d688010" }, "source": [ "**Reasoning**:\n", "To impute the missing values in the 'age' column, I will first calculate its median as specified in the instructions.\n", "\n" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "c8c06424", "outputId": "0b48740b-a9d0-4593-a370-c99d172892fc" }, "source": [ "median_age = X['age'].median()\n", "print(f\"Median age: {median_age}\")" ], "execution_count": 40, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Median age: 28.0\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "40c208e7" }, "source": [ "**Reasoning**:\n", "Now that the median age has been calculated, I will use it to fill the missing values in the 'age' column of DataFrame `X`, and then verify the imputation by checking for remaining missing values.\n", "\n" ] }, { "cell_type": "code", "metadata": {}, "source": [ "# Fill missing ages with the median computed in the previous cell. `assign` builds a new\n", "# frame, so the slice taken from `titanic` is never written to in place.\n", "X = X.assign(age=X['age'].fillna(median_age))\n", "print(\"Missing values per column in X after imputation:\")\n", "print(X.isnull().sum())" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Handle Outliers\n", "\n", "Rows whose `age` or `fare` lies more than three standard deviations from the column mean are removed from `X`.\n" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": 
"https://localhost:8080/" }, "id": "77b9c2d7", "outputId": "c63b0165-7ffb-43ca-879f-979f7724b4cc" }, "source": [ "features = [\"age\", \"fare\"]\n", "for feature in features:\n", "    min_ = X[feature].mean() - (3 * X[feature].std())\n", "    max_ = X[feature].mean() + (3 * X[feature].std())\n", "    X = X[X[feature] <= max_]\n", "    X = X[X[feature] >= min_]\n", "    print(feature, \":\", X.shape)" ], "execution_count": 46, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "age : (884, 6)\n", "fare : (864, 6)\n" ] } ] }, { "cell_type": "code", "source": [ "features = [\"sex\", \"class\", \"embark_town\", \"alone\"]\n", "for feature in features:\n", "    count_ = X[feature].value_counts()\n", "    print(feature)\n", "    print(count_, \"\\n\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_PFKkCUKW1JE", "outputId": "4a4c1e72-57a0-4a02-a591-d5a9d8781c33" }, "execution_count": 47, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "sex\n", "sex\n", "male 562\n", "female 302\n", "Name: count, dtype: int64 \n", "\n", "class\n", "class\n", "Third 489\n", "First 192\n", "Second 183\n", "Name: count, dtype: int64 \n", "\n", "embark_town\n", "embark_town\n", "Southampton 632\n", "Cherbourg 154\n", "Queenstown 76\n", "Name: count, dtype: int64 \n", "\n", "alone\n", "alone\n", "True 524\n", "False 340\n", "Name: count, dtype: int64 \n", "\n" ] } ] }, { "cell_type": "code", "source": [ "from sklearn.preprocessing import LabelEncoder\n", "\n", "# X is a filtered slice at this point; take an explicit copy so the column writes below\n", "# do not trigger pandas' SettingWithCopyWarning.\n", "X = X.copy()\n", "enc = LabelEncoder()\n", "# astype('str') turns a missing embark_town into the string 'nan', so those rows are\n", "# encoded as a category of their own.\n", "for column in [\"sex\", \"class\", \"embark_town\", \"alone\"]:\n", "    X[column] = enc.fit_transform(X[column].astype('str'))\n", "X.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "id": "AwsqNomXW45N", "outputId": "990b54f7-d636-423f-8522-73af7a5b9cca" }, "execution_count": 49, "outputs": [ { "output_type": "execute_result", "data": { 
"text/plain": [ " sex age fare class embark_town alone\n", "0 1 22.0 7.2500 2 2 0\n", "1 0 38.0 71.2833 0 0 0\n", "2 0 26.0 7.9250 2 2 1\n", "3 0 35.0 53.1000 0 2 0\n", "4 1 35.0 8.0500 2 2 1" ], "text/html": [ "\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sexagefareclassembark_townalone
0122.07.2500220
1038.071.2833000
2026.07.9250221
3035.053.1000020
4135.08.0500221
\n", "
\n", "
\n", "\n", "
\n", " \n", "\n", " \n", "\n", " \n", "
\n", "\n", "\n", "
\n", "
\n" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "dataframe", "variable_name": "X", "summary": "{\n \"name\": \"X\",\n \"rows\": 864,\n \"fields\": [\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"age\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 12.498758947613258,\n \"min\": 0.42,\n \"max\": 66.0,\n \"num_unique_values\": 83,\n \"samples\": [\n 5.0,\n 22.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 29.400192357023762,\n \"min\": 0.0,\n \"max\": 164.8667,\n \"num_unique_values\": 239,\n \"samples\": [\n 7.8958,\n 51.8625\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"class\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"embark_town\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 0,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"alone\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" } }, "metadata": {}, "execution_count": 49 } ] } ] }