{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyM6lcWDIRzwQ5fcw7a7TiiZ",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/MsSaidat25/AI-Engineer-Projects/blob/main/Untitled1.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "cMIjQEwLJPKQ",
        "outputId": "c3c64dff-48e9-4dab-e007-fc9cf25753e9"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Cloning into 'The-Machine-Learning-Workshop'...\n",
            "remote: Enumerating objects: 805, done.\u001b[K\n",
            "remote: Counting objects: 100% (23/23), done.\u001b[K\n",
            "remote: Compressing objects: 100% (15/15), done.\u001b[K\n",
            "remote: Total 805 (delta 15), reused 8 (delta 8), pack-reused 782 (from 1)\u001b[K\n",
            "Receiving objects: 100% (805/805), 10.36 MiB | 9.64 MiB/s, done.\n",
            "Resolving deltas: 100% (293/293), done.\n"
          ]
        }
      ],
      "source": [
        "!git clone https://github.com/MsSaidat25/The-Machine-Learning-Workshop.git"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "os.chdir('/content/The-Machine-Learning-Workshop')\n",
        "!ls  # see all folders/files"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fkumve32Jj-w",
        "outputId": "f5f0c473-b4c3-4e25-b4e9-23db465582c1"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Chapter01  Chapter03  Chapter05  Graphics  README.md\n",
            "Chapter02  Chapter04  Chapter06  LICENSE   requirements.txt\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8a5d3702",
        "outputId": "312c313b-b695-48a4-e9f6-cad0ad63f4f9"
      },
      "source": [
        "import os\n",
        "os.chdir('/content/The-Machine-Learning-Workshop/Chapter01')\n",
        "!ls"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Activity1.01  Exercise1.01  Exercise1.03\n",
            "Activity1.02  Exercise1.02  Exercise1.04\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "99014702",
        "outputId": "a7833725-7b7b-4655-8a28-5f0898678ede"
      },
      "source": [
        "import json\n",
        "\n",
        "notebook_path = '/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01/Activity1_01.ipynb'\n",
        "\n",
        "with open(notebook_path, 'r') as f:\n",
        "    notebook_content = json.load(f)\n",
        "\n",
        "cells_to_generate = []\n",
        "for cell in notebook_content['cells']:\n",
        "    if cell['cell_type'] == 'code':\n",
        "        cells_to_generate.append({'cell_type': 'python', 'code': ''.join(cell['source'])})\n",
        "    elif cell['cell_type'] == 'markdown':\n",
        "        cells_to_generate.append({'cell_type': 'markdown', 'code': ''.join(cell['source'])})\n",
        "\n",
        "# This list will be used by the next command to generate the actual cells.\n",
        "# For now, I will just print the first few cells to confirm the parsing.\n",
        "print(f\"Found {len(cells_to_generate)} cells in the notebook. Preview of the first cell:\\n{cells_to_generate[0]['code'] if cells_to_generate else 'No cells found.'}\")"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Found 5 cells in the notebook. Preview of the first cell:\n",
            "import seaborn as sns\n",
            "titanic = sns.load_dataset('titanic')\n",
            "titanic.head(10)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "12666191",
        "outputId": "cb746e2f-c18d-4518-88c9-d6e3a401b318"
      },
      "source": [
        "import os\n",
        "os.chdir('/content/The-Machine-Learning-Workshop/Chapter01/Activity1.01')\n",
        "!ls"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Activity1_01.ipynb  titanic.csv  unit_test_activity1_01.ipynb\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 383
        },
        "id": "1b2938e2",
        "outputId": "50844bb9-e0d9-48bf-a6ae-79e9aa0a0c8c"
      },
      "source": [
        "import seaborn as sns\n",
        "titanic = sns.load_dataset('titanic')\n",
        "titanic.head(10)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   survived  pclass     sex   age  sibsp  parch     fare embarked   class  \\\n",
              "0         0       3    male  22.0      1      0   7.2500        S   Third   \n",
              "1         1       1  female  38.0      1      0  71.2833        C   First   \n",
              "2         1       3  female  26.0      0      0   7.9250        S   Third   \n",
              "3         1       1  female  35.0      1      0  53.1000        S   First   \n",
              "4         0       3    male  35.0      0      0   8.0500        S   Third   \n",
              "5         0       3    male   NaN      0      0   8.4583        Q   Third   \n",
              "6         0       1    male  54.0      0      0  51.8625        S   First   \n",
              "7         0       3    male   2.0      3      1  21.0750        S   Third   \n",
              "8         1       3  female  27.0      0      2  11.1333        S   Third   \n",
              "9         1       2  female  14.0      1      0  30.0708        C  Second   \n",
              "\n",
              "     who  adult_male deck  embark_town alive  alone  \n",
              "0    man        True  NaN  Southampton    no  False  \n",
              "1  woman       False    C    Cherbourg   yes  False  \n",
              "2  woman       False  NaN  Southampton   yes   True  \n",
              "3  woman       False    C  Southampton   yes  False  \n",
              "4    man        True  NaN  Southampton    no   True  \n",
              "5    man        True  NaN   Queenstown    no   True  \n",
              "6    man        True    E  Southampton    no   True  \n",
              "7  child       False  NaN  Southampton    no  False  \n",
              "8  woman       False  NaN  Southampton   yes  False  \n",
              "9  child       False  NaN    Cherbourg   yes  False  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-624c1dc8-4758-4bc3-9fe9-1528c5e244ca\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>survived</th>\n",
              "      <th>pclass</th>\n",
              "      <th>sex</th>\n",
              "      <th>age</th>\n",
              "      <th>sibsp</th>\n",
              "      <th>parch</th>\n",
              "      <th>fare</th>\n",
              "      <th>embarked</th>\n",
              "      <th>class</th>\n",
              "      <th>who</th>\n",
              "      <th>adult_male</th>\n",
              "      <th>deck</th>\n",
              "      <th>embark_town</th>\n",
              "      <th>alive</th>\n",
              "      <th>alone</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>male</td>\n",
              "      <td>22.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>7.2500</td>\n",
              "      <td>S</td>\n",
              "      <td>Third</td>\n",
              "      <td>man</td>\n",
              "      <td>True</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>no</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>female</td>\n",
              "      <td>38.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>71.2833</td>\n",
              "      <td>C</td>\n",
              "      <td>First</td>\n",
              "      <td>woman</td>\n",
              "      <td>False</td>\n",
              "      <td>C</td>\n",
              "      <td>Cherbourg</td>\n",
              "      <td>yes</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1</td>\n",
              "      <td>3</td>\n",
              "      <td>female</td>\n",
              "      <td>26.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>7.9250</td>\n",
              "      <td>S</td>\n",
              "      <td>Third</td>\n",
              "      <td>woman</td>\n",
              "      <td>False</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>yes</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>female</td>\n",
              "      <td>35.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>53.1000</td>\n",
              "      <td>S</td>\n",
              "      <td>First</td>\n",
              "      <td>woman</td>\n",
              "      <td>False</td>\n",
              "      <td>C</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>yes</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>male</td>\n",
              "      <td>35.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>8.0500</td>\n",
              "      <td>S</td>\n",
              "      <td>Third</td>\n",
              "      <td>man</td>\n",
              "      <td>True</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>no</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>male</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>8.4583</td>\n",
              "      <td>Q</td>\n",
              "      <td>Third</td>\n",
              "      <td>man</td>\n",
              "      <td>True</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Queenstown</td>\n",
              "      <td>no</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>male</td>\n",
              "      <td>54.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>51.8625</td>\n",
              "      <td>S</td>\n",
              "      <td>First</td>\n",
              "      <td>man</td>\n",
              "      <td>True</td>\n",
              "      <td>E</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>no</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>male</td>\n",
              "      <td>2.0</td>\n",
              "      <td>3</td>\n",
              "      <td>1</td>\n",
              "      <td>21.0750</td>\n",
              "      <td>S</td>\n",
              "      <td>Third</td>\n",
              "      <td>child</td>\n",
              "      <td>False</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>no</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>1</td>\n",
              "      <td>3</td>\n",
              "      <td>female</td>\n",
              "      <td>27.0</td>\n",
              "      <td>0</td>\n",
              "      <td>2</td>\n",
              "      <td>11.1333</td>\n",
              "      <td>S</td>\n",
              "      <td>Third</td>\n",
              "      <td>woman</td>\n",
              "      <td>False</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>yes</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>1</td>\n",
              "      <td>2</td>\n",
              "      <td>female</td>\n",
              "      <td>14.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>30.0708</td>\n",
              "      <td>C</td>\n",
              "      <td>Second</td>\n",
              "      <td>child</td>\n",
              "      <td>False</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Cherbourg</td>\n",
              "      <td>yes</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-624c1dc8-4758-4bc3-9fe9-1528c5e244ca')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-624c1dc8-4758-4bc3-9fe9-1528c5e244ca button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-624c1dc8-4758-4bc3-9fe9-1528c5e244ca');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "titanic",
              "summary": "{\n  \"name\": \"titanic\",\n  \"rows\": 891,\n  \"fields\": [\n    {\n      \"column\": \"survived\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"pclass\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 1,\n        \"max\": 3,\n        \"num_unique_values\": 3,\n        \"samples\": [\n          3,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sex\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"female\",\n          \"male\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"age\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 14.526497332334044,\n        \"min\": 0.42,\n        \"max\": 80.0,\n        \"num_unique_values\": 88,\n        \"samples\": [\n          0.75,\n          22.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"sibsp\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1,\n        \"min\": 0,\n        \"max\": 8,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"parch\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 6,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          0,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"fare\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 49.693428597180905,\n        \"min\": 0.0,\n        \"max\": 512.3292,\n        \"num_unique_values\": 248,\n        \"samples\": [\n          11.2417,\n          51.8625\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"embarked\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"S\",\n          \"C\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"class\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"Third\",\n          \"First\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"who\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"man\",\n          \"woman\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"adult_male\",\n      \"properties\": {\n        \"dtype\": \"boolean\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          false,\n          true\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"deck\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 7,\n        \"samples\": [\n          \"C\",\n          \"E\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"embark_town\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"Southampton\",\n          \"Cherbourg\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"alive\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"yes\",\n          \"no\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"alone\",\n      \"properties\": {\n        \"dtype\": \"boolean\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          true,\n          false\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "bd9b06d2",
        "outputId": "b51231af-c764-4e73-bcc1-56a31c16da8b"
      },
      "source": [
        "X = titanic[['sex', 'age', 'fare', 'class', 'embark_town', 'alone']]\n",
        "display(X.head())"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "      sex   age     fare  class  embark_town  alone\n",
              "0    male  22.0   7.2500  Third  Southampton  False\n",
              "1  female  38.0  71.2833  First    Cherbourg  False\n",
              "2  female  26.0   7.9250  Third  Southampton   True\n",
              "3  female  35.0  53.1000  First  Southampton  False\n",
              "4    male  35.0   8.0500  Third  Southampton   True"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-1bc86b89-9055-4685-a83b-a89268104b28\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>sex</th>\n",
              "      <th>age</th>\n",
              "      <th>fare</th>\n",
              "      <th>class</th>\n",
              "      <th>embark_town</th>\n",
              "      <th>alone</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>male</td>\n",
              "      <td>22.0</td>\n",
              "      <td>7.2500</td>\n",
              "      <td>Third</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>female</td>\n",
              "      <td>38.0</td>\n",
              "      <td>71.2833</td>\n",
              "      <td>First</td>\n",
              "      <td>Cherbourg</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>female</td>\n",
              "      <td>26.0</td>\n",
              "      <td>7.9250</td>\n",
              "      <td>Third</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>female</td>\n",
              "      <td>35.0</td>\n",
              "      <td>53.1000</td>\n",
              "      <td>First</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>False</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>male</td>\n",
              "      <td>35.0</td>\n",
              "      <td>8.0500</td>\n",
              "      <td>Third</td>\n",
              "      <td>Southampton</td>\n",
              "      <td>True</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-1bc86b89-9055-4685-a83b-a89268104b28')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-1bc86b89-9055-4685-a83b-a89268104b28 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-1bc86b89-9055-4685-a83b-a89268104b28');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "summary": "{\n  \"name\": \"display(X\",\n  \"rows\": 5,\n  \"fields\": [\n    {\n      \"column\": \"sex\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"female\",\n          \"male\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"age\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 6.833739825307955,\n        \"min\": 22.0,\n        \"max\": 38.0,\n        \"num_unique_values\": 4,\n        \"samples\": [\n          38.0,\n          35.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"fare\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 30.5100288352535,\n        \"min\": 7.25,\n        \"max\": 71.2833,\n        \"num_unique_values\": 5,\n        \"samples\": [\n          71.2833,\n          8.05\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"class\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"First\",\n          \"Third\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"embark_town\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"Cherbourg\",\n          \"Southampton\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"alone\",\n      \"properties\": {\n        \"dtype\": \"boolean\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          true,\n          false\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "15cb730c",
        "outputId": "8f852295-a151-4942-b1d0-c163351a517f"
      },
      "source": [
        "X.shape"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(891, 6)"
            ]
          },
          "metadata": {},
          "execution_count": 34
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "42d82e7e",
        "outputId": "81ccfc58-d19f-4c0d-b329-de45f0706430"
      },
      "source": [
        "Y.shape"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(891,)"
            ]
          },
          "metadata": {},
          "execution_count": 35
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Dealing with messy data"
      ],
      "metadata": {
        "id": "BN_Y-5xcReHs"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import seaborn as sns\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "tips = sns.load_dataset('titanic')"
      ],
      "metadata": {
        "id": "6V7NzckGO9cR"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "c79dedfd"
      },
      "source": [
        "# Task\n",
        "Check for and handle missing values and outliers in the features matrix `X`. Then, summarize the findings and the methods used to address them."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "a33d522d"
      },
      "source": [
        "## Check Missing Values\n",
        "\n",
        "### Subtask:\n",
        "Identify and count the number of missing values in each column of the features matrix `X`. This will help us understand the extent of missing data.\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ea922db0"
      },
      "source": [
        "**Reasoning**:\n",
        "To identify and count missing values in each column of the features matrix `X`, I will use the `.isnull()` method followed by `.sum()` to get the total count for each column. This directly addresses the subtask's instructions.\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "d43f1b5d",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "b6879fd0-16bf-4fc1-c951-b2cedc66202b"
      },
      "source": [
        "missing_values = X.isnull().sum()\n",
        "print(\"Missing values per column in X:\")\n",
        "print(missing_values)"
      ],
      "execution_count": 37,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Missing values per column in X:\n",
            "sex              0\n",
            "age            177\n",
            "fare             0\n",
            "class            0\n",
            "embark_town      2\n",
            "alone            0\n",
            "dtype: int64\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "9dfd8c70"
      },
      "source": [
        "# Task\n",
        "Impute missing values in the 'age' column of DataFrame `X` using the median of the 'age' column."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "26083663"
      },
      "source": [
        "## Handle Missing Values in 'age'\n",
        "\n",
        "### Subtask:\n",
        "Impute missing values in the 'age' column of DataFrame `X` using the median of the 'age' column.\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6d688010"
      },
      "source": [
        "**Reasoning**:\n",
        "To impute the missing values in the 'age' column, I will first calculate its median as specified in the instructions.\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "c8c06424",
        "outputId": "0b48740b-a9d0-4593-a370-c99d172892fc"
      },
      "source": [
        "median_age = X['age'].median()\n",
        "print(f\"Median age: {median_age}\")"
      ],
      "execution_count": 40,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Median age: 28.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "40c208e7"
      },
      "source": [
        "**Reasoning**:\n",
        "Now that the median age has been calculated, I will use it to fill the missing values in the 'age' column of DataFrame `X`, and then verify the imputation by checking for remaining missing values.\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "77b9c2d7",
        "outputId": "c63b0165-7ffb-43ca-879f-979f7724b4cc"
      },
      "source": [
        "features = [\"age\", \"fare\"]\n",
        "for feature in features:\n",
        "    min_ = X[feature].mean() - (3 * X[feature].std())\n",
        "    max_ = X[feature].mean() + (3 * X[feature].std())\n",
        "    X = X[X[feature] <= max_]\n",
        "    X = X[X[feature] >= min_]\n",
        "    print(feature,    \":\", X.shape)"
      ],
      "execution_count": 46,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "age : (884, 6)\n",
            "fare : (864, 6)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "features = [\"sex\", \"class\", \"embark_town\", \"alone\"]\n",
        "for feature in features:\n",
        "    count_ = X[feature].value_counts()\n",
        "    print(feature)\n",
        "    print(count_, \"\\n\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_PFKkCUKW1JE",
        "outputId": "4a4c1e72-57a0-4a02-a591-d5a9d8781c33"
      },
      "execution_count": 47,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "sex\n",
            "sex\n",
            "male      562\n",
            "female    302\n",
            "Name: count, dtype: int64 \n",
            "\n",
            "class\n",
            "class\n",
            "Third     489\n",
            "First     192\n",
            "Second    183\n",
            "Name: count, dtype: int64 \n",
            "\n",
            "embark_town\n",
            "embark_town\n",
            "Southampton    632\n",
            "Cherbourg      154\n",
            "Queenstown      76\n",
            "Name: count, dtype: int64 \n",
            "\n",
            "alone\n",
            "alone\n",
            "True     524\n",
            "False    340\n",
            "Name: count, dtype: int64 \n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "enc = LabelEncoder()\n",
        "X[\"sex\"] = enc.fit_transform(X['sex'].astype('str'))\n",
        "X[\"class\"] = enc.fit_transform(X['class'].astype('str'))\n",
        "X[\"embark_town\"] = enc.fit_transform(X['embark_town'].\\\n",
        "                                     astype('str'))\n",
        "X[\"alone\"] = enc.fit_transform(X['alone'].astype('str'))\n",
        "X.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "AwsqNomXW45N",
        "outputId": "990b54f7-d636-423f-8522-73af7a5b9cca"
      },
      "execution_count": 49,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   sex   age     fare  class  embark_town  alone\n",
              "0    1  22.0   7.2500      2            2      0\n",
              "1    0  38.0  71.2833      0            0      0\n",
              "2    0  26.0   7.9250      2            2      1\n",
              "3    0  35.0  53.1000      0            2      0\n",
              "4    1  35.0   8.0500      2            2      1"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-ee9f1e2a-0d9f-4f33-bd20-8443e95695f8\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>sex</th>\n",
              "      <th>age</th>\n",
              "      <th>fare</th>\n",
              "      <th>class</th>\n",
              "      <th>embark_town</th>\n",
              "      <th>alone</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1</td>\n",
              "      <td>22.0</td>\n",
              "      <td>7.2500</td>\n",
              "      <td>2</td>\n",
              "      <td>2</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>0</td>\n",
              "      <td>38.0</td>\n",
              "      <td>71.2833</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>0</td>\n",
              "      <td>26.0</td>\n",
              "      <td>7.9250</td>\n",
              "      <td>2</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>0</td>\n",
              "      <td>35.0</td>\n",
              "      <td>53.1000</td>\n",
              "      <td>0</td>\n",
              "      <td>2</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>1</td>\n",
              "      <td>35.0</td>\n",
              "      <td>8.0500</td>\n",
              "      <td>2</td>\n",
              "      <td>2</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-ee9f1e2a-0d9f-4f33-bd20-8443e95695f8')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-ee9f1e2a-0d9f-4f33-bd20-8443e95695f8 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-ee9f1e2a-0d9f-4f33-bd20-8443e95695f8');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "X",
              "summary": "{\n  \"name\": \"X\",\n  \"rows\": 864,\n  \"fields\": [\n    {\n      \"column\": \"sex\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          0,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"age\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 12.498758947613258,\n        \"min\": 0.42,\n        \"max\": 66.0,\n        \"num_unique_values\": 83,\n        \"samples\": [\n          5.0,\n          22.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"fare\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 29.400192357023762,\n        \"min\": 0.0,\n        \"max\": 164.8667,\n        \"num_unique_values\": 239,\n        \"samples\": [\n          7.8958,\n          51.8625\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"class\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 2,\n        \"num_unique_values\": 3,\n        \"samples\": [\n          2,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"embark_town\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 3,\n        \"num_unique_values\": 4,\n        \"samples\": [\n          0,\n          3\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"alone\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 49
        }
      ]
    }
  ]
}