{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "KHnddFeW5hwh"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "import urllib.request\n",
        "import zipfile\n",
        "import json\n",
        "import pandas as pd\n",
        "import time\n",
        "import torch\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import torch.nn as nn\n",
        "import torch.nn.functional as F\n",
        "import torch.optim as optim\n",
        "from torch.utils.data import DataLoader, TensorDataset\n",
        "from sklearn.model_selection import train_test_split\n",
        "import matplotlib.pyplot as plt\n",
        "from sklearn.preprocessing import LabelEncoder"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "l7pGG_d85lzH"
      },
      "outputs": [],
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "id": "dL8TIlH55qSc"
      },
      "outputs": [],
      "source": [
        "import shutil\n",
        "import os\n",
        "\n",
        "def copy_file(src, dst):\n",
        "  dst_dir = os.path.dirname(dst)\n",
        "  if not os.path.exists(dst_dir):\n",
        "    os.makedirs(dst_dir)\n",
        "\n",
        "  shutil.copy2(src, dst)\n",
        "\n",
        "copy_file('/content/drive/MyDrive/rec_data/spotify_million_playlist_dataset.zip', os.getcwd() + '/data/raw/spotify_million_playlist_dataset.zip')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "LLy-YA775snY"
      },
      "outputs": [],
      "source": [
        "def unzip_archive(filepath, dir_path):\n",
        "  with zipfile.ZipFile(f\"{filepath}\", 'r') as zip_ref:\n",
        "    zip_ref.extractall(dir_path)\n",
        "\n",
        "unzip_archive(os.getcwd() + '/data/raw/spotify_million_playlist_dataset.zip', os.getcwd() + '/data/raw/playlists')\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "YtO0seclE1Pb"
      },
      "outputs": [],
      "source": [
        "import shutil\n",
        "\n",
        "def make_dir(directory):\n",
        "    if os.path.exists(directory):\n",
        "        shutil.rmtree(directory)\n",
        "        os.makedirs(directory)\n",
        "    else:\n",
        "        os.makedirs(directory)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "UeqDk3_65vTt"
      },
      "outputs": [],
      "source": [
        "directory = os.getcwd() + '/data/raw/data'\n",
        "make_dir(directory)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "zMTup29b5wtO"
      },
      "outputs": [],
      "source": [
        "cols = [\n",
        "    'name',\n",
        "    'pid',\n",
        "    'num_followers',\n",
        "    'pos',\n",
        "    'artist_name',\n",
        "    'track_name',\n",
        "    'album_name'\n",
        "]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "h6jQO9HT5zsG",
        "outputId": "ec229c95-c29b-4622-bccf-0fc0bb69f9ba"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "mpd.slice.727000-727999.json\t100/1000\t10.0%"
          ]
        }
      ],
      "source": [
        "directory = os.getcwd() + '/data/raw/playlists/data'\n",
        "df = pd.DataFrame()\n",
        "index = 0\n",
        "# Loop through all files in the directory\n",
        "for filename in os.listdir(directory):\n",
        "    # Check if the item is a file (not a subdirectory)\n",
        "    if os.path.isfile(os.path.join(directory, filename)):\n",
        "        if filename.find('.json') != -1 :\n",
        "          index += 1\n",
        "\n",
        "          # Print the filename or perform operations on the file\n",
        "          print(f'\\r{filename}\\t{index}/1000\\t{((index/1000)*100):.1f}%', end='')\n",
        "\n",
        "          # If you need the full file path, you can use:\n",
        "          full_path = os.path.join(directory, filename)\n",
        "\n",
        "          with open(full_path, 'r') as file:\n",
        "              json_data = json.load(file)\n",
        "\n",
        "          temp = pd.DataFrame(json_data['playlists'])\n",
        "          expanded_df = temp.explode('tracks').reset_index(drop=True)\n",
        "\n",
        "          # Normalize the JSON data\n",
        "          json_normalized = pd.json_normalize(expanded_df['tracks'])\n",
        "\n",
        "          # Concatenate the original DataFrame with the normalized JSON data\n",
        "          result = pd.concat([expanded_df.drop(columns=['tracks']), json_normalized], axis=1)\n",
        "\n",
        "          result = result[cols]\n",
        "\n",
        "          df = pd.concat([df, result], axis=0, ignore_index=True)\n",
        "\n",
        "        if index % 50 == 0:\n",
        "            df.to_parquet(f'{os.getcwd()}/data/raw/data/playlists_{index % 1000}.parquet')\n",
        "            del df\n",
        "            df = pd.DataFrame()\n",
        "            if index % 100 == 0:\n",
        "                break"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "PngL0QHq516u"
      },
      "outputs": [],
      "source": [
        "import pyarrow.parquet as pq\n",
        "\n",
        "def read_parquet_folder(folder_path):\n",
        "    dataframes = []\n",
        "    for file in os.listdir(folder_path):\n",
        "        if file.endswith('.parquet'):\n",
        "            file_path = os.path.join(folder_path, file)\n",
        "            df = pd.read_parquet(file_path)\n",
        "            dataframes.append(df)\n",
        "\n",
        "    return pd.concat(dataframes, ignore_index=True)\n",
        "\n",
        "folder_path = os.getcwd() + '/data/raw/data'\n",
        "df = read_parquet_folder(folder_path)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "hdLpjr2153b_"
      },
      "outputs": [],
      "source": [
        "directory = os.getcwd() + '/data/raw/mappings'\n",
        "make_dir(directory)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "peZyue6t57Mz"
      },
      "outputs": [],
      "source": [
        "def create_ids(df, col, name):\n",
        "    # Create a dictionary mapping unique values to IDs\n",
        "    value_to_id = {val: i for i, val in enumerate(df[col].unique())}\n",
        "\n",
        "    # Create a new column with the IDs\n",
        "    df[f'{name}_id'] = df[col].map(value_to_id)\n",
        "    df[[f'{name}_id', col]].drop_duplicates().to_csv(os.getcwd() + f'/data/raw/mappings/{name}.csv')\n",
        "\n",
        "    return df"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "p68WNyaf58rS"
      },
      "outputs": [],
      "source": [
        "df = create_ids(df, 'artist_name', 'artist')\n",
        "df = create_ids(df, 'pid', 'playlist')\n",
        "df = create_ids(df, 'album_name', 'album')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "aSBKxRFa5-O_"
      },
      "outputs": [],
      "source": [
        "df['song_count'] = df.groupby(['pid','artist_name','album_name'])['track_name'].transform('nunique')\n",
        "\n",
        "df['playlist_songs'] = df.groupby(['pid'])['pos'].transform('max')\n",
        "df['playlist_songs'] += 1"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "4WqHH-pn5_nL"
      },
      "outputs": [],
      "source": [
        "df['artist_album'] = df[['artist_name', 'album_name']].agg('::'.join, axis=1)\n",
        "\n",
        "# Step 2: Create a dictionary mapping unique combined values to IDs\n",
        "value_to_id = {val: i for i, val in enumerate(df['artist_album'].unique())}\n",
        "\n",
        "# Step 3: Map these IDs back to the DataFrame\n",
        "df['artist_album_id'] = df['artist_album'].map(value_to_id)\n",
        "\n",
        "df[[f'artist_album_id', 'artist_album', 'artist_name', 'album_name', 'track_name']].drop_duplicates().to_csv(os.getcwd() + f'/data/raw/mappings/artist_album.csv')\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "V1bhU5rW6BSY"
      },
      "outputs": [],
      "source": [
        "df['song_count'] =  df.groupby(['playlist_id','artist_album_id'])['song_count'].transform('sum')\n",
        "\n",
        "encoder = LabelEncoder()\n",
        "encoder.fit(df['track_name'])\n",
        "df['track_id'] = encoder.transform(df['track_name'])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "l6sUWKYC6DCw"
      },
      "outputs": [],
      "source": [
        "df['song_percent'] = df['song_count'] / df['playlist_songs']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "XxC0WnlL6EWz"
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "\n",
        "df['song_percent'] = 1 / (1 + np.exp(-df['song_percent']))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 206
        },
        "id": "kbxBcQiX6F2v",
        "outputId": "eb1fe0b1-83df-4a31-9110-5c904ad14af9"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "artists"
            },
            "text/html": [
              "\n",
              "  <div id=\"df-cedfd0c3-1f93-4a45-b95c-5d58bbf23f45\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>playlist_id</th>\n",
              "      <th>artist_id</th>\n",
              "      <th>album_id</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>0</td>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>0</td>\n",
              "      <td>2</td>\n",
              "      <td>2</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>3</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>0</td>\n",
              "      <td>4</td>\n",
              "      <td>4</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-cedfd0c3-1f93-4a45-b95c-5d58bbf23f45')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-cedfd0c3-1f93-4a45-b95c-5d58bbf23f45 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-cedfd0c3-1f93-4a45-b95c-5d58bbf23f45');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-066c4d9a-38ab-411d-b575-92d90726ec60\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-066c4d9a-38ab-411d-b575-92d90726ec60')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-066c4d9a-38ab-411d-b575-92d90726ec60 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "text/plain": [
              "   playlist_id  artist_id  album_id\n",
              "0            0          0         0\n",
              "1            0          1         1\n",
              "2            0          2         2\n",
              "3            0          3         3\n",
              "4            0          4         4"
            ]
          },
          "execution_count": 18,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "artists = df.loc[:,['playlist_id','artist_id','album_id']].drop_duplicates()\n",
        "artists.head()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "5HLSc9z36Izn"
      },
      "outputs": [],
      "source": [
        "X = artists.loc[:,['artist_id','album_id',]]\n",
        "y = artists.loc[:,'playlist_id',]\n",
        "\n",
        "# Split our data into training and test sets\n",
        "X_train, X_val, y_train, y_val = train_test_split(X,y,random_state=0, test_size=0.2)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 21,
      "metadata": {
        "id": "k47MaxR65Nq4"
      },
      "outputs": [],
      "source": [
        "from sklearn.cluster import DBSCAN\n",
        "db_model = DBSCAN(eps=0.2,min_samples=5)\n",
        "labels_db = db_model.fit_predict(X)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 27,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Osq-NpGu9V2k",
        "outputId": "cb9f28e0-1a44-4208-f520-e09ff274d48b"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Precision: 1.589262536579764e-05\n",
            "Recall: 9.606273770069471e-06\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
            "  _warn_prf(average, modifier, msg_start, len(result))\n"
          ]
        }
      ],
      "source": [
        "from sklearn.metrics import precision_score, recall_score\n",
        "y_no_noise = y[labels_db != -1]\n",
        "labels_db_no_noise = labels_db[labels_db != -1]\n",
        "\n",
        "precision = precision_score(y_no_noise, labels_db_no_noise, average='weighted')\n",
        "recall = recall_score(y_no_noise, labels_db_no_noise, average='weighted')\n",
        "\n",
        "print(f'Precision: {precision}')\n",
        "print(f'Recall: {recall}')"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "machine_shape": "hm",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}