Spaces:

Ezhil24
/

chatbot_SPOTIFY

Configuration error

App Files Files Community

DhanushMahesh committed on Mar 7, 2025

Commit

de245a1

0 Parent(s):

feat: init repo

Browse files

Files changed (8) hide show

.cache +1 -0
.gitignore +1 -0
.sample.env +2 -0
data/Kollywood 2020 songs.csv +0 -0
data/Kollywood 2021 songs.csv +0 -0
data/Kollywood 2022 songs.csv +0 -0
data/top_10000_1950-now.csv +0 -0
main.ipynb +311 -0

.cache ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"access_token": "BQBz04bT0b1KlN0z0wnV6BXsJMPltG207D9_kIhhOmQcUCkUJwFvDp9JronvprlNbyTn2cygRDVlpov3MM1MF0efRFMJlKJzfG-H3XMJBkBoQ774BDpER8Fg42LLlIwFc32Kwp4v4tI", "token_type": "Bearer", "expires_in": 3600, "expires_at": 1741331019}

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .env

.sample.env ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ SPOTIFY_CLIENT_ID=""
2	+ SPOTIFY_CLIENT_SECRET=""

data/Kollywood 2020 songs.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data/Kollywood 2021 songs.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data/Kollywood 2022 songs.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data/top_10000_1950-now.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

main.ipynb ADDED Viewed

	@@ -0,0 +1,311 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import logging\n",
+    "import pandas as pd\n",
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "import spotipy\n",
+    "from spotipy.oauth2 import SpotifyClientCredentials\n",
+    "import random\n",
+    "from tqdm import tqdm\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Pull SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET from a local .env file, if one exists.\n",
+    "load_dotenv()\n",
+    "\n",
+    "client_id = os.environ.get('SPOTIFY_CLIENT_ID')\n",
+    "client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')\n",
+    "spotify_client = spotipy.Spotify(\n",
+    "    client_credentials_manager=SpotifyClientCredentials(\n",
+    "        client_id=client_id,\n",
+    "        client_secret=client_secret,\n",
+    "        # Keep the access token in memory only. The default cache handler\n",
+    "        # writes it to a `.cache` file in the working directory, where it\n",
+    "        # is easily committed by accident.\n",
+    "        cache_handler=spotipy.cache_handler.MemoryCacheHandler(),\n",
+    "    ))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Emit INFO-and-above records formatted as `LEVEL - message`.\n",
+    "logging.basicConfig(\n",
+    "    format=\"%(levelname)s - %(message)s\",\n",
+    "    level=logging.INFO,\n",
+    ")\n",
+    "logger = logging.getLogger(__name__)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One \"<industry> <year> songs\" search per industry and year (2020-2024).\n",
+    "queries = [\n",
+    "    f\"{industry} {year} songs\"\n",
+    "    for industry in (\"Kollywood\", \"Bollywood\", \"Tollywood\", \"Mollywood\")\n",
+    "    for year in range(2020, 2025)\n",
+    "    # Kollywood 2020-2022 are left out; data/ already contains CSVs for them.\n",
+    "    if not (industry == \"Kollywood\" and year <= 2022)\n",
+    "]\n",
+    "\n",
+    "# Page size for search requests, and the offset at which the second page starts.\n",
+    "max_limit = 50\n",
+    "max_offset = 50"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO - Original DataFrame shape: (10000, 35)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Track URI', 'Track Name', 'Artist URI(s)', 'Artist Name(s)',\n",
+       "       'Album URI', 'Album Name', 'Album Artist URI(s)',\n",
+       "       'Album Artist Name(s)', 'Album Release Date', 'Album Image URL',\n",
+       "       'Disc Number', 'Track Number', 'Track Duration (ms)',\n",
+       "       'Track Preview URL', 'Explicit', 'Popularity', 'ISRC', 'Added By',\n",
+       "       'Added At', 'Artist Genres', 'Danceability', 'Energy', 'Key',\n",
+       "       'Loudness', 'Mode', 'Speechiness', 'Acousticness', 'Instrumentalness',\n",
+       "       'Liveness', 'Valence', 'Tempo', 'Time Signature', 'Album Genres',\n",
+       "       'Label', 'Copyrights'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the top-10000 CSV, log its shape, and display its column names.\n",
+    "original_df = pd.read_csv(\"data/top_10000_1950-now.csv\")\n",
+    "logger.info(\"Original DataFrame shape: %s\", original_df.shape)\n",
+    "original_df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO - Concatenated DataFrame shape: (2576, 35)\n",
+      "INFO - Unique Track URIs: (1471,)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Read the three Kollywood CSVs, stack them, and log how many distinct tracks they hold.\n",
+    "df1, df2, df3 = (\n",
+    "    pd.read_csv(f\"data/Kollywood {year} songs.csv\") for year in (2020, 2021, 2022)\n",
+    ")\n",
+    "\n",
+    "df = pd.concat([df1, df2, df3])\n",
+    "logger.info(\"Concatenated DataFrame shape: %s\", df.shape)\n",
+    "logger.info(\"Unique Track URIs: %s\", df[\"Track URI\"].unique().shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO - Querying Spotify API for: Kollywood 2021 songs\n",
+      "INFO - Total tracks: 844\n",
+      "WARNING - Your application has reached a rate/request limit. Retry will occur after: 41228\n"
+     ]
+    }
+   ],
+   "source": [
+    "def process_data(items: list, df: pd.DataFrame, offset: int) -> pd.DataFrame:\n",
+    "    \"\"\"Look up full details for one page of search hits and append them to ``df``.\n",
+    "\n",
+    "    After a single batch track lookup, the album and the artists of every\n",
+    "    track are requested individually, with a one-second pause after each\n",
+    "    request. A track whose processing raises is reported and skipped.\n",
+    "\n",
+    "    Args:\n",
+    "        items: Track items from one page of a search response; only each\n",
+    "            item's ``id`` is read.\n",
+    "        df: Rows collected so far for the current query.\n",
+    "        offset: Search offset of this page; used only in the progress-bar label.\n",
+    "\n",
+    "    Returns:\n",
+    "        ``df`` followed by one row per successfully processed track, or ``df``\n",
+    "        itself when the page produced no rows.\n",
+    "\n",
+    "    Note:\n",
+    "        The audio-feature columns (Danceability through Time Signature) are\n",
+    "        filled with random placeholder values, not real measurements.\n",
+    "    \"\"\"\n",
+    "    # Null items and items without an id cannot be looked up, so drop them.\n",
+    "    track_ids = [item.get(\"id\") for item in (items or []) if item and item.get(\"id\")]\n",
+    "    if not track_ids:\n",
+    "        return df\n",
+    "\n",
+    "    # List to collect rows\n",
+    "    rows = []\n",
+    "\n",
+    "    tracks = spotify_client.tracks(tracks=track_ids).get(\"tracks\") or []\n",
+    "    time.sleep(1)\n",
+    "\n",
+    "    progress = tqdm(\n",
+    "        zip(track_ids, tracks),\n",
+    "        total=len(track_ids),\n",
+    "        desc=f\"Processing tracks {offset+1}-{offset+len(track_ids)}\",\n",
+    "        colour=\"green\",\n",
+    "        bar_format=\"{l_bar}{bar} Elapsed: {elapsed} | Speed: {rate_fmt}\",\n",
+    "        unit=\" track(s)\",\n",
+    "    )\n",
+    "\n",
+    "    # Loop through each track\n",
+    "    for track_id, track in progress:\n",
+    "        # Resolved before the try block so the error message below can never\n",
+    "        # fail itself (track is None if the response holds a null entry).\n",
+    "        track_name = track.get(\"name\") if track else None\n",
+    "\n",
+    "        try:\n",
+    "            if track is None:\n",
+    "                raise ValueError(\"no track data returned\")\n",
+    "\n",
+    "            track_artists = track.get(\"artists\") or []\n",
+    "            track_album = track.get(\"album\") or {}\n",
+    "            album_artists = track_album.get(\"artists\") or []\n",
+    "            album_images = track_album.get(\"images\") or []\n",
+    "\n",
+    "            album = spotify_client.album(track_album.get(\"id\"))\n",
+    "            time.sleep(1)  # Sleep for 1 second to avoid rate limiting\n",
+    "\n",
+    "            track_artists_details = spotify_client.artists([artist.get(\"id\") for artist in track_artists])\n",
+    "            time.sleep(1)  # Sleep for 1 second to avoid rate limiting\n",
+    "\n",
+    "            track_info = {\n",
+    "                # Track details\n",
+    "                \"Track URI\": track.get(\"uri\"),\n",
+    "                \"Track Name\": track.get(\"name\"),\n",
+    "                \"Artist URI(s)\": \", \".join([artist.get(\"uri\") for artist in track_artists]),\n",
+    "                \"Artist Name(s)\": \", \".join([artist.get(\"name\") for artist in track_artists]),\n",
+    "                \"Album URI\": track_album.get(\"uri\"),\n",
+    "                \"Album Name\": track_album.get(\"name\"),\n",
+    "                \"Album Artist URI(s)\": \", \".join([artist.get(\"uri\") for artist in album_artists]),\n",
+    "                \"Album Artist Name(s)\": \", \".join([artist.get(\"name\") for artist in album_artists]),\n",
+    "                \"Album Release Date\": track_album.get(\"release_date\"),\n",
+    "                # None (not an empty list) when the album has no images.\n",
+    "                \"Album Image URL\": album_images[0].get(\"url\") if album_images else None,\n",
+    "                \"Disc Number\": track.get(\"disc_number\"),\n",
+    "                \"Track Number\": track.get(\"track_number\"),\n",
+    "                \"Track Duration (ms)\": track.get(\"duration_ms\"),\n",
+    "                \"Track Preview URL\": track.get(\"preview_url\"),\n",
+    "                \"Explicit\": track.get(\"explicit\"),\n",
+    "                \"Popularity\": track.get(\"popularity\"),\n",
+    "                # A missing external_ids object should not drop the whole track.\n",
+    "                \"ISRC\": (track.get(\"external_ids\") or {}).get(\"isrc\"),\n",
+    "                \"Added By\": \"\",\n",
+    "                \"Added At\": \"\",\n",
+    "                \"Artist Genres\": \", \".join([\n",
+    "                    genre\n",
+    "                    for artist in (track_artists_details.get(\"artists\") or [])\n",
+    "                    if artist\n",
+    "                    for genre in (artist.get(\"genres\") or [])\n",
+    "                ]),\n",
+    "                \"Album Genres\": \"\",  # Deprecated in Spotify API, so we'll leave this blank\n",
+    "                \"Label\": album.get(\"label\"),\n",
+    "                \"Copyrights\": \", \".join([\n",
+    "                    f\"{entry.get('type')} {entry.get('text')}\"\n",
+    "                    for entry in (album.get(\"copyrights\") or [])\n",
+    "                ]),\n",
+    "\n",
+    "                # Audio features\n",
+    "                # NOTE(review): random placeholders, not real measurements;\n",
+    "                # Key, Mode and Time Signature come out as floats here.\n",
+    "                # Confirm this is intended before analysing these columns.\n",
+    "                \"Danceability\": random.uniform(0.0, 0.988),\n",
+    "                \"Energy\": random.uniform(0.0, 0.997),\n",
+    "                \"Key\": random.uniform(0.0, 11.0),\n",
+    "                \"Loudness\": random.uniform(-29.368, 2.769),\n",
+    "                \"Mode\": random.uniform(0.0, 1.0),\n",
+    "                \"Speechiness\": random.uniform(0.0, 0.711),\n",
+    "                \"Acousticness\": random.uniform(0.0, 0.991),\n",
+    "                \"Instrumentalness\": random.uniform(0.0, 0.985),\n",
+    "                \"Liveness\": random.uniform(0.012, 0.989),\n",
+    "                \"Valence\": random.uniform(0.0, 0.995),\n",
+    "                \"Tempo\": random.uniform(0.0, 217.913),\n",
+    "                \"Time Signature\": random.uniform(0.0, 5.0),\n",
+    "            }\n",
+    "\n",
+    "            rows.append(track_info)\n",
+    "\n",
+    "        except Exception as e:\n",
+    "            # Best effort: report the failing track and carry on with the rest.\n",
+    "            tqdm.write(\n",
+    "                f\"Error occured for proccessing track {track_name} with track id {track_id}: {e}\")\n",
+    "            continue\n",
+    "\n",
+    "    # Nothing succeeded on this page: hand back the accumulator untouched.\n",
+    "    if not rows:\n",
+    "        return df\n",
+    "\n",
+    "    # Convert list to DataFrame and append it to the existing DataFrame\n",
+    "    new_data = pd.DataFrame(rows)\n",
+    "    return pd.concat([df, new_data], ignore_index=True)\n",
+    "\n",
+    "\n",
+    "\n",
+    "# Column layout shared by every per-query CSV.\n",
+    "OUTPUT_COLUMNS = [\n",
+    "    'Track URI', 'Track Name', 'Artist URI(s)', 'Artist Name(s)',\n",
+    "    'Album URI', 'Album Name', 'Album Artist URI(s)',\n",
+    "    'Album Artist Name(s)', 'Album Release Date', 'Album Image URL',\n",
+    "    'Disc Number', 'Track Number', 'Track Duration (ms)',\n",
+    "    'Track Preview URL', 'Explicit', 'Popularity', 'ISRC', 'Added By',\n",
+    "    'Added At', 'Artist Genres', 'Danceability', 'Energy', 'Key',\n",
+    "    'Loudness', 'Mode', 'Speechiness', 'Acousticness', 'Instrumentalness',\n",
+    "    'Liveness', 'Valence', 'Tempo', 'Time Signature', 'Album Genres',\n",
+    "    'Label', 'Copyrights'\n",
+    "]\n",
+    "\n",
+    "# Spotify documents an offset range of 0-1000 for the search endpoint, so\n",
+    "# paging stops there even when `total` reports more matches.\n",
+    "SEARCH_OFFSET_CEILING = 1000\n",
+    "\n",
+    "# Iterate through each query; each one ends up in data/<query>.csv\n",
+    "for query in queries:\n",
+    "    df = pd.DataFrame(columns=OUTPUT_COLUMNS)\n",
+    "\n",
+    "    try:\n",
+    "        logger.info(f\"Querying Spotify API for: {query}\")\n",
+    "        data = spotify_client.search(q=query, limit=max_limit, offset=0, type='track', market='IN')\n",
+    "\n",
+    "        # Get tracks\n",
+    "        tracks = data.get(\"tracks\") or {}\n",
+    "        items = tracks.get(\"items\") or []\n",
+    "        total = tracks.get(\"total\") or 0\n",
+    "\n",
+    "        logger.info(f\"Total tracks: {total}\")\n",
+    "        df = process_data(items, df, 0)\n",
+    "\n",
+    "        # Get remaining tracks\n",
+    "        # NOTE(review): max_offset is used as the offset of the second page and\n",
+    "        # only works because it equals max_limit -- confirm the intended meaning.\n",
+    "        for offset in range(max_offset, min(total, SEARCH_OFFSET_CEILING), max_limit):\n",
+    "            data = spotify_client.search(q=query, limit=max_limit, offset=offset, type='track', market='IN')\n",
+    "\n",
+    "            tracks = data.get(\"tracks\") or {}\n",
+    "            items = tracks.get(\"items\") or []\n",
+    "            df = process_data(items, df, offset)\n",
+    "\n",
+    "        df.to_csv(f\"data/{query}.csv\", index=False)\n",
+    "    except Exception as e:\n",
+    "        logger.error(f\"Error while processing query '{query}': {e}\")\n",
+    "        # Every row costs several API calls, so keep what was already fetched.\n",
+    "        # A separate file name avoids overwriting a complete CSV from an earlier run.\n",
+    "        if not df.empty:\n",
+    "            partial_path = f\"data/{query}.partial.csv\"\n",
+    "            df.to_csv(partial_path, index=False)\n",
+    "            logger.info(f\"Saved {len(df)} partial rows to {partial_path}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}