{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "CELLULE 1 — Imports & Load Data" ], "metadata": { "id": "gpIFwPmXQEmo" } }, { "cell_type": "code", "source": [ "# Install dependencies with the %pip magic so they land in the running kernel's\n", "# environment (a !pip subshell may resolve to a different interpreter).\n", "%pip install vaderSentiment statsmodels scikit-learn matplotlib seaborn -q\n", "\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import classification_report, accuracy_score\n", "from sklearn.preprocessing import LabelEncoder\n", "from statsmodels.tsa.arima.model import ARIMA\n", "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n", "import warnings\n", "\n", "# NOTE(review): this silences every warning for the rest of the session,\n", "# including deprecation and model-fitting warnings; consider narrowing it.\n", "warnings.filterwarnings('ignore')\n", "\n", "# Global matplotlib style for all figures in the notebook.\n", "plt.style.use('seaborn-v0_8')\n", "\n", "# Both CSV files are read relative to the current working directory.\n", "df = pd.read_csv('spotify_enriched.csv')\n", "df_reviews = pd.read_csv('spotify_with_reviews.csv')\n", "\n", "# Quick sanity check on the loaded shapes, then preview the main table.\n", "print(\"Main dataset:\", df.shape)\n", "print(\"Reviews dataset:\", df_reviews.shape)\n", "df.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 429 }, "id": "H8BLW-91QGaA", "outputId": "6b79947e-485e-49e0-f8b8-e5a97dd6ecde" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Main dataset: (113999, 27)\n", "Reviews dataset: (5000, 28)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ " track_name artists \\\n", "0 Comedy Gen Hoshino \n", "1 Ghost - Acoustic Ben Woodward \n", "2 To Begin Again Ingrid Michaelson;ZAYN \n", "3 Can't Help Falling In Love Kina Grannis \n", "4 Hold On Chord Overstreet \n", "\n", " album_name track_genre popularity \\\n", "0 Comedy acoustic 73 \n", "1 Ghost (Acoustic) acoustic 55 \n", "2 To Begin Again acoustic 57 \n", "3 Crazy Rich 
Asians (Original Motion Picture Sou... acoustic 71 \n", "4 Hold On acoustic 82 \n", "\n", " duration_ms danceability energy loudness speechiness ... \\\n", "0 230666 0.676 0.4610 -6.746 0.1430 ... \n", "1 149610 0.420 0.1660 -17.235 0.0763 ... \n", "2 210826 0.438 0.3590 -9.734 0.0557 ... \n", "3 201933 0.266 0.0596 -18.515 0.0363 ... \n", "4 198853 0.618 0.4430 -9.681 0.0526 ... \n", "\n", " duration_min popularity_tier monthly_streams playlist_adds skip_rate \\\n", "0 3.844433 High 73056144 46407 0.284 \n", "1 2.493500 Medium 54667427 33076 0.445 \n", "2 3.513767 Medium 56749925 33788 0.447 \n", "3 3.365550 High 70000730 38064 0.402 \n", "4 3.314217 High 81782405 52819 0.424 \n", "\n", " save_rate release_year top_region avg_user_sentiment sentiment_label \n", "0 0.343 2019 Latin America 0.750 Positive \n", "1 0.339 2018 Europe 0.339 Negative \n", "2 0.290 2023 North America 0.323 Negative \n", "3 0.428 2022 Europe 0.321 Negative \n", "4 0.470 2015 Global 0.480 Neutral \n", "\n", "[5 rows x 27 columns]" ], "text/html": [ "\n", "
| \n", " | track_name | \n", "artists | \n", "album_name | \n", "track_genre | \n", "popularity | \n", "duration_ms | \n", "danceability | \n", "energy | \n", "loudness | \n", "speechiness | \n", "... | \n", "duration_min | \n", "popularity_tier | \n", "monthly_streams | \n", "playlist_adds | \n", "skip_rate | \n", "save_rate | \n", "release_year | \n", "top_region | \n", "avg_user_sentiment | \n", "sentiment_label | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Comedy | \n", "Gen Hoshino | \n", "Comedy | \n", "acoustic | \n", "73 | \n", "230666 | \n", "0.676 | \n", "0.4610 | \n", "-6.746 | \n", "0.1430 | \n", "... | \n", "3.844433 | \n", "High | \n", "73056144 | \n", "46407 | \n", "0.284 | \n", "0.343 | \n", "2019 | \n", "Latin America | \n", "0.750 | \n", "Positive | \n", "
| 1 | \n", "Ghost - Acoustic | \n", "Ben Woodward | \n", "Ghost (Acoustic) | \n", "acoustic | \n", "55 | \n", "149610 | \n", "0.420 | \n", "0.1660 | \n", "-17.235 | \n", "0.0763 | \n", "... | \n", "2.493500 | \n", "Medium | \n", "54667427 | \n", "33076 | \n", "0.445 | \n", "0.339 | \n", "2018 | \n", "Europe | \n", "0.339 | \n", "Negative | \n", "
| 2 | \n", "To Begin Again | \n", "Ingrid Michaelson;ZAYN | \n", "To Begin Again | \n", "acoustic | \n", "57 | \n", "210826 | \n", "0.438 | \n", "0.3590 | \n", "-9.734 | \n", "0.0557 | \n", "... | \n", "3.513767 | \n", "Medium | \n", "56749925 | \n", "33788 | \n", "0.447 | \n", "0.290 | \n", "2023 | \n", "North America | \n", "0.323 | \n", "Negative | \n", "
| 3 | \n", "Can't Help Falling In Love | \n", "Kina Grannis | \n", "Crazy Rich Asians (Original Motion Picture Sou... | \n", "acoustic | \n", "71 | \n", "201933 | \n", "0.266 | \n", "0.0596 | \n", "-18.515 | \n", "0.0363 | \n", "... | \n", "3.365550 | \n", "High | \n", "70000730 | \n", "38064 | \n", "0.402 | \n", "0.428 | \n", "2022 | \n", "Europe | \n", "0.321 | \n", "Negative | \n", "
| 4 | \n", "Hold On | \n", "Chord Overstreet | \n", "Hold On | \n", "acoustic | \n", "82 | \n", "198853 | \n", "0.618 | \n", "0.4430 | \n", "-9.681 | \n", "0.0526 | \n", "... | \n", "3.314217 | \n", "High | \n", "81782405 | \n", "52819 | \n", "0.424 | \n", "0.470 | \n", "2015 | \n", "Global | \n", "0.480 | \n", "Neutral | \n", "
5 rows × 27 columns
\n", "