Spaces:
Runtime error
Runtime error
| from __future__ import print_function | |
| import os | |
| import json | |
| import time | |
| import sys | |
| import pandas as pd | |
| import numpy as np | |
| import seaborn as sn | |
| import gradio as gr | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.neighbors import NearestNeighbors | |
| import spotipy | |
| from spotipy.oauth2 import SpotifyClientCredentials | |
# Spotify API credentials come from the environment; each value is None when
# its variable is not set.
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")

# Module-wide Spotify client built from a client-credentials manager.
sp = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(
        client_id=client_id,
        client_secret=client_secret,
    )
)
# Load the track catalogue, drop the listed columns, keep a single row per
# track URI, and renumber the rows from 0 so that positional lookups
# (`df.iloc[i]`) line up with the rows of the processed feature matrix.
df = (
    pd.read_csv('spotify_data.csv')
    .drop(
        columns=[
            'Unnamed: 0',
            "Unnamed: 0.1",
            "pos",
            "artist_uri",
            "album_uri",
            "duration_ms_x",
            "album_name",
            "name",
            "type",
            "id",
            "track_href",
            "analysis_url",
            "duration_ms_y",
            "time_signature",
            "artist_pop",
            "track_pop",
        ]
    )
    .drop_duplicates(subset=['uri'])
    .reset_index(drop=True)
)
# Restrict the catalogue to its float64/int64 columns before preprocessing.
df_num = df.select_dtypes(include=['float64', 'int64'])

# Continuous audio features are standardised; `key` and `mode` are one-hot
# encoded as categories.
numeric_cols = [
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'speechiness',
    'tempo',
    'valence',
]
categorical_cols = ['key', 'mode']

# Preprocessing pipeline shared by the catalogue and by incoming queries.
# Categories unseen at fit time are ignored instead of raising.
preprocessing_pipeline = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ]
)

# Fit on the catalogue and transform it in one pass.
df_processed = preprocessing_pipeline.fit_transform(df_num)

# Output column names: the numeric names unchanged, followed by the names the
# fitted one-hot encoder generated for the categorical columns.
num_cols_transformed = numeric_cols
cat_cols_transformed = (
    preprocessing_pipeline
    .named_transformers_['cat']
    .get_feature_names_out(categorical_cols)
)
all_cols_transformed = num_cols_transformed + cat_cols_transformed.tolist()

# Wrap the transformed array in a DataFrame labelled with those names.
df_processed = pd.DataFrame(df_processed, columns=all_cols_transformed)
def transform_query(track_uri):
    """Fetch a track's audio features and return them as a one-row DataFrame.

    Args:
        track_uri: Track identifier passed straight to ``sp.audio_features``.

    Returns:
        A ``pandas.DataFrame`` with a single row and the columns
        acousticness, danceability, energy, instrumentalness, liveness,
        loudness, speechiness, tempo, valence, key and mode (in that order).

    Raises:
        ValueError: If the Spotify client returns no audio features for the
            track (an empty response or a ``None`` entry).
    """
    feature_names = (
        'acousticness',
        'danceability',
        'energy',
        'instrumentalness',
        'liveness',
        'loudness',
        'speechiness',
        'tempo',
        'valence',
        'key',
        'mode',
    )

    response = sp.audio_features(track_uri)
    audio_features = response[0] if response else None
    if audio_features is None:
        # Without this check the failure surfaces as an opaque
        # "'NoneType' object is not subscriptable" TypeError below.
        raise ValueError(f"No audio features available for track: {track_uri!r}")

    return pd.DataFrame([{name: audio_features[name] for name in feature_names}])
# Nearest-neighbour index over the whole processed catalogue (no train/test
# split: every track is a candidate recommendation).
knn_model = NearestNeighbors(
    n_neighbors=10,
    algorithm='auto',
    metric='euclidean',
).fit(df_processed)
# Function to find similar songs to the input URI
def find_similar_songs(track_uri):
    """Return the ten catalogue tracks nearest to the given Spotify track.

    Args:
        track_uri: Track identifier forwarded to ``transform_query``.

    Returns:
        A list of ``(artist_name, track_name, url)`` tuples, in the order the
        KNN model returns its neighbours, where ``url`` is an
        ``https://open.spotify.com/track/<id>`` link built from the last
        ``:``-separated part of the catalogue ``uri``. Returns an empty list
        when ``track_uri`` is ``None`` or a blank string.
    """
    # The Gradio interface in this file runs with live=True, so this function
    # is also invoked while the textbox is still empty; skip the API call then.
    if track_uri is None or (isinstance(track_uri, str) and not track_uri.strip()):
        return []

    query_data = transform_query(track_uri)

    # Apply the preprocessing fitted on the catalogue, and label the columns
    # the same way as the frame the KNN model was fitted on.
    query_scaled = preprocessing_pipeline.transform(query_data)
    query_frame = pd.DataFrame(query_scaled, columns=all_cols_transformed)

    _, indices = knn_model.kneighbors(query_frame, n_neighbors=10)

    # One positional selection for all neighbours instead of three lookups
    # per neighbour.
    matches = df.iloc[indices[0]]
    return [
        (artist, title, f"https://open.spotify.com/track/{uri.split(':')[-1]}")
        for artist, title, uri in zip(
            matches['artist_name'], matches['track_name'], matches['uri']
        )
    ]
# Runs one recommendation lookup at import time for a fixed track URL and
# binds the result to `similar_songs`.
# NOTE(review): nothing in the visible code reads this module-level value, and
# the call goes through the Spotify client before the UI starts -- confirm it
# is still wanted.
similar_songs = find_similar_songs('https://open.spotify.com/track/6rDaCGqcQB1urhpCrrD599?si=2ac7add2ea054ab2')
def format_output(similar_songs):
    """Convert song tuples into a DataFrame with display-friendly headers.

    Args:
        similar_songs: Iterable of indexable records whose first three items
            are the artist name, the song name and the Spotify track URL.

    Returns:
        A ``pandas.DataFrame`` with the columns "Artist Name", "Song Name"
        and "Spotify Track URL", one row per input record.
    """
    rows = [
        {
            "Artist Name": entry[0],
            "Song Name": entry[1],
            "Spotify Track URL": entry[2],
        }
        for entry in similar_songs
    ]
    return pd.DataFrame(rows)
# Create the Gradio interface: one textbox in, a three-column table out.
# live=True re-runs the function whenever the input changes.
iface = gr.Interface(
    fn=find_similar_songs,
    inputs=gr.Textbox(label="Enter Spotify Track URL"),
    outputs=gr.Dataframe(headers=["Artist Name", "Song Name", "Spotify Track URL"]),
    live=True,
)

# `share` has to be passed as a keyword argument. The previous call passed the
# string "share=True" positionally, so it was bound to launch()'s first
# parameter and sharing was never actually requested.
iface.launch(share=True)