Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import joblib
|
| 5 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
+
|
| 7 |
+
# Path of the serialized recommender bundle, relative to the working directory.
BUNDLE_PATH = "spotify_recommender.joblib"  # make sure this is in the Space

# NOTE(review): joblib.load unpickles arbitrary Python objects. Only load a
# bundle that ships with the app; never point this at user-supplied files.
bundle = joblib.load(BUNDLE_PATH)

nn_model = bundle["nn_model"]  # model object queried through .kneighbors()
features = bundle["features"]  # feature matrix (N x D)
data = bundle["data"]  # metadata DataFrame
track_labels = bundle["track_labels"]  # list of "track – artist" labels

# Map each dropdown label to its position in `track_labels`.
# If a label occurs more than once, the last occurrence wins.
label_to_index = {label: i for i, label in enumerate(track_labels)}
|
| 16 |
+
|
| 17 |
+
def recommend_tracks_ui(query_label, k):
    """Return up to ``k`` nearest-neighbour tracks for ``query_label``.

    Args:
        query_label: Label of the query track; looked up in
            ``label_to_index``.
        k: Number of recommendations requested (coerced with ``int``).

    Returns:
        A DataFrame with the columns ``track_name``, ``artist_name``,
        ``album_name``, ``track_popularity`` and ``similarity``, one row per
        neighbour in the order returned by ``nn_model.kneighbors``. When the
        label is not in ``label_to_index``, a one-row DataFrame with a single
        ``error`` column is returned instead.
    """
    if query_label not in label_to_index:
        return pd.DataFrame(
            {"error": ["Track not found. Please select from the dropdown."]}
        )

    k = int(k)
    idx = label_to_index[query_label]

    # Request k + 1 neighbours so that k remain once the query track itself
    # is dropped; never ask for more rows than the feature matrix holds.
    # `.shape[0]` is used instead of `len()` so sparse matrices work too.
    distances, indices = nn_model.kneighbors(
        features[idx:idx + 1],
        n_neighbors=min(features.shape[0], k + 1),
    )
    indices = indices[0]
    distances = distances[0]

    # Remove the query track, then cap at k (covers the case where the query
    # was not among the returned neighbours and k + 1 rows survive).
    not_self = indices != idx
    indices = indices[not_self][:k]
    distances = distances[not_self][:k]

    # similarity = 1 - distance
    # NOTE(review): this is a cosine similarity only if nn_model was fitted
    # with the cosine metric — confirm against the training code.
    similarities = 1.0 - distances

    columns = [
        "track_name",
        "artist_name",
        "album_name",
        "track_popularity",
    ]
    # kneighbors yields row positions, so select positionally with iloc;
    # label-based .loc would pick wrong rows (or raise) whenever `data` does
    # not carry a default 0..N-1 index.
    results = data.iloc[indices][columns].copy()
    results["similarity"] = similarities

    return results.reset_index(drop=True)
|
| 50 |
+
|
| 51 |
+
def evaluate_mean_similarity_ui(k, n_samples):
    """Summarize the mean top-``k`` similarity over a random sample of tracks.

    For each sampled track the ``k`` nearest neighbours (excluding the track
    itself) are queried from ``nn_model`` and the mean of ``1 - distance`` is
    recorded; the mean and standard deviation of those per-track means are
    reported.

    Args:
        k: Number of neighbours per track (coerced with ``int``).
        n_samples: Number of tracks to sample (coerced with ``int`` and
            capped at the number of rows in ``features``).

    Returns:
        A human-readable result string. If no sampled track has any
        neighbour besides itself, a short explanatory message is returned
        instead of NaN statistics.
    """
    k = int(k)
    n_samples = int(n_samples)
    n = features.shape[0]
    n_samples = min(n_samples, n)

    # Fixed seed: the same tracks are sampled on every run.
    rng = np.random.default_rng(42)
    sample_indices = rng.choice(n, size=n_samples, replace=False)

    # k + 1 because the query track is dropped from its own neighbour list.
    n_neighbors = min(n, k + 1)

    all_means = []
    for idx in sample_indices:
        # Row slice (not fancy indexing) keeps the query 2-D, matching how
        # single tracks are queried elsewhere in this file.
        distances, indices = nn_model.kneighbors(
            features[idx:idx + 1],
            n_neighbors=n_neighbors,
        )
        distances = distances[0]
        indices = indices[0]

        # drop self
        distances = distances[indices != idx][:k]
        if distances.size == 0:
            # Nothing left to average; skipping avoids a NaN mean.
            continue

        similarities = 1.0 - distances
        all_means.append(similarities.mean())

    if not all_means:
        return "No neighbours available to evaluate (need at least 2 tracks and k >= 1)."

    all_means = np.array(all_means)
    mean_sim = float(all_means.mean())
    std_sim = float(all_means.std())

    return f"Mean top-{k} cosine similarity over {n_samples} random tracks: {mean_sim:.4f} ± {std_sim:.4f}"
|
| 81 |
+
|
| 82 |
+
# Two-tab UI: one tab to query recommendations, one to run the evaluation.
with gr.Blocks(title="Spotify Content-Based Recommender") as demo:
    gr.Markdown("# 🎧 Spotify Content-Based Recommender")
    gr.Markdown(
        "Select a song and get similar tracks based on a trained Nearest Neighbors model."
    )

    with gr.Tab("Recommender"):
        song_input = gr.Dropdown(
            choices=track_labels,
            label="Choose a track",
        )
        k_input = gr.Slider(
            minimum=1,
            maximum=20,
            value=10,
            step=1,
            label="Number of recommendations (k)",
        )
        recommend_button = gr.Button("Recommend")
        rec_output = gr.Dataframe(
            label="Recommended Tracks",
            interactive=False,
        )

        # Clicking the button feeds (track label, k) to the recommender and
        # shows the returned DataFrame.
        recommend_button.click(
            fn=recommend_tracks_ui,
            inputs=[song_input, k_input],
            outputs=rec_output,
        )

    with gr.Tab("Evaluation"):
        gr.Markdown(
            "We measure quality using **mean cosine similarity** between query tracks and their top-k recommendations."
        )
        k_eval = gr.Slider(1, 20, value=10, step=1, label="k (top-k neighbors)")
        n_eval = gr.Slider(50, 500, value=200, step=50, label="Number of random tracks to sample")
        eval_button = gr.Button("Run evaluation")
        eval_output = gr.Textbox(label="Result")

        # Clicking the button runs the evaluation and shows its summary text.
        eval_button.click(
            fn=evaluate_mean_similarity_ui,
            inputs=[k_eval, n_eval],
            outputs=eval_output,
        )

# Start the server only when executed as a script, so importing this module
# (e.g. from a test or another tool) does not launch the app.
if __name__ == "__main__":
    demo.launch()
|