Update app.py
Browse files
app.py
CHANGED
|
@@ -9,12 +9,22 @@ BUNDLE_PATH = "spotify_recommender.pkl"
|
|
| 9 |
with open(BUNDLE_PATH, "rb") as f:
|
| 10 |
bundle = pickle.load(f)
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
nn_model: NearestNeighbors = bundle["nn_model"]
|
| 13 |
features: np.ndarray = bundle["features"]
|
| 14 |
track_labels = bundle["track_labels"]
|
| 15 |
|
|
|
|
| 16 |
label_to_index = {label: i for i, label in enumerate(track_labels)}
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def _split_label(label: str):
|
| 19 |
"""
|
| 20 |
label format: 'track_name – artist_name'
|
|
@@ -26,7 +36,6 @@ def _split_label(label: str):
|
|
| 26 |
track_name, artist_name = label, ""
|
| 27 |
return track_name, artist_name
|
| 28 |
|
| 29 |
-
|
| 30 |
def recommend_tracks_ui(query_label: str, k: int):
|
| 31 |
"""
|
| 32 |
Gradio-facing function that:
|
|
@@ -39,20 +48,17 @@ def recommend_tracks_ui(query_label: str, k: int):
|
|
| 39 |
)
|
| 40 |
|
| 41 |
idx = label_to_index[query_label]
|
| 42 |
-
|
| 43 |
n_neighbors = min(len(features), int(k) + 1)
|
| 44 |
distances, indices = nn_model.kneighbors(
|
| 45 |
features[idx:idx + 1],
|
| 46 |
n_neighbors=n_neighbors
|
| 47 |
)
|
| 48 |
-
|
| 49 |
distances = distances[0]
|
| 50 |
indices = indices[0]
|
| 51 |
|
| 52 |
mask = indices != idx
|
| 53 |
indices = indices[mask][:int(k)]
|
| 54 |
distances = distances[mask][:int(k)]
|
| 55 |
-
|
| 56 |
similarities = 1.0 - distances
|
| 57 |
|
| 58 |
rows = []
|
|
@@ -84,7 +90,7 @@ def evaluate_mean_similarity_ui(k: int, n_samples: int):
|
|
| 84 |
if n == 0:
|
| 85 |
return "No tracks in feature matrix."
|
| 86 |
|
| 87 |
-
n_samples = min(n_samples, n)
|
| 88 |
|
| 89 |
rng = np.random.default_rng(42)
|
| 90 |
sample_indices = rng.choice(n, size=n_samples, replace=False)
|
|
@@ -124,17 +130,22 @@ def evaluate_mean_similarity_ui(k: int, n_samples: int):
|
|
| 124 |
with gr.Blocks(title="Spotify Content-Based Recommender") as demo:
|
| 125 |
gr.Markdown("# 🎧 Spotify Content-Based Recommender")
|
| 126 |
gr.Markdown(
|
| 127 |
-
"Select a song
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
)
|
| 129 |
|
| 130 |
with gr.Tab("Recommender"):
|
| 131 |
song_input = gr.Dropdown(
|
| 132 |
-
choices=
|
| 133 |
label="Choose a track",
|
| 134 |
)
|
| 135 |
k_input = gr.Slider(
|
| 136 |
minimum=1,
|
| 137 |
-
maximum=
|
| 138 |
value=10,
|
| 139 |
step=1,
|
| 140 |
label="Number of recommendations (k)",
|
|
@@ -154,10 +165,11 @@ with gr.Blocks(title="Spotify Content-Based Recommender") as demo:
|
|
| 154 |
with gr.Tab("Evaluation"):
|
| 155 |
gr.Markdown(
|
| 156 |
"We measure quality using **mean cosine similarity** between query tracks "
|
| 157 |
-
"and their top-k recommendations."
|
|
|
|
| 158 |
)
|
| 159 |
-
k_eval = gr.Slider(1,
|
| 160 |
-
n_eval = gr.Slider(50,
|
| 161 |
eval_button = gr.Button("Run evaluation")
|
| 162 |
eval_output = gr.Textbox(label="Result")
|
| 163 |
|
|
|
|
| 9 |
with open(BUNDLE_PATH, "rb") as f:
|
| 10 |
bundle = pickle.load(f)
|
| 11 |
|
| 12 |
+
# Expected keys in the bundle:
|
| 13 |
+
# "nn_model" : fitted NearestNeighbors model
|
| 14 |
+
# "features" : feature matrix (N x D), e.g. float32
|
| 15 |
+
# "track_labels": list of "track_name – artist_name" strings
|
| 16 |
nn_model: NearestNeighbors = bundle["nn_model"]
|
| 17 |
features: np.ndarray = bundle["features"]
|
| 18 |
track_labels = bundle["track_labels"]
|
| 19 |
|
| 20 |
+
# Map label -> index for quick lookup (for ALL songs, not just dropdown)
|
| 21 |
label_to_index = {label: i for i, label in enumerate(track_labels)}
|
| 22 |
|
| 23 |
+
# Limit how many songs appear in the dropdown to avoid UI / memory issues
|
| 24 |
+
MAX_DROPDOWN_SONGS = 1000 # you can reduce this to 500 if needed
|
| 25 |
+
display_labels = track_labels[:MAX_DROPDOWN_SONGS]
|
| 26 |
+
|
| 27 |
+
|
| 28 |
def _split_label(label: str):
|
| 29 |
"""
|
| 30 |
label format: 'track_name – artist_name'
|
|
|
|
| 36 |
track_name, artist_name = label, ""
|
| 37 |
return track_name, artist_name
|
| 38 |
|
|
|
|
| 39 |
def recommend_tracks_ui(query_label: str, k: int):
|
| 40 |
"""
|
| 41 |
Gradio-facing function that:
|
|
|
|
| 48 |
)
|
| 49 |
|
| 50 |
idx = label_to_index[query_label]
|
|
|
|
| 51 |
n_neighbors = min(len(features), int(k) + 1)
|
| 52 |
distances, indices = nn_model.kneighbors(
|
| 53 |
features[idx:idx + 1],
|
| 54 |
n_neighbors=n_neighbors
|
| 55 |
)
|
|
|
|
| 56 |
distances = distances[0]
|
| 57 |
indices = indices[0]
|
| 58 |
|
| 59 |
mask = indices != idx
|
| 60 |
indices = indices[mask][:int(k)]
|
| 61 |
distances = distances[mask][:int(k)]
|
|
|
|
| 62 |
similarities = 1.0 - distances
|
| 63 |
|
| 64 |
rows = []
|
|
|
|
| 90 |
if n == 0:
|
| 91 |
return "No tracks in feature matrix."
|
| 92 |
|
| 93 |
+
n_samples = min(n_samples, n, 200)
|
| 94 |
|
| 95 |
rng = np.random.default_rng(42)
|
| 96 |
sample_indices = rng.choice(n, size=n_samples, replace=False)
|
|
|
|
| 130 |
with gr.Blocks(title="Spotify Content-Based Recommender") as demo:
|
| 131 |
gr.Markdown("# 🎧 Spotify Content-Based Recommender")
|
| 132 |
gr.Markdown(
|
| 133 |
+
"Select a song (subset of all tracks for performance) and get similar tracks "
|
| 134 |
+
"using a trained Nearest Neighbors model."
|
| 135 |
+
)
|
| 136 |
+
gr.Markdown(
|
| 137 |
+
f"Currently showing the first **{len(display_labels)}** songs in the dropdown "
|
| 138 |
+
"to keep the app responsive."
|
| 139 |
)
|
| 140 |
|
| 141 |
with gr.Tab("Recommender"):
|
| 142 |
song_input = gr.Dropdown(
|
| 143 |
+
choices=display_labels,
|
| 144 |
label="Choose a track",
|
| 145 |
)
|
| 146 |
k_input = gr.Slider(
|
| 147 |
minimum=1,
|
| 148 |
+
maximum=15,
|
| 149 |
value=10,
|
| 150 |
step=1,
|
| 151 |
label="Number of recommendations (k)",
|
|
|
|
| 165 |
with gr.Tab("Evaluation"):
|
| 166 |
gr.Markdown(
|
| 167 |
"We measure quality using **mean cosine similarity** between query tracks "
|
| 168 |
+
"and their top-k recommendations. Evaluation is limited in size to keep "
|
| 169 |
+
"the app stable."
|
| 170 |
)
|
| 171 |
+
k_eval = gr.Slider(1, 15, value=10, step=1, label="k (top-k neighbors)")
|
| 172 |
+
n_eval = gr.Slider(50, 200, value=100, step=25, label="Number of random tracks to sample")
|
| 173 |
eval_button = gr.Button("Run evaluation")
|
| 174 |
eval_output = gr.Textbox(label="Result")
|
| 175 |
|