# Iueleflaekkefar's picture
# Update app.py
# 85c0cc5 verified
import pickle
import numpy as np
import pandas as pd
import gradio as gr
from sklearn.neighbors import NearestNeighbors
# Pickled bundle that holds the feature matrix and the matching track labels.
BUNDLE_PATH = "spotify_recommender.pkl"

# NOTE: unpickling can execute arbitrary code, so BUNDLE_PATH must only ever
# point at a file that is trusted.
with open(BUNDLE_PATH, "rb") as bundle_file:
    bundle = pickle.load(bundle_file)

features_all: np.ndarray = bundle["features"]
track_labels_all = bundle["track_labels"]

# Only the first MAX_SONGS entries of the bundle are indexed and offered in
# the UI; a smaller bundle is used in full.
MAX_SONGS = 100
n_total = features_all.shape[0]
n_used = n_total if n_total < MAX_SONGS else MAX_SONGS
features, track_labels = features_all[:n_used], track_labels_all[:n_used]

# Exhaustive (brute-force) cosine-distance search over the subset.
nn_model = NearestNeighbors(metric="cosine", algorithm="brute").fit(features)

# Label -> row position in `features`; if a label occurs more than once, the
# last occurrence is the one that is kept.
label_to_index = dict(zip(track_labels, range(len(track_labels))))
def _split_label(label: str):
"""
label format: 'track_name – artist_name'
Uses an en dash (U+2013). Falls back gracefully if not present.
"""
if " – " in label:
track_name, artist_name = label.split(" – ", 1)
else:
track_name, artist_name = label, ""
return track_name, artist_name
def recommend_tracks_ui(query_label: str, k: int):
    """Return the tracks most similar to the selected one, for the Gradio table.

    Parameters
    ----------
    query_label:
        Label of the query track. It must be a key of ``label_to_index``.
    k:
        Number of recommendations wanted. It is converted with ``int()``
        before use. Values below 1 yield the "no matches" frame instead of
        being passed on to the nearest-neighbour model.

    Returns
    -------
    pandas.DataFrame
        One row per recommendation with the columns ``track_name``,
        ``artist_name`` and ``similarity`` (``1 - cosine distance``), in the
        order returned by the nearest-neighbour model. If the label is
        unknown the frame has a single ``error`` column; if nothing can be
        recommended it has a single ``info`` column.
    """
    if query_label not in label_to_index:
        return pd.DataFrame(
            {"error": ["Track not found. Please select from the dropdown."]}
        )

    idx = label_to_index[query_label]
    k = int(k)

    rows = []
    # A k below 1 would request fewer than one neighbour, which scikit-learn
    # rejects with a ValueError; such requests fall through to the "info"
    # frame, exactly as k == 0 already did.
    if k >= 1:
        # Ask for one extra neighbour: the query track is normally returned
        # as its own nearest neighbour and is removed below.
        n_neighbors = min(len(features), k + 1)
        distances, indices = nn_model.kneighbors(
            features[idx:idx + 1],
            n_neighbors=n_neighbors,
        )
        distances, indices = distances[0], indices[0]

        # Drop the query itself, then trim to k in case the query was not
        # among the results and all k + 1 neighbours survived the filter.
        keep = indices != idx
        neighbor_ids = indices[keep][:k]
        similarities = 1.0 - distances[keep][:k]

        for neighbor_id, sim in zip(neighbor_ids, similarities):
            track_name, artist_name = _split_label(track_labels[neighbor_id])
            rows.append({
                "track_name": track_name,
                "artist_name": artist_name,
                "similarity": float(sim),
            })

    if not rows:
        return pd.DataFrame({"info": ["No matches, either no matches or you have an unique taste my friend"]})
    return pd.DataFrame(rows)
def evaluate_mean_similarity_ui(k: int, n_samples: int):
    """Summarise how similar sampled tracks are to their nearest neighbours.

    Up to ``n_samples`` distinct tracks (capped at the subset size) are drawn
    with a generator seeded with 42, so the same arguments always pick the
    same tracks. For every sampled track the cosine similarities
    (``1 - cosine distance``) to its top-``k`` neighbours, excluding the track
    itself, are averaged. The report gives the mean and standard deviation of
    those per-track averages.

    Parameters
    ----------
    k:
        Number of neighbours considered per sampled track (converted with
        ``int()``).
    n_samples:
        Number of tracks to sample (converted with ``int()``).

    Returns
    -------
    str
        The formatted result, or a short message when there are no tracks or
        no similarity could be computed (including ``k`` or ``n_samples``
        below 1).
    """
    k = int(k)
    n_samples = int(n_samples)

    n = features.shape[0]
    if n == 0:
        return "No tracks found"

    failure_message = "evaluation failed, try again my friend"

    # Values below 1 cannot produce any neighbour; negative ones would make
    # the sampler or the neighbour query raise. Zero already ended in the
    # failure message, so every such value is reported the same way.
    if k < 1 or n_samples < 1:
        return failure_message

    n_samples = min(n_samples, n)
    rng = np.random.default_rng(42)
    sample_indices = rng.choice(n, size=n_samples, replace=False)

    # One extra neighbour is requested because each query is normally its own
    # nearest neighbour. All sampled tracks are queried in a single call
    # rather than one call per track.
    n_neighbors = min(n, k + 1)
    all_distances, all_indices = nn_model.kneighbors(
        features[sample_indices],
        n_neighbors=n_neighbors,
    )

    all_means = []
    for idx, distances, indices in zip(sample_indices, all_distances, all_indices):
        # Drop self, then keep at most k neighbours.
        distances = distances[indices != idx][:k]
        if len(distances) == 0:
            continue
        all_means.append((1.0 - distances).mean())

    if not all_means:
        return failure_message

    all_means = np.array(all_means)
    mean_sim = float(all_means.mean())
    std_sim = float(all_means.std())
    return (
        f"Mean top-{k} cosine similarity over {len(all_means)} random tracks "
        f"(subset of {n_used} tracks): {mean_sim:.4f} ± {std_sim:.4f}"
    )
# Two-tab Gradio interface: one tab to request recommendations for a chosen
# track, one tab to run the similarity evaluation over the subset.
with gr.Blocks(title="Spotify Content-Based Recommender (Subset)") as demo:
    gr.Markdown("# Music Recommender - now that what i call music")
    gr.Markdown(
        f"It only uses **{n_used}** tracks from the full dataset to make sure all PC can handle it"
    )

    with gr.Tab("Recommender"):
        # Inputs: the query track and how many neighbours to list.
        song_input = gr.Dropdown(choices=track_labels, label="Choose a track (subset)")
        k_input = gr.Slider(1, 10, value=5, step=1, label="Number of recommendations")
        recommend_button = gr.Button("Find recommends - find more music you grove")

        # Output: read-only table filled by recommend_tracks_ui.
        rec_output = gr.Dataframe(
            label="Recommended Tracks - that match your grove",
            interactive=False,
        )
        recommend_button.click(
            recommend_tracks_ui,
            [song_input, k_input],
            rec_output,
        )

    with gr.Tab("Evaluation"):
        gr.Markdown(
            "The recommender evaluates this subset using **mean cosine similarity** between query tracks and their top-k neighbors."
        )
        k_eval = gr.Slider(
            minimum=1,
            maximum=10,
            value=5,
            step=1,
            label="k (top-k neighbors)",
        )
        n_eval = gr.Slider(
            minimum=10,
            maximum=100,
            value=50,
            step=10,
            label="Number of random tracks to sample",
        )
        eval_button = gr.Button("Run evaluation")

        # Output: one-line textual report from evaluate_mean_similarity_ui.
        eval_output = gr.Textbox(label="Result")
        eval_button.click(
            evaluate_mean_similarity_ui,
            [k_eval, n_eval],
            eval_output,
        )

# Start the web server as soon as the module is executed.
demo.launch()