"""Gradio demo: content-based track recommender over a small subset of tracks.

Loads a pickled bundle holding a feature matrix and matching track labels,
fits a brute-force cosine nearest-neighbour index on the first ``MAX_SONGS``
rows, and exposes two tabs: one to get recommendations for a chosen track and
one to report the mean top-k cosine similarity over a random sample.
"""
import pickle

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

BUNDLE_PATH = "spotify_recommender.pkl"

# SECURITY NOTE: unpickling can execute arbitrary code. Only load a bundle
# that comes from a trusted source.
with open(BUNDLE_PATH, "rb") as f:
    bundle = pickle.load(f)

features_all: np.ndarray = bundle["features"]
track_labels_all = bundle["track_labels"]

# Only the first MAX_SONGS rows are indexed and offered in the UI.
MAX_SONGS = 100
n_total = features_all.shape[0]
n_used = min(MAX_SONGS, n_total)

features = features_all[:n_used]
# Normalise to a plain list so that integer indexing is positional and the
# dropdown receives a list regardless of the container stored in the bundle.
track_labels = list(track_labels_all[:n_used])

nn_model = NearestNeighbors(metric="cosine", algorithm="brute")
nn_model.fit(features)

# If the same label occurs more than once, the last occurrence wins.
label_to_index = {label: i for i, label in enumerate(track_labels)}


def _split_label(label: str):
    """Split a ``'track_name – artist_name'`` label into its two parts.

    The separator is an en dash (U+2013) surrounded by single spaces, and the
    split happens at its first occurrence.

    Returns:
        ``(track_name, artist_name)``; ``artist_name`` is ``""`` when the
        separator is not present.
    """
    track_name, _, artist_name = label.partition(" – ")
    return track_name, artist_name


def _top_k_neighbors(idx: int, k: int):
    """Return the nearest neighbours of row ``idx``, excluding the row itself.

    Args:
        idx: Row position in ``features`` of the query track.
        k: Maximum number of neighbours to return.

    Returns:
        ``(indices, similarities)`` as two aligned arrays of at most ``k``
        entries, where similarity is ``1 - cosine distance``. Both are empty
        when ``k`` is not positive.
    """
    if k <= 0:
        return np.array([], dtype=int), np.array([], dtype=float)

    # Ask for one extra neighbour because the query row is part of the index
    # and is normally returned as its own closest match.
    n_neighbors = min(features.shape[0], k + 1)
    distances, indices = nn_model.kneighbors(
        features[idx:idx + 1], n_neighbors=n_neighbors
    )
    distances = distances[0]
    indices = indices[0]

    # Drop the query row, then trim: if the query was not among the results,
    # k + 1 candidates remain and the slice cuts them back to k.
    keep = indices != idx
    return indices[keep][:k], 1.0 - distances[keep][:k]


def recommend_tracks_ui(query_label: str, k: int):
    """Gradio callback: recommend the ``k`` tracks most similar to a track.

    Args:
        query_label: Label of the query track, as listed in the dropdown.
        k: Number of recommendations requested (converted with ``int``).

    Returns:
        A DataFrame with columns ``track_name``, ``artist_name`` and
        ``similarity``; a one-row DataFrame with an ``error`` column when the
        label is unknown; or a one-row DataFrame with an ``info`` column when
        there are no neighbours to show.
    """
    if query_label not in label_to_index:
        return pd.DataFrame(
            {"error": ["Track not found. Please select from the dropdown."]}
        )

    idx = label_to_index[query_label]
    indices, similarities = _top_k_neighbors(idx, int(k))

    rows = []
    for i, sim in zip(indices, similarities):
        track_name, artist_name = _split_label(track_labels[i])
        rows.append({
            "track_name": track_name,
            "artist_name": artist_name,
            "similarity": float(sim),
        })

    if not rows:
        return pd.DataFrame({"info": ["No matches, either no matches or you have an unique taste my friend"]})

    return pd.DataFrame(rows)


def evaluate_mean_similarity_ui(k: int, n_samples: int):
    """Gradio callback: report mean top-``k`` cosine similarity on a sample.

    Samples up to ``n_samples`` distinct tracks from the indexed subset,
    averages the similarity of each track's top-``k`` neighbours, and
    summarises those per-track means as mean and standard deviation. The
    random generator is seeded with a fixed value, so repeated calls with the
    same arguments use the same sample.

    Args:
        k: Number of neighbours per sampled track (converted with ``int``).
        n_samples: Number of tracks to sample (converted with ``int`` and
            capped at the subset size).

    Returns:
        A human-readable result string, or a short message when there is
        nothing to evaluate.
    """
    k = int(k)
    n = features.shape[0]
    if n == 0:
        return "No tracks found"

    # Clamp to [0, n]: a negative size would make rng.choice raise.
    n_samples = max(0, min(int(n_samples), n))

    rng = np.random.default_rng(42)
    sample_indices = rng.choice(n, size=n_samples, replace=False)

    all_means = []
    for idx in sample_indices:
        _, similarities = _top_k_neighbors(idx, k)
        if len(similarities) == 0:
            continue
        all_means.append(similarities.mean())

    if not all_means:
        return "evaluation failed, try again my friend"

    all_means = np.array(all_means)
    mean_sim = float(all_means.mean())
    std_sim = float(all_means.std())

    return (
        f"Mean top-{k} cosine similarity over {len(all_means)} random tracks "
        f"(subset of {n_used} tracks): {mean_sim:.4f} ± {std_sim:.4f}"
    )


with gr.Blocks(title="Spotify Content-Based Recommender (Subset)") as demo:
    gr.Markdown("# Music Recommender - now that what i call music")
    gr.Markdown(
        f"It only uses **{n_used}** tracks from the full dataset "
        "to make sure all PC can handle it"
    )

    with gr.Tab("Recommender"):
        song_input = gr.Dropdown(
            choices=track_labels,
            label="Choose a track (subset)",
        )
        k_input = gr.Slider(
            minimum=1,
            maximum=10,
            value=5,
            step=1,
            label="Number of recommendations",
        )
        recommend_button = gr.Button("Find recommends - find more music you grove")
        rec_output = gr.Dataframe(
            label="Recommended Tracks - that match your grove",
            interactive=False
        )

        recommend_button.click(
            fn=recommend_tracks_ui,
            inputs=[song_input, k_input],
            outputs=rec_output,
        )

    with gr.Tab("Evaluation"):
        gr.Markdown(
            "The recommender evaluates this subset using **mean cosine similarity** "
            "between query tracks and their top-k neighbors."
        )
        k_eval = gr.Slider(1, 10, value=5, step=1, label="k (top-k neighbors)")
        n_eval = gr.Slider(10, 100, value=50, step=10, label="Number of random tracks to sample")
        eval_button = gr.Button("Run evaluation")
        eval_output = gr.Textbox(label="Result")

        eval_button.click(
            fn=evaluate_mean_similarity_ui,
            inputs=[k_eval, n_eval],
            outputs=eval_output,
        )

# Launched at import time, exactly as in the original script.
demo.launch()