Iueleflaekkefar committed on
Commit
ab4aa9a
·
verified ·
1 Parent(s): a46b5e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -34
app.py CHANGED
@@ -1,57 +1,89 @@
1
- import gradio as gr
2
- import pandas as pd
3
  import numpy as np
4
- import joblib
5
- from sklearn.metrics.pairwise import cosine_similarity
 
6
 
7
- BUNDLE_PATH = "spotify_recommender.joblib" # make sure this is in the Space
8
- bundle = joblib.load(BUNDLE_PATH)
9
 
10
- nn_model = bundle["nn_model"]
11
- features = bundle["features"] # feature matrix (N x D)
12
- data = bundle["data"] # metadata DataFrame
13
- track_labels = bundle["track_labels"] # list of "track – artist" labels
 
 
14
 
15
  label_to_index = {label: i for i, label in enumerate(track_labels)}
16
 
17
- def recommend_tracks_ui(query_label, k):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  if query_label not in label_to_index:
19
  return pd.DataFrame(
20
  {"error": ["Track not found. Please select from the dropdown."]}
21
  )
22
 
23
  idx = label_to_index[query_label]
24
- # Get k+1 neighbors and drop the first one (it is the track itself)
 
25
  distances, indices = nn_model.kneighbors(
26
- features[idx:idx+1],
27
- n_neighbors=min(len(features), int(k) + 1)
28
  )
29
 
30
- indices = indices[0]
31
  distances = distances[0]
 
32
 
33
- # remove self (distance 0)
34
  mask = indices != idx
35
  indices = indices[mask][:int(k)]
36
  distances = distances[mask][:int(k)]
37
 
38
- # cosine similarity = 1 - cosine distance
39
  similarities = 1.0 - distances
40
 
41
- results = data.loc[indices, [
42
- "track_name",
43
- "artist_name",
44
- "album_name",
45
- "track_popularity",
46
- ]].copy()
47
- results["similarity"] = similarities
48
-
49
- return results.reset_index(drop=True)
50
-
51
- def evaluate_mean_similarity_ui(k, n_samples):
 
 
 
 
 
 
 
 
 
 
 
 
52
  k = int(k)
53
  n_samples = int(n_samples)
54
  n = features.shape[0]
 
 
 
55
  n_samples = min(n_samples, n)
56
 
57
  rng = np.random.default_rng(42)
@@ -59,30 +91,40 @@ def evaluate_mean_similarity_ui(k, n_samples):
59
 
60
  all_means = []
61
  for idx in sample_indices:
 
62
  distances, indices = nn_model.kneighbors(
63
- features[idx:idx+1],
64
- n_neighbors=min(len(features), k + 1)
65
  )
 
66
  distances = distances[0]
67
  indices = indices[0]
68
 
69
- # drop self
70
  mask = indices != idx
71
  distances = distances[mask][:k]
72
 
 
 
 
73
  similarities = 1.0 - distances
74
  all_means.append(similarities.mean())
75
 
 
 
 
76
  all_means = np.array(all_means)
77
  mean_sim = float(all_means.mean())
78
  std_sim = float(all_means.std())
79
 
80
- return f"Mean top-{k} cosine similarity over {n_samples} random tracks: {mean_sim:.4f} ± {std_sim:.4f}"
 
 
 
81
 
82
  with gr.Blocks(title="Spotify Content-Based Recommender") as demo:
83
  gr.Markdown("# 🎧 Spotify Content-Based Recommender")
84
  gr.Markdown(
85
- "Select a song and get similar tracks based on a trained Nearest Neighbors model."
86
  )
87
 
88
  with gr.Tab("Recommender"):
@@ -111,7 +153,8 @@ with gr.Blocks(title="Spotify Content-Based Recommender") as demo:
111
 
112
  with gr.Tab("Evaluation"):
113
  gr.Markdown(
114
- "We measure quality using **mean cosine similarity** between query tracks and their top-k recommendations."
 
115
  )
116
  k_eval = gr.Slider(1, 20, value=10, step=1, label="k (top-k neighbors)")
117
  n_eval = gr.Slider(50, 500, value=200, step=50, label="Number of random tracks to sample")
 
1
+ import pickle
 
2
  import numpy as np
3
+ import pandas as pd
4
+ import gradio as gr
5
+ from sklearn.neighbors import NearestNeighbors
6
 
7
+ BUNDLE_PATH = "spotify_recommender.pkl"
 
8
 
9
+ with open(BUNDLE_PATH, "rb") as f:
10
+ bundle = pickle.load(f)
11
+
12
+ nn_model: NearestNeighbors = bundle["nn_model"]
13
+ features: np.ndarray = bundle["features"]
14
+ track_labels = bundle["track_labels"]
15
 
16
  label_to_index = {label: i for i, label in enumerate(track_labels)}
17
 
18
+ def _split_label(label: str):
19
+ """
20
+ label format: 'track_name – artist_name'
21
+ Uses an en dash (U+2013). Falls back gracefully if not present.
22
+ """
23
+ if " – " in label:
24
+ track_name, artist_name = label.split(" – ", 1)
25
+ else:
26
+ track_name, artist_name = label, ""
27
+ return track_name, artist_name
28
+
29
+
30
def recommend_tracks_ui(query_label: str, k: int):
    """Return the nearest neighbours of the selected track as a DataFrame.

    Gradio-facing callback. Looks the label up in ``label_to_index``, queries
    ``nn_model`` with that track's feature row, drops the query track from the
    result and reports each remaining neighbour with a similarity score.

    Args:
        query_label: Label of the query track; must be a key of
            ``label_to_index``.
        k: Number of recommendations requested. Coerced with ``int()``
            because UI widgets may deliver the value as a float.

    Returns:
        A ``pandas.DataFrame`` with columns ``track_name``, ``artist_name``
        and ``similarity``, one row per neighbour in the order returned by the
        model. If the label is unknown, a single-cell DataFrame with an
        ``error`` column is returned; if no neighbour is available (including
        ``k`` below 1), a single-cell DataFrame with an ``info`` column.
    """
    if query_label not in label_to_index:
        return pd.DataFrame(
            {"error": ["Track not found. Please select from the dropdown."]}
        )

    k = int(k)  # convert once instead of at every use
    if k < 1:
        # A negative k would make n_neighbors <= 0, which kneighbors rejects
        # with an exception; report it like any other empty result instead.
        return pd.DataFrame({"info": ["No neighbors found. Try a different k."]})

    idx = label_to_index[query_label]

    # Request one extra neighbour because the query track itself is normally
    # part of the result and is filtered out below.
    n_neighbors = min(len(features), k + 1)
    distances, indices = nn_model.kneighbors(
        features[idx:idx + 1],
        n_neighbors=n_neighbors,
    )

    # kneighbors returns one row per query point; only one point was queried.
    distances = distances[0]
    indices = indices[0]

    # Drop the query track, then keep at most k of the remaining neighbours.
    keep = indices != idx
    indices = indices[keep][:k]
    distances = distances[keep][:k]

    # Similarity is reported as 1 - distance (presumably the model uses a
    # cosine distance, so this is cosine similarity -- confirm against the
    # training code).
    similarities = 1.0 - distances

    rows = []
    for i, sim in zip(indices, similarities):
        track_name, artist_name = _split_label(track_labels[i])
        rows.append({
            "track_name": track_name,
            "artist_name": artist_name,
            "similarity": float(sim),
        })

    if not rows:
        return pd.DataFrame({"info": ["No neighbors found. Try a different k."]})

    return pd.DataFrame(rows)
71
+
72
+
73
+ def evaluate_mean_similarity_ui(k: int, n_samples: int):
74
+ """
75
+ Evaluation function:
76
+ - randomly sample n_samples tracks
77
+ - for each, get top-k neighbors from the model
78
+ - compute mean cosine similarity of those neighbors
79
+ - return mean ± std as a string
80
+ """
81
  k = int(k)
82
  n_samples = int(n_samples)
83
  n = features.shape[0]
84
+ if n == 0:
85
+ return "No tracks in feature matrix."
86
+
87
  n_samples = min(n_samples, n)
88
 
89
  rng = np.random.default_rng(42)
 
91
 
92
  all_means = []
93
  for idx in sample_indices:
94
+ n_neighbors = min(n, k + 1)
95
  distances, indices = nn_model.kneighbors(
96
+ features[idx:idx + 1],
97
+ n_neighbors=n_neighbors
98
  )
99
+
100
  distances = distances[0]
101
  indices = indices[0]
102
 
 
103
  mask = indices != idx
104
  distances = distances[mask][:k]
105
 
106
+ if len(distances) == 0:
107
+ continue
108
+
109
  similarities = 1.0 - distances
110
  all_means.append(similarities.mean())
111
 
112
+ if not all_means:
113
+ return "Could not compute evaluation (no valid neighbors)."
114
+
115
  all_means = np.array(all_means)
116
  mean_sim = float(all_means.mean())
117
  std_sim = float(all_means.std())
118
 
119
+ return (
120
+ f"Mean top-{k} cosine similarity over {len(all_means)} random tracks: "
121
+ f"{mean_sim:.4f} ± {std_sim:.4f}"
122
+ )
123
 
124
  with gr.Blocks(title="Spotify Content-Based Recommender") as demo:
125
  gr.Markdown("# 🎧 Spotify Content-Based Recommender")
126
  gr.Markdown(
127
+ "Select a song and get similar tracks using a trained Nearest Neighbors model."
128
  )
129
 
130
  with gr.Tab("Recommender"):
 
153
 
154
  with gr.Tab("Evaluation"):
155
  gr.Markdown(
156
+ "We measure quality using **mean cosine similarity** between query tracks "
157
+ "and their top-k recommendations."
158
  )
159
  k_eval = gr.Slider(1, 20, value=10, step=1, label="k (top-k neighbors)")
160
  n_eval = gr.Slider(50, 500, value=200, step=50, label="Number of random tracks to sample")