Upload 3 files
Browse files- app.py +62 -0
- artist_recommender.py +228 -0
- requirements.txt +6 -0
app.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import sys
|
| 4 |
+
sys.path.extend(["."])
|
| 5 |
+
import artist_recommender
|
| 6 |
+
|
| 7 |
+
def export_csv(ad_output):
    """Write the recommendations table to ``output.csv`` and expose it for download.

    Args:
        ad_output: Table of recommendations; must provide a ``to_csv`` method
            (the UI passes the contents of the results DataFrame component).

    Returns:
        A visible ``gr.File`` component pointing at the written CSV file.
    """
    csv_path = "output.csv"
    ad_output.to_csv(csv_path, index=False)
    return gr.File(value=csv_path, visible=True)
|
| 10 |
+
|
| 11 |
+
def get_artist_recommendations(client_id, client_secret, client_redirect_uri, artist_list, create_playlist):
    """Gradio callback: compute the closest artists for the current Spotify user.

    Args:
        client_id: Spotify application client id.
        client_secret: Spotify application client secret.
        client_redirect_uri: Redirect URI registered for the Spotify application.
        artist_list: Comma separated artist names; an empty string means
            "use the recently played tracks".
        create_playlist: When truthy, also create a playlist from the result.

    Returns:
        DataFrame produced by ``artist_recommender.get_closest_artists``.

    Raises:
        gr.Error: If the Spotify client cannot fetch the current user.
    """
    print("Initializing Spotify")
    creds = {
        "SPOTIPY_CLIENT_ID": client_id,
        "SPOTIPY_CLIENT_SECRET": client_secret,
        "SPOTIPY_REDIRECT_URI": client_redirect_uri,
    }
    # The credentials are an in-memory dict, not a path to a JSON file, so the
    # file-loading branch of the initializer must be bypassed explicitly.
    artist_recommender.initialize_spotify_client(creds, isfile=False)
    sp = artist_recommender.sp
    try:
        sp.current_user()
    except Exception as exc:
        # Report the failure to the user; calling sys.exit() here would take
        # the whole web app down instead of failing this one request.
        print("Failed to initialize Spotify, are credentials correct?")
        raise gr.Error("Failed to initialize Spotify, are credentials correct?") from exc

    tracks = artist_recommender.get_recently_played(selected_artists=artist_list)
    print("Getting reference features")
    reference_df = pd.DataFrame.from_records(tracks)
    print("Getting matching artist features")
    artist_features = artist_recommender.get_matching_artists(tracks)
    artist_df = pd.DataFrame(artist_features)
    reference_df['source'] = 'reference'
    artist_df['source'] = 'artist'
    df = pd.concat([reference_df, artist_df], ignore_index=True)
    closest_artists = artist_recommender.get_closest_artists(
        df, artist_recommender.MUSIC_FEATURES + artist_recommender.SHEET_FEATURES)
    closest_artists.to_csv("closest_artists.csv", index=False)
    if create_playlist:
        # generate_playlist joins the names with ', ', so hand it a list of
        # names rather than the raw comma separated string.
        artist_names = [name.strip() for name in artist_list.split(",") if name.strip()]
        artist_recommender.generate_playlist(closest_artists, artist_names)
    return closest_artists
|
| 39 |
+
|
| 40 |
+
# Build the Gradio UI: credential inputs, optional artist list, results table
# and a CSV export button.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Spotify Artist Recommender
        """
    )
    client_id = gr.Textbox(label="SPOTIFY_CLIENT_ID", value="")
    # Mask the secret so it is not displayed in clear text on screen.
    client_secret = gr.Textbox(label="SPOTIFY_CLIENT_SECRET", value="", type="password")
    client_redirect_uri = gr.Textbox(label="SPOTIFY_REDIRECT_URI", value="")
    artist_list = gr.Textbox(
        label="Artist list (Optional). Leave blank to use your recent activity. Otherwise a CSV of artists",
        value="")
    create_playlist = gr.Checkbox(label="Generate a playlist", value=True)
    # The button caption is the first positional argument, as for the export
    # button below; passing it as ``label=`` does not set the caption.
    button = gr.Button("Get Artist Recommendations")
    output1 = gr.DataFrame(headers=['artist', 'artist_id', 'distance', 'artist_url'], interactive=False, wrap=True)
    button.click(fn=get_artist_recommendations,
                 inputs=[client_id, client_secret, client_redirect_uri, artist_list, create_playlist],
                 outputs=output1)

    export_button = gr.Button("Export Recommendations")
    # Hidden until export_csv returns a visible file component.
    csv = gr.File(interactive=False, visible=False)
    export_button.click(fn=export_csv, inputs=[output1], outputs=[csv])

if __name__ == '__main__':
    demo.launch()
|
artist_recommender.py
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import spotipy
|
| 3 |
+
from spotipy.oauth2 import SpotifyOAuth
|
| 4 |
+
import random
|
| 5 |
+
from sklearn.preprocessing import StandardScaler
|
| 6 |
+
import numpy as np
|
| 7 |
+
from scipy.spatial.distance import cosine
|
| 8 |
+
import json
|
| 9 |
+
from tqdm import tqdm
|
| 10 |
+
import argparse
|
| 11 |
+
import sys
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
# Do not truncate column contents when pandas prints a DataFrame.
pd.set_option("display.max_colwidth", None)

# Module-level Spotify client; assigned by initialize_spotify_client().
sp = None

# Feature columns used for the distance computation. Callers concatenate the
# two lists, so both stay plain lists.
MUSIC_FEATURES = [
    "danceability",
    "energy",
    "loudness",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
]
SHEET_FEATURES = [
    "key",
    "mode",
    "tempo",
]
|
| 19 |
+
|
| 20 |
+
def initialize_spotify_client(credentials_file, isfile=True):
    """Create the module-level Spotify client ``sp``.

    Args:
        credentials_file: Path to a JSON credentials file, or an already
            loaded dict with the keys ``SPOTIPY_CLIENT_ID``,
            ``SPOTIPY_CLIENT_SECRET`` and ``SPOTIPY_REDIRECT_URI``.
        isfile: When true, ``credentials_file`` is read as a JSON file unless
            it is already a dict. When false, it is used as the credentials
            mapping directly.

    Raises:
        OSError: If the credentials file cannot be opened.
        json.JSONDecodeError: If the credentials file is not valid JSON.
        KeyError: If one of the required credential keys is missing.
    """
    global sp
    # A dict can never be a path; accept it even when the caller forgot
    # isfile=False instead of failing inside open().
    if isfile and not isinstance(credentials_file, dict):
        # Context manager so the file handle is closed deterministically.
        with open(credentials_file, encoding="utf-8") as handle:
            creds = json.load(handle)
    else:
        creds = credentials_file
    client_id = creds['SPOTIPY_CLIENT_ID']
    client_secret = creds['SPOTIPY_CLIENT_SECRET']
    redirect_uri = creds['SPOTIPY_REDIRECT_URI']
    # Scopes requested: create public playlists and read recently played tracks.
    scope = 'playlist-modify-public user-read-recently-played'
    sp = spotipy.Spotify(auth_manager=SpotifyOAuth(client_id=client_id,
                                                   client_secret=client_secret,
                                                   redirect_uri=redirect_uri,
                                                   scope=scope))
|
| 34 |
+
|
| 35 |
+
def find_artists_with_matching_genres(target_genres):
    """Search Spotify for artists tagged with the given genres.

    The first query requires all genres; after every query one randomly
    chosen genre is dropped, until at least 10 artists were collected or no
    genre is left.

    Args:
        target_genres: Iterable of genre names. It is copied, so the caller's
            object is never modified.

    Returns:
        List of dicts with the keys ``id``, ``name`` and ``artist_url``; each
        artist id appears at most once.
    """
    remaining_genres = list(target_genres)
    matched_artists = []
    # Track ids explicitly: the stored dicts are reduced copies, so comparing
    # raw API objects against them can never detect a duplicate.
    seen_ids = set()
    while remaining_genres and len(matched_artists) < 10:
        print(f"Trying {remaining_genres}")
        query = " AND ".join(f"genre:\"{genre}\"" for genre in remaining_genres)
        artist_results = sp.search(q=query, type='artist', limit=10)
        for artist in artist_results['artists']['items']:
            if artist['id'] in seen_ids:
                continue
            seen_ids.add(artist['id'])
            matched_artists.append({'id': artist['id'],
                                    'name': artist['name'],
                                    'artist_url': artist['external_urls']['spotify']})
        # Relax the query for the next round by dropping one random genre.
        remaining_genres.remove(random.choice(remaining_genres))
    return matched_artists
|
| 46 |
+
|
| 47 |
+
def get_top_track_features(artist_id):
    """Return the audio features of an artist's first top track (US market).

    Args:
        artist_id: Spotify artist id.

    Returns:
        The audio-features dict returned by Spotify, extended with
        ``track_id`` and ``track_name``; an empty dict when the artist has no
        top tracks, no features are available, or any error occurs.
    """
    try:
        top_tracks = sp.artist_top_tracks(artist_id, country='US')['tracks']
        if not top_tracks:
            print("No top tracks found for this artist.")
            return {}
        top_track = top_tracks[0]
        features = sp.audio_features([top_track['id']])[0]
        # Check before indexing: a falsy result (e.g. None) cannot be
        # annotated, and checking afterwards would never report this case.
        if not features:
            print("Audio features could not be retrieved.")
            return {}
        features['track_id'] = top_track['id']
        features['track_name'] = top_track['name']
        return features
    except Exception as e:
        # Best effort: a failure for one artist must not abort the whole run.
        print(f"Error retrieving top track for artist: {e}")
        return {}
|
| 66 |
+
|
| 67 |
+
def average_cosine_distance(A, B):
    """Compute, for every vector in ``B``, its mean cosine distance to ``A``.

    Args:
        A: Iterable of reference vectors.
        B: Iterable of candidate vectors.

    Returns:
        NumPy array with one entry per vector in ``B``: the mean of the cosine
        distances between that vector and each vector in ``A``.
    """
    return np.array([
        np.mean([cosine(candidate, reference) for reference in A])
        for candidate in B
    ])
|
| 73 |
+
|
| 74 |
+
def _search_selected_artists(artist_names):
    """Resolve artist names to items shaped like recently-played results."""
    items = []
    for artist in artist_names:
        search_result = sp.search(q=f"artist: {artist}", type='artist')
        try:
            # Keep exact (case-insensitive) name matches, most popular first.
            candidates = sorted(
                [x for x in search_result['artists']['items'] if x['name'].lower() == artist.lower()],
                key=lambda x: x['popularity'], reverse=True)
        except KeyError:
            candidates = []
        if not candidates:
            print(f"{artist} not found")
            continue
        if len(candidates) > 1:
            print(f"Multiple artist_id found for {artist}, selecting the most popular artist_id in the list.")
        best_match = candidates[0]
        try:
            artist_url = best_match['external_urls']['spotify']
        except KeyError:
            artist_url = ""
        items.append({
            'track': {
                'artists': [{'id': best_match['id'],
                             'name': artist,
                             'external_urls': {'spotify': artist_url}}]}})
    return items


def get_recently_played(limit=50, selected_artists=""):
    """Collect top-track audio features for up to 10 reference artists.

    The reference artists come from ``selected_artists`` when it names at
    least one artist, otherwise from the user's recently played tracks.

    Args:
        limit: Number of recently played tracks to request. Only used when no
            artists are supplied.
        selected_artists: Comma separated artist names (or an iterable of
            names). Blank entries are ignored; only the first 10 names are
            looked up.

    Returns:
        List of feature dicts as returned by ``get_top_track_features``,
        extended with ``name``, ``artist_id`` and ``artist_url``. Each artist
        appears at most once.
    """
    if isinstance(selected_artists, str):
        raw_names = selected_artists.split(",")
    else:
        raw_names = selected_artists or []
    # Drop blank entries (whitespace-only input, stray commas) so no empty
    # search query is ever sent.
    artist_names = [name.strip() for name in raw_names if name and name.strip()]

    if not artist_names:
        print("Getting most recently played artists")
        results = sp.current_user_recently_played(limit=limit)
    else:
        print("Using supplied list of artists (first 10 artists only)")
        results = {'items': _search_selected_artists(artist_names[:10])}

    tracks = []
    seen_artist_ids = set()  # ids already present in ``tracks``
    for idx, item in enumerate(results['items']):
        try:
            track = item['track']
            primary_artist = track['artists'][0]
            artist_id = primary_artist['id']
        except (KeyError, IndexError) as e:
            print(f"Failed index {idx}:")
            print(e)
            continue
        if artist_id in seen_artist_ids:
            continue
        if len(tracks) >= 10:
            break
        features = get_top_track_features(artist_id)
        if not features:
            print(f"No features found for artist: {artist_id}")
            continue
        try:
            features['name'] = primary_artist['name']
        except KeyError as e:
            print(f"Failed to get artists name for {artist_id}:")
            print(e)
            features['name'] = np.nan
        features['artist_id'] = artist_id
        try:
            features['artist_url'] = primary_artist['external_urls']['spotify']
        except KeyError as e:
            print(f"Failed to get URL for artist {artist_id}:")
            print(e)
            features['artist_url'] = np.nan
        tracks.append(features)
        seen_artist_ids.add(artist_id)
        print(f"{idx+1}: {features['name']} - {features['track_name']}")
    return tracks
|
| 137 |
+
|
| 138 |
+
def get_matching_artists(tracks):
    """Find candidate artists that share genres with the reference artists.

    Args:
        tracks: List of reference feature dicts; each must contain
            ``artist_id``.

    Returns:
        List of feature dicts (from ``get_top_track_features``) extended with
        ``artist_id``, ``name`` and ``artist_url``. Reference artists and
        artists that were already collected are skipped.
    """
    artist_features = []
    # Ids that must not be recommended: the references plus everything
    # collected so far. A set avoids rebuilding a list for every candidate.
    known_ids = {track['artist_id'] for track in tracks}
    # Iterate the list directly so tqdm can show the total.
    for item in tqdm(tracks):
        reference_artist = sp.artist(item['artist_id'])
        genres = reference_artist.get('genres', [])
        if not genres:
            continue
        matched_artists = find_artists_with_matching_genres(list(genres))
        if len(matched_artists) <= 1:  # If it only managed to match itself
            continue
        for candidate in matched_artists:
            candidate_id = candidate.get('id', "")
            if candidate_id == "" or candidate_id in known_ids:
                continue
            features = get_top_track_features(candidate_id)
            if not features:
                continue
            features['artist_id'] = candidate_id
            features['name'] = candidate['name']
            features['artist_url'] = candidate['artist_url']
            artist_features.append(features)
            known_ids.add(candidate_id)
    return artist_features
|
| 162 |
+
|
| 163 |
+
def get_closest_artists(df, features):
    """Rank candidate artists by mean cosine distance to the reference rows.

    Args:
        df: DataFrame with the columns in ``features`` plus ``source``
            ("reference" or "artist"), ``name``, ``artist_id`` and
            ``artist_url``.
        features: List of column names used for the distance computation.

    Returns:
        DataFrame with the columns ``artist``, ``artist_id``, ``distance`` and
        ``artist_url`` holding the 10 rows with the smallest distance, sorted
        ascending. The index keeps the labels of the candidate rows in ``df``.
    """
    all_feats = StandardScaler().fit_transform(df[features])
    # Boolean masks select rows by position, so this is correct for any
    # index, not only the default 0..n-1 one.
    is_reference = (df["source"] == "reference").values
    is_artist = (df["source"] == "artist").values
    result_distances = average_cosine_distance(all_feats[is_reference], all_feats[is_artist])
    candidates = df.loc[is_artist]
    results = pd.DataFrame(
        {
            "artist": candidates["name"].values,
            "artist_id": candidates["artist_id"].values,
            "distance": result_distances,
            "artist_url": candidates["artist_url"].values,
        },
        index=candidates.index,
    )
    return results.sort_values("distance").head(10)
|
| 172 |
+
|
| 173 |
+
def generate_playlist(closest_artists, artists=None):
    """Create a playlist holding the first top track of each recommended artist.

    Args:
        closest_artists: DataFrame with at least an ``artist_id`` column.
        artists: Artist names the recommendation was based on, either a list
            of names or a comma separated string. ``None`` or empty means the
            recommendation came from the recently played tracks.

    Returns:
        True when the created playlist reports a name and external URLs,
        False otherwise.
    """
    if artists is None:
        artists = []
    elif isinstance(artists, str):
        # Joining a plain string with ', ' would join its characters, so
        # split a comma separated string into names first.
        artists = [name.strip() for name in artists.split(",") if name.strip()]

    playlist_name = f"Recommended_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    if len(artists) > 0:
        playlist_desc = f"Top 10 recommended artists based on supplied artists: {', '.join(artists)}"
    else:
        playlist_desc = "Top 10 recommended artists from recently played."
    user_id = sp.current_user()['id']
    new_playlist = sp.user_playlist_create(user_id, playlist_name, description=playlist_desc)
    playlist_id = new_playlist['id']

    track_uris = []
    for _, row in closest_artists.iterrows():
        print(row)
        top_tracks = sp.artist_top_tracks(row.artist_id, country='US')['tracks']
        if top_tracks:
            track_uris.append(top_tracks[0]['uri'])
    # Add the tracks in batches instead of one request per track; 100 is the
    # maximum number of items the endpoint accepts per call.
    for start in range(0, len(track_uris), 100):
        sp.playlist_add_items(playlist_id, track_uris[start:start + 100])

    if "name" in new_playlist and "external_urls" in new_playlist:
        print("Created playlist:", new_playlist['name'], "with URL:", new_playlist['external_urls']['spotify'])
        return True
    print("failed to create new playlist")
    return False
|
| 196 |
+
|
| 197 |
+
def main():
    """Command-line entry point: recommend artists and optionally build a playlist.

    Parses ``--creds``, ``--artists`` and ``--playlist``, writes the result to
    ``closest_artists.csv`` and exits with status 1 when the Spotify client
    cannot fetch the current user.
    """
    parser = argparse.ArgumentParser(description="Spotify artist recommender. Requires a JSON with spotify credentials "
                                                 "(see credentials.json.example). Can also take a comma separated list "
                                                 "of artists instead of looking up last played.")
    parser.add_argument('--creds', type=str, help='Path to credentials json file', required=True)
    parser.add_argument('--artists', type=str, help='Comma separated list of artists', default="")
    parser.add_argument('--playlist', action='store_true', help='Create a Spotify playlist if set ("Recommended_timestamp")')
    args = parser.parse_args()

    print("Initializing Spotify")
    initialize_spotify_client(args.creds)
    try:
        sp.current_user()
    except Exception as exc:
        # Exception (not a bare except) keeps Ctrl-C working; a non-zero
        # status lets calling scripts detect the failure.
        print(f"Failed to initialize Spotify, are credentials correct? ({exc})")
        sys.exit(1)
    tracks = get_recently_played(selected_artists=args.artists)
    print("Getting reference features")
    reference_df = pd.DataFrame.from_records(tracks)
    print("Getting matching artist features")
    artist_features = get_matching_artists(tracks)
    artist_df = pd.DataFrame(artist_features)
    reference_df['source'] = 'reference'
    artist_df['source'] = 'artist'
    df = pd.concat([reference_df, artist_df], ignore_index=True)
    closest_artists = get_closest_artists(df, MUSIC_FEATURES + SHEET_FEATURES)
    closest_artists.to_csv("closest_artists.csv", index=False)
    if args.playlist:
        # generate_playlist joins the names with ', ', so pass a list of names
        # rather than the raw comma separated string.
        artist_names = [name.strip() for name in args.artists.split(",") if name.strip()]
        generate_playlist(closest_artists, artist_names)


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas
|
| 2 |
+
spotipy==2.23.0
|
| 3 |
+
scikit-learn
|
| 4 |
+
numpy
|
| 5 |
+
scipy
|
| 6 |
+
tqdm
|