Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,18 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import requests
|
| 3 |
import pandas as pd
|
| 4 |
import time
|
|
@@ -127,6 +141,16 @@ def get_album_tracks(token, album_url):
|
|
| 127 |
print(f"Extracted album ID: {album_id}")
|
| 128 |
|
| 129 |
headers = {'Authorization': f'Bearer {token}'}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
tracks_url = f'https://api.spotify.com/v1/albums/{album_id}/tracks'
|
| 131 |
|
| 132 |
all_tracks = []
|
|
@@ -141,23 +165,31 @@ def get_album_tracks(token, album_url):
|
|
| 141 |
data = response.json()
|
| 142 |
items = data.get('items', [])
|
| 143 |
|
| 144 |
-
# Get album details for additional info
|
| 145 |
-
album_info = None
|
| 146 |
-
if len(all_tracks) == 0: # Only need to get album info once
|
| 147 |
-
album_response = make_request_with_retry(f'https://api.spotify.com/v1/albums/{album_id}', headers)
|
| 148 |
-
if album_response:
|
| 149 |
-
album_info = album_response.json()
|
| 150 |
-
|
| 151 |
for item in items:
|
| 152 |
if item:
|
| 153 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
if album_info:
|
| 155 |
item['album'] = {
|
| 156 |
'name': album_info.get('name', 'Unknown'),
|
| 157 |
'release_date': album_info.get('release_date', 'Not available'),
|
| 158 |
'id': album_id
|
| 159 |
}
|
| 160 |
-
|
| 161 |
all_tracks.append(item)
|
| 162 |
|
| 163 |
next_url = data.get('next')
|
|
@@ -212,6 +244,11 @@ def extract_track_details(tracks, token):
|
|
| 212 |
if artist_id in artists_cache:
|
| 213 |
genres = artists_cache[artist_id].get('genres', [])
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
# Calculate duration in minutes:seconds format
|
| 216 |
duration_ms = track.get('duration_ms', 0)
|
| 217 |
if duration_ms:
|
|
@@ -225,7 +262,7 @@ def extract_track_details(tracks, token):
|
|
| 225 |
'artist': artist_name,
|
| 226 |
'title': track.get('name', 'Unknown'),
|
| 227 |
'album': track.get('album', {}).get('name', 'Unknown'),
|
| 228 |
-
'isrc':
|
| 229 |
'track_popularity': track.get('popularity', 'Not available'),
|
| 230 |
'genres': ', '.join(genres) if genres else 'Not available',
|
| 231 |
'artist_popularity': artists_cache.get(artist_id, {}).get('popularity', 'Not available') if artist_id else 'Not available',
|
|
@@ -274,6 +311,8 @@ def interface(project_name, spotify_urls, include_all_info=True):
|
|
| 274 |
error_message = "Failed to authenticate with Spotify API. Please try again later."
|
| 275 |
return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
|
| 276 |
|
|
|
|
|
|
|
| 277 |
all_tracks = []
|
| 278 |
|
| 279 |
# Process each URL
|
|
@@ -303,9 +342,11 @@ def interface(project_name, spotify_urls, include_all_info=True):
|
|
| 303 |
return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
|
| 304 |
|
| 305 |
# Extract track details including artist information
|
|
|
|
| 306 |
tracks_info = extract_track_details(all_tracks, token_spotify)
|
| 307 |
|
| 308 |
# Remove duplicate tracks (based on ISRC or title+artist if ISRC not available)
|
|
|
|
| 309 |
df = pd.DataFrame(tracks_info)
|
| 310 |
|
| 311 |
# Create a key for deduplication
|
|
@@ -322,7 +363,7 @@ def interface(project_name, spotify_urls, include_all_info=True):
|
|
| 322 |
|
| 323 |
# Filter columns if not include_all_info
|
| 324 |
if not include_all_info:
|
| 325 |
-
columns_to_keep = ['artist', 'title', 'album', 'genres', 'release_date', 'track_popularity', 'explicit', 'spotify_url']
|
| 326 |
df = df[columns_to_keep]
|
| 327 |
|
| 328 |
# Save DataFrame to an Excel file
|
|
|
|
| 1 |
+
def get_track_isrc(token, track):
    """Return the ISRC code for a track, querying the Spotify API if needed.

    Args:
        token: Bearer token placed in the ``Authorization`` header of the
            fallback request.
        track: Track dictionary. Its ``external_ids.isrc`` value is used
            when present; otherwise its ``id`` is used to request the track
            from ``https://api.spotify.com/v1/tracks/{id}``.

    Returns:
        The ISRC string, or ``'Not available'`` when neither the track
        dictionary nor the API response provides one.
    """
    # `or {}` tolerates an `external_ids` key that is present but null.
    isrc = (track.get('external_ids') or {}).get('isrc')
    if isrc:
        return isrc

    # If track doesn't have ISRC, try to get it from the API
    track_id = track.get('id')
    if track_id:
        headers = {'Authorization': f'Bearer {token}'}
        url = f'https://api.spotify.com/v1/tracks/{track_id}'

        # NOTE(review): make_request_with_retry is defined elsewhere in the
        # file; a falsy return is treated here as "request failed".
        response = make_request_with_retry(url, headers)
        if response:
            # Parse the body once instead of once per lookup.
            payload = response.json()
            isrc = (payload.get('external_ids') or {}).get('isrc')
            if isrc:
                return isrc

    return 'Not available'


# This import had been fused onto the end of the return statement above,
# which made the module fail to parse; it belongs on its own line.
import gradio as gr
|
| 16 |
import requests
|
| 17 |
import pandas as pd
|
| 18 |
import time
|
|
|
|
| 141 |
print(f"Extracted album ID: {album_id}")
|
| 142 |
|
| 143 |
headers = {'Authorization': f'Bearer {token}'}
|
| 144 |
+
|
| 145 |
+
# First get the album to get its information
|
| 146 |
+
album_info = None
|
| 147 |
+
album_response = make_request_with_retry(f'https://api.spotify.com/v1/albums/{album_id}', headers)
|
| 148 |
+
if album_response:
|
| 149 |
+
album_info = album_response.json()
|
| 150 |
+
print(f"Successfully retrieved album info: {album_info.get('name')}")
|
| 151 |
+
else:
|
| 152 |
+
print(f"Failed to retrieve album info for ID: {album_id}")
|
| 153 |
+
|
| 154 |
tracks_url = f'https://api.spotify.com/v1/albums/{album_id}/tracks'
|
| 155 |
|
| 156 |
all_tracks = []
|
|
|
|
| 165 |
data = response.json()
|
| 166 |
items = data.get('items', [])
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
for item in items:
|
| 169 |
if item:
|
| 170 |
+
# We need to get full track information for ISRCs
|
| 171 |
+
track_id = item.get('id')
|
| 172 |
+
if track_id:
|
| 173 |
+
track_response = make_request_with_retry(f'https://api.spotify.com/v1/tracks/{track_id}', headers)
|
| 174 |
+
if track_response:
|
| 175 |
+
track = track_response.json()
|
| 176 |
+
# Add album info
|
| 177 |
+
if album_info:
|
| 178 |
+
track['album'] = {
|
| 179 |
+
'name': album_info.get('name', 'Unknown'),
|
| 180 |
+
'release_date': album_info.get('release_date', 'Not available'),
|
| 181 |
+
'id': album_id
|
| 182 |
+
}
|
| 183 |
+
all_tracks.append(track)
|
| 184 |
+
continue
|
| 185 |
+
|
| 186 |
+
# Fallback if we couldn't get the full track
|
| 187 |
if album_info:
|
| 188 |
item['album'] = {
|
| 189 |
'name': album_info.get('name', 'Unknown'),
|
| 190 |
'release_date': album_info.get('release_date', 'Not available'),
|
| 191 |
'id': album_id
|
| 192 |
}
|
|
|
|
| 193 |
all_tracks.append(item)
|
| 194 |
|
| 195 |
next_url = data.get('next')
|
|
|
|
| 244 |
if artist_id in artists_cache:
|
| 245 |
genres = artists_cache[artist_id].get('genres', [])
|
| 246 |
|
| 247 |
+
# Get ISRC if not already present
|
| 248 |
+
isrc = track.get('external_ids', {}).get('isrc', 'Not available')
|
| 249 |
+
if isrc == 'Not available':
|
| 250 |
+
isrc = get_track_isrc(token, track)
|
| 251 |
+
|
| 252 |
# Calculate duration in minutes:seconds format
|
| 253 |
duration_ms = track.get('duration_ms', 0)
|
| 254 |
if duration_ms:
|
|
|
|
| 262 |
'artist': artist_name,
|
| 263 |
'title': track.get('name', 'Unknown'),
|
| 264 |
'album': track.get('album', {}).get('name', 'Unknown'),
|
| 265 |
+
'isrc': isrc,
|
| 266 |
'track_popularity': track.get('popularity', 'Not available'),
|
| 267 |
'genres': ', '.join(genres) if genres else 'Not available',
|
| 268 |
'artist_popularity': artists_cache.get(artist_id, {}).get('popularity', 'Not available') if artist_id else 'Not available',
|
|
|
|
| 311 |
error_message = "Failed to authenticate with Spotify API. Please try again later."
|
| 312 |
return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
|
| 313 |
|
| 314 |
+
print(f"Successfully authenticated with Spotify API")
|
| 315 |
+
|
| 316 |
all_tracks = []
|
| 317 |
|
| 318 |
# Process each URL
|
|
|
|
| 342 |
return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
|
| 343 |
|
| 344 |
# Extract track details including artist information
|
| 345 |
+
print("Extracting detailed track information including ISRCs...")
|
| 346 |
tracks_info = extract_track_details(all_tracks, token_spotify)
|
| 347 |
|
| 348 |
# Remove duplicate tracks (based on ISRC or title+artist if ISRC not available)
|
| 349 |
+
print("Creating DataFrame and removing duplicates...")
|
| 350 |
df = pd.DataFrame(tracks_info)
|
| 351 |
|
| 352 |
# Create a key for deduplication
|
|
|
|
| 363 |
|
| 364 |
# Filter columns if not include_all_info
|
| 365 |
if not include_all_info:
|
| 366 |
+
columns_to_keep = ['artist', 'title', 'isrc', 'album', 'genres', 'release_date', 'track_popularity', 'explicit', 'spotify_url']
|
| 367 |
df = df[columns_to_keep]
|
| 368 |
|
| 369 |
# Save DataFrame to an Excel file
|