Mthrfkr committed on
Commit
4d76c8d
verified
1 Parent(s): c33f952

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -374
app.py CHANGED
@@ -3,410 +3,126 @@ import requests
3
  import pandas as pd
4
  import time
5
  import shutil
6
- import numpy as np
7
  import os
8
  from tempfile import NamedTemporaryFile
9
- from openpyxl import Workbook
10
 
11
  # Spotify API credentials from environment variables
12
- client_ids = os.getenv("SPOTIFY_CLIENT_IDS")
13
- client_secrets = os.getenv("SPOTIFY_CLIENT_SECRETS")
14
 
15
  if not client_ids or not client_secrets:
16
  raise ValueError("SPOTIFY_CLIENT_IDS or SPOTIFY_CLIENT_SECRETS environment variables not set.")
17
 
18
- client_ids = client_ids.split(',')
19
- client_secrets = client_secrets.split(',')
20
  current_api_index = 0
21
-
22
- # Request counters
23
- total_requests = 0
24
-
25
- # Spotify Functions
26
- def get_token(client_id, client_secret):
27
- url = 'https://accounts.spotify.com/api/token'
28
- headers = {'Content-Type': 'application/x-www-form-urlencoded'}
29
- payload = {'grant_type': 'client_credentials'}
30
- response = requests.post(url, headers=headers, data=payload, auth=(client_id, client_secret))
31
- global total_requests
32
- total_requests += 1
33
- if response.status_code == 200:
34
- return response.json().get('access_token')
35
- else:
36
- print(f"Error getting token: {response.status_code} - {response.text}")
37
- return None
38
-
39
- def handle_rate_limit(response, attempt):
40
- if response.status_code == 429:
41
- retry_after = int(response.headers.get('Retry-After', 1))
42
- wait_time = retry_after + 10 * (2 ** attempt) # Exponential backoff
43
- time.sleep(wait_time)
44
- return True
45
- return False
46
-
47
- def make_request_with_retry(url, headers, params=None, max_retries=5):
48
- global total_requests
49
- for attempt in range(max_retries):
50
- response = requests.get(url, headers=headers, params=params)
51
- total_requests += 1
52
- if handle_rate_limit(response, attempt):
53
- continue
54
  if response.status_code == 200:
55
- return response
56
  else:
57
- print(f"Request failed: {response.status_code} - {response.text}")
58
- time.sleep(1) # Small delay before retrying
59
- return None
60
-
61
- def extract_id_from_url(url, type_keyword):
62
- """Extract Spotify ID from URL for track, playlist or album."""
63
- if type_keyword in url:
64
- parts = url.split("/")
65
- for i, part in enumerate(parts):
66
- if type_keyword in part and i + 1 < len(parts):
67
- potential_id = parts[i + 1].split("?")[0]
68
- if potential_id:
69
- return potential_id
70
-
71
- # Special handling for spotify.com/intl-xx/album type URLs
72
- if "intl-" in url and "/album/" in url:
73
- parts = url.split("/album/")
74
- if len(parts) > 1:
75
- return parts[1].split("?")[0].split("/")[0]
76
-
77
- # If above fails, try to find ID in the last part of the URL
78
- parts = url.split("/")
79
- last_part = parts[-1]
80
- if "?" in last_part:
81
- return last_part.split("?")[0]
82
- return last_part
83
-
84
- def get_playlist_tracks(token, playlist_url):
85
- """Get all tracks from a playlist URL."""
86
- playlist_id = extract_id_from_url(playlist_url, "playlist")
87
- print(f"Extracted playlist ID: {playlist_id}")
88
-
89
- headers = {'Authorization': f'Bearer {token}'}
90
- tracks_url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
91
-
92
- all_tracks = []
93
- next_url = tracks_url
94
-
95
- while next_url:
96
- print(f"Fetching tracks from: {next_url}")
97
- response = make_request_with_retry(next_url, headers)
98
- if not response:
99
- break
100
-
101
- data = response.json()
102
- items = data.get('items', [])
103
- for item in items:
104
- if item and item.get('track'):
105
- track = item['track']
106
- all_tracks.append(track)
107
-
108
- next_url = data.get('next')
109
-
110
- print(f"Found {len(all_tracks)} tracks in playlist")
111
- return all_tracks
112
-
113
- def get_track_info(token, track_url):
114
- """Get information about a single track."""
115
- track_id = extract_id_from_url(track_url, "track")
116
- print(f"Extracted track ID: {track_id}")
117
-
118
- headers = {'Authorization': f'Bearer {token}'}
119
- url = f'https://api.spotify.com/v1/tracks/{track_id}'
120
-
121
- response = make_request_with_retry(url, headers)
122
- if response:
123
- return [response.json()]
124
- return []
125
-
126
- def get_album_tracks(token, album_url):
127
- """Get all tracks from an album URL."""
128
- album_id = extract_id_from_url(album_url, "album")
129
- print(f"Extracted album ID: {album_id}")
130
-
131
- headers = {'Authorization': f'Bearer {token}'}
132
-
133
- # First get the album to get its information
134
- album_info = None
135
- album_response = make_request_with_retry(f'https://api.spotify.com/v1/albums/{album_id}', headers)
136
- if album_response:
137
- album_info = album_response.json()
138
- print(f"Successfully retrieved album info: {album_info.get('name')}")
139
- else:
140
- print(f"Failed to retrieve album info for ID: {album_id}")
141
-
142
- tracks_url = f'https://api.spotify.com/v1/albums/{album_id}/tracks'
143
-
144
- all_tracks = []
145
- next_url = tracks_url
146
-
147
- while next_url:
148
- print(f"Fetching tracks from: {next_url}")
149
- response = make_request_with_retry(next_url, headers)
150
- if not response:
151
- break
152
-
153
- data = response.json()
154
- items = data.get('items', [])
155
-
156
- for item in items:
157
- if item:
158
- # We need to get full track information for ISRCs
159
- track_id = item.get('id')
160
- if track_id:
161
- track_response = make_request_with_retry(f'https://api.spotify.com/v1/tracks/{track_id}', headers)
162
- if track_response:
163
- track = track_response.json()
164
- # Add album info
165
- if album_info:
166
- track['album'] = {
167
- 'name': album_info.get('name', 'Unknown'),
168
- 'release_date': album_info.get('release_date', 'Not available'),
169
- 'id': album_id
170
- }
171
- all_tracks.append(track)
172
- continue
173
-
174
- # Fallback if we couldn't get the full track
175
- if album_info:
176
- item['album'] = {
177
- 'name': album_info.get('name', 'Unknown'),
178
- 'release_date': album_info.get('release_date', 'Not available'),
179
- 'id': album_id
180
- }
181
- all_tracks.append(item)
182
-
183
- next_url = data.get('next')
184
-
185
- print(f"Found {len(all_tracks)} tracks in album")
186
-
187
- # Mark the source
188
- for track in all_tracks:
189
- track['playlist_source'] = album_url
190
-
191
- return all_tracks
192
-
193
- def get_artist_details(token, artist_id):
194
- """Get artist information including genres."""
195
- headers = {'Authorization': f'Bearer {token}'}
196
- url = f'https://api.spotify.com/v1/artists/{artist_id}'
197
 
198
- response = make_request_with_retry(url, headers)
199
- if response:
200
- return response.json()
201
- return None
202
 
203
- def get_track_isrc(token, track):
204
- """Get ISRC code for a track if not already present."""
205
- if track.get('external_ids', {}).get('isrc'):
206
- return track['external_ids']['isrc']
207
-
208
- # If track doesn't have ISRC, try to get it from the API
209
- if track.get('id'):
210
- headers = {'Authorization': f'Bearer {token}'}
211
- url = f'https://api.spotify.com/v1/tracks/{track["id"]}'
212
 
213
- response = make_request_with_retry(url, headers)
214
- if response and response.json().get('external_ids', {}).get('isrc'):
215
- return response.json()['external_ids']['isrc']
216
-
217
- return 'Not available'
218
-
219
- def extract_track_details(tracks, token):
220
- """Extract relevant information from track objects including artist details."""
221
- tracks_info = []
222
- artists_cache = {} # Cache artist details to reduce API calls
223
-
224
- for track in tracks:
225
- if not track:
226
  continue
227
-
228
- # Get artist details if we have an artist ID
229
- artist_id = None
230
- artist_name = 'Unknown'
231
- genres = []
232
-
233
- if track.get('artists') and len(track['artists']) > 0:
234
- artist_id = track['artists'][0].get('id')
235
- artist_name = track['artists'][0].get('name', 'Unknown')
236
 
237
- # Only fetch artist details if we have an ID and haven't cached it already
238
- if artist_id and artist_id not in artists_cache:
239
- artist_data = get_artist_details(token, artist_id)
240
- if artist_data:
241
- artists_cache[artist_id] = {
242
- 'genres': artist_data.get('genres', []),
243
- 'popularity': artist_data.get('popularity', 'Not available'),
244
- 'followers': artist_data.get('followers', {}).get('total', 'Not available')
245
- }
246
-
247
- # Get genres from cache
248
- if artist_id in artists_cache:
249
- genres = artists_cache[artist_id].get('genres', [])
250
-
251
- # Get ISRC if not already present
252
- isrc = track.get('external_ids', {}).get('isrc', 'Not available')
253
- if isrc == 'Not available':
254
- isrc = get_track_isrc(token, track)
255
-
256
- # Calculate duration in minutes:seconds format
257
- duration_ms = track.get('duration_ms', 0)
258
- if duration_ms:
259
- minutes = duration_ms // 60000
260
- seconds = (duration_ms % 60000) // 1000
261
- duration_formatted = f"{minutes}:{seconds:02d}"
262
- else:
263
- duration_formatted = 'Not available'
264
 
265
- tracks_info.append({
266
- 'artist': artist_name,
267
- 'title': track.get('name', 'Unknown'),
268
- 'album': track.get('album', {}).get('name', 'Unknown'),
269
- 'isrc': isrc,
270
- 'track_popularity': track.get('popularity', 'Not available'),
271
- 'genres': ', '.join(genres) if genres else 'Not available',
272
- 'artist_popularity': artists_cache.get(artist_id, {}).get('popularity', 'Not available') if artist_id else 'Not available',
273
- 'artist_followers': artists_cache.get(artist_id, {}).get('followers', 'Not available') if artist_id else 'Not available',
274
- 'release_date': track.get('album', {}).get('release_date', 'Not available'),
275
- 'duration': duration_formatted,
276
- 'duration_ms': duration_ms,
277
- 'explicit': 'Yes' if track.get('explicit', False) else 'No',
278
- 'spotify_url': track.get('external_urls', {}).get('spotify', 'Not available'),
279
- 'preview_url': track.get('preview_url', 'Not available'),
280
- 'playlist_source': getattr(track, 'playlist_source', 'Direct Track')
281
- })
282
 
283
- return tracks_info
 
 
284
 
285
- # Main Interface Function
286
  def interface(project_name, spotify_urls, include_all_info=True):
287
- """Process multiple Spotify URLs (tracks or playlists) and combine results."""
288
- if not project_name:
289
- project_name = "spotify_tracks"
290
-
291
- print(f"Starting to process request for project: {project_name}")
292
-
293
- # Split and clean URLs
294
- urls_list = [url.strip() for url in spotify_urls.strip().split('\n') if url.strip()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
  if not urls_list:
297
- error_message = "Please enter at least one Spotify URL (track or playlist)."
298
- return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
299
-
300
- # Validate URLs
301
- valid_urls = []
302
- for url in urls_list:
303
- if "spotify.com" in url and ("track" in url or "playlist" in url or "album" in url):
304
- valid_urls.append(url)
305
- else:
306
- print(f"Invalid URL format, skipping: {url}")
307
-
308
- if not valid_urls:
309
- error_message = "No valid Spotify URLs found. Please enter valid track or playlist URLs."
310
- return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
311
-
312
- print(f"Processing {len(valid_urls)} valid Spotify URLs")
313
-
314
- # Get token
315
- token_spotify = get_token(client_ids[current_api_index], client_secrets[current_api_index])
316
- if not token_spotify:
317
- error_message = "Failed to authenticate with Spotify API. Please try again later."
318
- return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
319
-
320
- print(f"Successfully authenticated with Spotify API")
321
 
 
322
  all_tracks = []
323
 
324
- # Process each URL
325
- for url in valid_urls:
326
  try:
327
- print(f"Processing URL: {url}")
328
- if "playlist" in url:
329
- tracks = get_playlist_tracks(token_spotify, url)
330
- # Add source information
331
- for track in tracks:
332
- track['playlist_source'] = url
 
 
333
  all_tracks.extend(tracks)
334
- elif "track" in url:
335
- track = get_track_info(token_spotify, url)
 
 
336
  if track:
337
- track[0]['playlist_source'] = url
338
  all_tracks.extend(track)
339
- elif "album" in url:
340
- album_tracks = get_album_tracks(token_spotify, url)
341
- all_tracks.extend(album_tracks)
342
  except Exception as e:
343
- print(f"Error processing URL {url}: {str(e)}")
344
  continue
345
 
346
- if not all_tracks:
347
- error_message = "Could not find any tracks in the provided URLs."
348
- return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
349
-
350
- # Extract track details including artist information
351
- print("Extracting detailed track information including ISRCs...")
352
- tracks_info = extract_track_details(all_tracks, token_spotify)
353
-
354
- # Remove duplicate tracks (based on ISRC or title+artist if ISRC not available)
355
- print("Creating DataFrame and removing duplicates...")
356
- df = pd.DataFrame(tracks_info)
357
-
358
- # Create a key for deduplication
359
- df['dedup_key'] = df.apply(
360
- lambda row: row['isrc'] if row['isrc'] != 'Not available' else f"{row['artist']}_{row['title']}",
361
- axis=1
362
- )
363
-
364
- # Drop duplicates
365
- df = df.drop_duplicates(subset='dedup_key')
366
- df = df.drop(columns=['dedup_key'])
367
-
368
- print(f"Found {len(df)} unique tracks after deduplication")
369
-
370
- # Filter columns if not include_all_info
371
- if not include_all_info:
372
- columns_to_keep = ['artist', 'title', 'isrc', 'album', 'genres', 'release_date', 'track_popularity', 'explicit', 'spotify_url']
373
- df = df[columns_to_keep]
374
-
375
- # Save DataFrame to an Excel file
376
- tmpfile = NamedTemporaryFile(delete=False, suffix='.xlsx')
377
- df.to_excel(tmpfile.name, index=False)
378
-
379
- # Rename the file with the project name
380
- project_file_name = f"{project_name}.xlsx"
381
- shutil.move(tmpfile.name, project_file_name)
382
-
383
- return df, project_file_name
384
-
385
- # Gradio Interface Configuration
386
- iface = gr.Interface(
387
- fn=interface,
388
- inputs=[
389
- gr.Textbox(label="Project Name", placeholder="Enter a name for your export"),
390
- gr.Textbox(
391
- label="Spotify URLs (Tracks, Albums or Playlists)",
392
- placeholder="Enter one Spotify URL per line (tracks, albums or playlists)",
393
- lines=5
394
- ),
395
- gr.Checkbox(label="Include All Track Information", value=True)
396
- ],
397
- outputs=[
398
- gr.Dataframe(),
399
- gr.File(label="Download Excel")
400
- ],
401
- title="Spotify Track Collector",
402
- description="Extract tracks from multiple Spotify playlists, albums, and tracks into a single Excel file.",
403
- examples=[
404
- ["Pop Collection", "https://open.spotify.com/playlist/37i9dQZF1DXcBWIGoYBM5M\nhttps://open.spotify.com/track/4cOdK2wGLETKBW3PvgPWqT", True],
405
- ["Rock Collection", "https://open.spotify.com/playlist/37i9dQZF1DWXRqgorJj26U", False],
406
- ["Album Tracks", "https://open.spotify.com/album/1R5BORZZxNUg8QMgbqt0nd", True]
407
- ],
408
- allow_flagging="never"
409
- )
410
 
411
- if __name__ == "__main__":
412
- iface.launch()
 
 
3
  import pandas as pd
4
  import time
5
  import shutil
 
6
  import os
7
  from tempfile import NamedTemporaryFile
 
8
 
9
  # Spotify API credentials from environment variables
10
+ client_ids = os.getenv("SPOTIFY_CLIENT_IDS", "").split(',')
11
+ client_secrets = os.getenv("SPOTIFY_CLIENT_SECRETS", "").split(',')
12
 
13
  if not client_ids or not client_secrets:
14
  raise ValueError("SPOTIFY_CLIENT_IDS or SPOTIFY_CLIENT_SECRETS environment variables not set.")
15
 
16
+ # Token rotation management
 
17
  current_api_index = 0
18
+ request_counter = 0
19
+ MAX_REQUESTS_PER_CLIENT = 100 # Rotar cliente después de X peticiones
20
+
21
+ def rotate_client():
22
+ global current_api_index, request_counter
23
+ current_api_index = (current_api_index + 1) % len(client_ids)
24
+ request_counter = 0
25
+ print(f"Rotando a cliente Spotify #{current_api_index + 1}")
26
+
27
+ def get_token():
28
+ global current_api_index
29
+ for _ in range(len(client_ids)):
30
+ client_id = client_ids[current_api_index]
31
+ client_secret = client_secrets[current_api_index]
32
+ url = 'https://accounts.spotify.com/api/token'
33
+ response = requests.post(url,
34
+ headers={'Content-Type': 'application/x-www-form-urlencoded'},
35
+ data={'grant_type': 'client_credentials'},
36
+ auth=(client_id, client_secret))
37
+
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  if response.status_code == 200:
39
+ return response.json().get('access_token')
40
  else:
41
+ print(f"Error con cliente {current_api_index}: {response.text}")
42
+ rotate_client()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ raise Exception("Todos los clientes Spotify fallaron")
 
 
 
45
 
46
+ def make_request_with_retry(url, headers, params=None):
47
+ global request_counter
48
+ for _ in range(3): # 3 intentos por cliente
49
+ response = requests.get(url, headers=headers, params=params)
50
+ request_counter += 1
 
 
 
 
51
 
52
+ if response.status_code == 429:
53
+ print(f"Rate limit alcanzado. Cliente actual: {current_api_index}")
54
+ rotate_client()
55
+ headers['Authorization'] = f'Bearer {get_token()}'
56
+ time.sleep(int(response.headers.get('Retry-After', 10)))
 
 
 
 
 
 
 
 
57
  continue
 
 
 
 
 
 
 
 
 
58
 
59
+ if response.status_code == 200:
60
+ if request_counter >= MAX_REQUESTS_PER_CLIENT:
61
+ rotate_client()
62
+ headers['Authorization'] = f'Bearer {get_token()}' # Actualizar token
63
+ return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ print(f"Error {response.status_code}: {response.text}")
66
+ time.sleep(2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
+ return None
69
+
70
+ # ... (keep your existing functions like get_playlist_tracks, get_album_tracks, etc.) ...
71
 
 
72
  def interface(project_name, spotify_urls, include_all_info=True):
73
+ global current_api_index, request_counter
74
+ current_api_index = 0
75
+ request_counter = 0
76
+
77
+ # Validación de URLs mejorada
78
+ urls_list = []
79
+ for url in spotify_urls.strip().split('\n'):
80
+ url = url.strip()
81
+ if not url: continue
82
+
83
+ # Extraer tipo correctamente
84
+ if "spotify.com" in url:
85
+ if "/playlist/" in url:
86
+ url_type = "playlist"
87
+ elif "/album/" in url:
88
+ url_type = "album"
89
+ elif "/track/" in url:
90
+ url_type = "track"
91
+ else:
92
+ print(f"URL no soportada: {url}")
93
+ continue
94
+ urls_list.append((url, url_type))
95
 
96
  if not urls_list:
97
+ return gr.Dataframe(value=pd.DataFrame({"Error": ["No valid URLs provided"]})), None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ token = get_token()
100
  all_tracks = []
101
 
102
+ for url, url_type in urls_list:
 
103
  try:
104
+ if url_type == "playlist":
105
+ print(f"Procesando playlist: {url}")
106
+ tracks = get_playlist_tracks(token, url)
107
+ all_tracks.extend(tracks)
108
+
109
+ elif url_type == "album":
110
+ print(f"Procesando álbum: {url}")
111
+ tracks = get_album_tracks(token, url)
112
  all_tracks.extend(tracks)
113
+
114
+ elif url_type == "track":
115
+ print(f"Procesando track: {url}")
116
+ track = get_track_info(token, url)
117
  if track:
 
118
  all_tracks.extend(track)
119
+
 
 
120
  except Exception as e:
121
+ print(f"Error procesando {url}: {str(e)}")
122
  continue
123
 
124
+ # ... (resto del procesamiento igual) ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
+ # Mantén la interfaz de Gradio igual
127
+ iface = gr.Interface(...)
128
+ iface.launch()