Spaces:

Mthrfkr
/

Spotify_Automated_Track_Recommendation_script

Runtime error

App Files Files Community

Mthrfkr committed on Mar 5, 2025

Commit

4d76c8d

verified ·

1 Parent(s): c33f952

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -374

app.py CHANGED Viewed

@@ -3,410 +3,126 @@ import requests
 import pandas as pd
 import time
 import shutil
-import numpy as np
 import os
 from tempfile import NamedTemporaryFile
-from openpyxl import Workbook
 # Spotify API credentials from environment variables
-client_ids = os.getenv("SPOTIFY_CLIENT_IDS")
-client_secrets = os.getenv("SPOTIFY_CLIENT_SECRETS")
 if not client_ids or not client_secrets:
     raise ValueError("SPOTIFY_CLIENT_IDS or SPOTIFY_CLIENT_SECRETS environment variables not set.")
-client_ids = client_ids.split(',')
-client_secrets = client_secrets.split(',')
 current_api_index = 0
-# Request counters
-total_requests = 0
-# Spotify Functions
-def get_token(client_id, client_secret):
-    url = 'https://accounts.spotify.com/api/token'
-    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
-    payload = {'grant_type': 'client_credentials'}
-    response = requests.post(url, headers=headers, data=payload, auth=(client_id, client_secret))
-    global total_requests
-    total_requests += 1
-    if response.status_code == 200:
-        return response.json().get('access_token')
-    else:
-        print(f"Error getting token: {response.status_code} - {response.text}")
-        return None
-def handle_rate_limit(response, attempt):
-    if response.status_code == 429:
-        retry_after = int(response.headers.get('Retry-After', 1))
-        wait_time = retry_after + 10 * (2 ** attempt)  # Exponential backoff
-        time.sleep(wait_time)
-        return True
-    return False
-def make_request_with_retry(url, headers, params=None, max_retries=5):
-    global total_requests
-    for attempt in range(max_retries):
-        response = requests.get(url, headers=headers, params=params)
-        total_requests += 1
-        if handle_rate_limit(response, attempt):
-            continue
         if response.status_code == 200:
-            return response
         else:
-            print(f"Request failed: {response.status_code} - {response.text}")
-            time.sleep(1)  # Small delay before retrying
-    return None
-def extract_id_from_url(url, type_keyword):
-    """Extract Spotify ID from URL for track, playlist or album."""
-    if type_keyword in url:
-        parts = url.split("/")
-        for i, part in enumerate(parts):
-            if type_keyword in part and i + 1 < len(parts):
-                potential_id = parts[i + 1].split("?")[0]
-                if potential_id:
-                    return potential_id
-    # Special handling for spotify.com/intl-xx/album type URLs
-    if "intl-" in url and "/album/" in url:
-        parts = url.split("/album/")
-        if len(parts) > 1:
-            return parts[1].split("?")[0].split("/")[0]
-    # If above fails, try to find ID in the last part of the URL
-    parts = url.split("/")
-    last_part = parts[-1]
-    if "?" in last_part:
-        return last_part.split("?")[0]
-    return last_part
-def get_playlist_tracks(token, playlist_url):
-    """Get all tracks from a playlist URL."""
-    playlist_id = extract_id_from_url(playlist_url, "playlist")
-    print(f"Extracted playlist ID: {playlist_id}")
-    headers = {'Authorization': f'Bearer {token}'}
-    tracks_url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
-    all_tracks = []
-    next_url = tracks_url
-    while next_url:
-        print(f"Fetching tracks from: {next_url}")
-        response = make_request_with_retry(next_url, headers)
-        if not response:
-            break
-        data = response.json()
-        items = data.get('items', [])
-        for item in items:
-            if item and item.get('track'):
-                track = item['track']
-                all_tracks.append(track)
-        next_url = data.get('next')
-    print(f"Found {len(all_tracks)} tracks in playlist")
-    return all_tracks
-def get_track_info(token, track_url):
-    """Get information about a single track."""
-    track_id = extract_id_from_url(track_url, "track")
-    print(f"Extracted track ID: {track_id}")
-    headers = {'Authorization': f'Bearer {token}'}
-    url = f'https://api.spotify.com/v1/tracks/{track_id}'
-    response = make_request_with_retry(url, headers)
-    if response:
-        return [response.json()]
-    return []
-def get_album_tracks(token, album_url):
-    """Get all tracks from an album URL."""
-    album_id = extract_id_from_url(album_url, "album")
-    print(f"Extracted album ID: {album_id}")
-    headers = {'Authorization': f'Bearer {token}'}
-    # First get the album to get its information
-    album_info = None
-    album_response = make_request_with_retry(f'https://api.spotify.com/v1/albums/{album_id}', headers)
-    if album_response:
-        album_info = album_response.json()
-        print(f"Successfully retrieved album info: {album_info.get('name')}")
-    else:
-        print(f"Failed to retrieve album info for ID: {album_id}")
-    tracks_url = f'https://api.spotify.com/v1/albums/{album_id}/tracks'
-    all_tracks = []
-    next_url = tracks_url
-    while next_url:
-        print(f"Fetching tracks from: {next_url}")
-        response = make_request_with_retry(next_url, headers)
-        if not response:
-            break
-        data = response.json()
-        items = data.get('items', [])
-        for item in items:
-            if item:
-                # We need to get full track information for ISRCs
-                track_id = item.get('id')
-                if track_id:
-                    track_response = make_request_with_retry(f'https://api.spotify.com/v1/tracks/{track_id}', headers)
-                    if track_response:
-                        track = track_response.json()
-                        # Add album info
-                        if album_info:
-                            track['album'] = {
-                                'name': album_info.get('name', 'Unknown'),
-                                'release_date': album_info.get('release_date', 'Not available'),
-                                'id': album_id
-                            }
-                        all_tracks.append(track)
-                        continue
-                # Fallback if we couldn't get the full track
-                if album_info:
-                    item['album'] = {
-                        'name': album_info.get('name', 'Unknown'),
-                        'release_date': album_info.get('release_date', 'Not available'),
-                        'id': album_id
-                    }
-                all_tracks.append(item)
-        next_url = data.get('next')
-    print(f"Found {len(all_tracks)} tracks in album")
-    # Mark the source
-    for track in all_tracks:
-        track['playlist_source'] = album_url
-    return all_tracks
-def get_artist_details(token, artist_id):
-    """Get artist information including genres."""
-    headers = {'Authorization': f'Bearer {token}'}
-    url = f'https://api.spotify.com/v1/artists/{artist_id}'
-    response = make_request_with_retry(url, headers)
-    if response:
-        return response.json()
-    return None
-def get_track_isrc(token, track):
-    """Get ISRC code for a track if not already present."""
-    if track.get('external_ids', {}).get('isrc'):
-        return track['external_ids']['isrc']
-    # If track doesn't have ISRC, try to get it from the API
-    if track.get('id'):
-        headers = {'Authorization': f'Bearer {token}'}
-        url = f'https://api.spotify.com/v1/tracks/{track["id"]}'
-        response = make_request_with_retry(url, headers)
-        if response and response.json().get('external_ids', {}).get('isrc'):
-            return response.json()['external_ids']['isrc']
-    return 'Not available'
-def extract_track_details(tracks, token):
-    """Extract relevant information from track objects including artist details."""
-    tracks_info = []
-    artists_cache = {}  # Cache artist details to reduce API calls
-    for track in tracks:
-        if not track:
             continue
-        # Get artist details if we have an artist ID
-        artist_id = None
-        artist_name = 'Unknown'
-        genres = []
-        if track.get('artists') and len(track['artists']) > 0:
-            artist_id = track['artists'][0].get('id')
-            artist_name = track['artists'][0].get('name', 'Unknown')
-            # Only fetch artist details if we have an ID and haven't cached it already
-            if artist_id and artist_id not in artists_cache:
-                artist_data = get_artist_details(token, artist_id)
-                if artist_data:
-                    artists_cache[artist_id] = {
-                        'genres': artist_data.get('genres', []),
-                        'popularity': artist_data.get('popularity', 'Not available'),
-                        'followers': artist_data.get('followers', {}).get('total', 'Not available')
-                    }
-            # Get genres from cache
-            if artist_id in artists_cache:
-                genres = artists_cache[artist_id].get('genres', [])
-        # Get ISRC if not already present
-        isrc = track.get('external_ids', {}).get('isrc', 'Not available')
-        if isrc == 'Not available':
-            isrc = get_track_isrc(token, track)
-        # Calculate duration in minutes:seconds format
-        duration_ms = track.get('duration_ms', 0)
-        if duration_ms:
-            minutes = duration_ms // 60000
-            seconds = (duration_ms % 60000) // 1000
-            duration_formatted = f"{minutes}:{seconds:02d}"
-        else:
-            duration_formatted = 'Not available'
-        tracks_info.append({
-            'artist': artist_name,
-            'title': track.get('name', 'Unknown'),
-            'album': track.get('album', {}).get('name', 'Unknown'),
-            'isrc': isrc,
-            'track_popularity': track.get('popularity', 'Not available'),
-            'genres': ', '.join(genres) if genres else 'Not available',
-            'artist_popularity': artists_cache.get(artist_id, {}).get('popularity', 'Not available') if artist_id else 'Not available',
-            'artist_followers': artists_cache.get(artist_id, {}).get('followers', 'Not available') if artist_id else 'Not available',
-            'release_date': track.get('album', {}).get('release_date', 'Not available'),
-            'duration': duration_formatted,
-            'duration_ms': duration_ms,
-            'explicit': 'Yes' if track.get('explicit', False) else 'No',
-            'spotify_url': track.get('external_urls', {}).get('spotify', 'Not available'),
-            'preview_url': track.get('preview_url', 'Not available'),
-            'playlist_source': getattr(track, 'playlist_source', 'Direct Track')
-        })
-    return tracks_info
-# Main Interface Function
 def interface(project_name, spotify_urls, include_all_info=True):
-    """Process multiple Spotify URLs (tracks or playlists) and combine results."""
-    if not project_name:
-        project_name = "spotify_tracks"
-    print(f"Starting to process request for project: {project_name}")
-    # Split and clean URLs
-    urls_list = [url.strip() for url in spotify_urls.strip().split('\n') if url.strip()]
     if not urls_list:
-        error_message = "Please enter at least one Spotify URL (track or playlist)."
-        return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
-    # Validate URLs
-    valid_urls = []
-    for url in urls_list:
-        if "spotify.com" in url and ("track" in url or "playlist" in url or "album" in url):
-            valid_urls.append(url)
-        else:
-            print(f"Invalid URL format, skipping: {url}")
-    if not valid_urls:
-        error_message = "No valid Spotify URLs found. Please enter valid track or playlist URLs."
-        return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
-    print(f"Processing {len(valid_urls)} valid Spotify URLs")
-    # Get token
-    token_spotify = get_token(client_ids[current_api_index], client_secrets[current_api_index])
-    if not token_spotify:
-        error_message = "Failed to authenticate with Spotify API. Please try again later."
-        return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
-    print(f"Successfully authenticated with Spotify API")
     all_tracks = []
-    # Process each URL
-    for url in valid_urls:
         try:
-            print(f"Processing URL: {url}")
-            if "playlist" in url:
-                tracks = get_playlist_tracks(token_spotify, url)
-                # Add source information
-                for track in tracks:
-                    track['playlist_source'] = url
                 all_tracks.extend(tracks)
-            elif "track" in url:
-                track = get_track_info(token_spotify, url)
                 if track:
-                    track[0]['playlist_source'] = url
                     all_tracks.extend(track)
-            elif "album" in url:
-                album_tracks = get_album_tracks(token_spotify, url)
-                all_tracks.extend(album_tracks)
         except Exception as e:
-            print(f"Error processing URL {url}: {str(e)}")
             continue
-    if not all_tracks:
-        error_message = "Could not find any tracks in the provided URLs."
-        return gr.Dataframe(value=pd.DataFrame({"Error": [error_message]})), None
-    # Extract track details including artist information
-    print("Extracting detailed track information including ISRCs...")
-    tracks_info = extract_track_details(all_tracks, token_spotify)
-    # Remove duplicate tracks (based on ISRC or title+artist if ISRC not available)
-    print("Creating DataFrame and removing duplicates...")
-    df = pd.DataFrame(tracks_info)
-    # Create a key for deduplication
-    df['dedup_key'] = df.apply(
-        lambda row: row['isrc'] if row['isrc'] != 'Not available' else f"{row['artist']}_{row['title']}",
-        axis=1
-    )
-    # Drop duplicates
-    df = df.drop_duplicates(subset='dedup_key')
-    df = df.drop(columns=['dedup_key'])
-    print(f"Found {len(df)} unique tracks after deduplication")
-    # Filter columns if not include_all_info
-    if not include_all_info:
-        columns_to_keep = ['artist', 'title', 'isrc', 'album', 'genres', 'release_date', 'track_popularity', 'explicit', 'spotify_url']
-        df = df[columns_to_keep]
-    # Save DataFrame to an Excel file
-    tmpfile = NamedTemporaryFile(delete=False, suffix='.xlsx')
-    df.to_excel(tmpfile.name, index=False)
-    # Rename the file with the project name
-    project_file_name = f"{project_name}.xlsx"
-    shutil.move(tmpfile.name, project_file_name)
-    return df, project_file_name
-# Gradio Interface Configuration
-iface = gr.Interface(
-    fn=interface,
-    inputs=[
-        gr.Textbox(label="Project Name", placeholder="Enter a name for your export"),
-        gr.Textbox(
-            label="Spotify URLs (Tracks, Albums or Playlists)",
-            placeholder="Enter one Spotify URL per line (tracks, albums or playlists)",
-            lines=5
-        ),
-        gr.Checkbox(label="Include All Track Information", value=True)
-    ],
-    outputs=[
-        gr.Dataframe(),
-        gr.File(label="Download Excel")
-    ],
-    title="Spotify Track Collector",
-    description="Extract tracks from multiple Spotify playlists, albums, and tracks into a single Excel file.",
-    examples=[
-        ["Pop Collection", "https://open.spotify.com/playlist/37i9dQZF1DXcBWIGoYBM5M\nhttps://open.spotify.com/track/4cOdK2wGLETKBW3PvgPWqT", True],
-        ["Rock Collection", "https://open.spotify.com/playlist/37i9dQZF1DWXRqgorJj26U", False],
-        ["Album Tracks", "https://open.spotify.com/album/1R5BORZZxNUg8QMgbqt0nd", True]
-    ],
-    allow_flagging="never"
-)
-if __name__ == "__main__":
-    iface.launch()

 import pandas as pd
 import time
 import shutil
 import os
 from tempfile import NamedTemporaryFile
 # Spotify API credentials from environment variables
+# Comma-separated credential lists. Empty entries are dropped because
+# "".split(',') yields [''], which is truthy and would otherwise let a missing
+# variable slip past the check below.
+client_ids = [cid.strip() for cid in os.getenv("SPOTIFY_CLIENT_IDS", "").split(',') if cid.strip()]
+client_secrets = [sec.strip() for sec in os.getenv("SPOTIFY_CLIENT_SECRETS", "").split(',') if sec.strip()]
 if not client_ids or not client_secrets:
     raise ValueError("SPOTIFY_CLIENT_IDS or SPOTIFY_CLIENT_SECRETS environment variables not set.")
+# The two lists are indexed in lockstep (same index = one credential pair), so
+# a length mismatch would surface later as an IndexError during rotation.
+if len(client_ids) != len(client_secrets):
+    raise ValueError("SPOTIFY_CLIENT_IDS and SPOTIFY_CLIENT_SECRETS must contain the same number of entries.")
+# Token rotation management
+# Index into client_ids / client_secrets of the credential pair currently in use.
 current_api_index = 0
+# Number of GET requests counted against the current client; rotate_client()
+# resets it to 0.
+request_counter = 0
+MAX_REQUESTS_PER_CLIENT = 100  # Rotate to the next client after this many requests
+def rotate_client():
+    """Switch to the next configured Spotify client and reset the request counter.
+
+    The index wraps around to the first client after the last one. Updates the
+    module-level ``current_api_index`` and ``request_counter`` and logs the
+    1-based number of the client now in use.
+    """
+    global current_api_index, request_counter
+    client_count = len(client_ids)
+    following_index = current_api_index + 1
+    current_api_index = following_index % client_count
+    request_counter = 0
+    print(f"Rotando a cliente Spotify #{current_api_index + 1}")
+def get_token():
+    """Return a Spotify access token, trying each configured client in turn.
+
+    Starts with the client at ``current_api_index`` and requests a token with
+    the client-credentials grant. If a client fails (network error, non-200
+    status, or a response without an ``access_token``), the error is logged,
+    ``rotate_client()`` advances to the next client, and the attempt repeats.
+    At most ``len(client_ids)`` attempts are made.
+
+    Returns:
+        The access token string.
+
+    Raises:
+        RuntimeError: if every configured client failed to produce a token.
+    """
+    url = 'https://accounts.spotify.com/api/token'
+    for _ in range(len(client_ids)):
+        client_id = client_ids[current_api_index]
+        client_secret = client_secrets[current_api_index]
+        try:
+            # A timeout keeps an unresponsive endpoint from hanging the app.
+            response = requests.post(
+                url,
+                headers={'Content-Type': 'application/x-www-form-urlencoded'},
+                data={'grant_type': 'client_credentials'},
+                auth=(client_id, client_secret),
+                timeout=30,
+            )
+        except requests.RequestException as exc:
+            # A network failure for one client should not abort the whole
+            # rotation; move on to the next credential pair.
+            print(f"Error con cliente {current_api_index}: {exc}")
+            rotate_client()
+            continue
+        if response.status_code == 200:
+            try:
+                token = response.json().get('access_token')
+            except ValueError:
+                token = None
+            # Never hand back None: it would be sent as "Bearer None".
+            if token:
+                return token
+        print(f"Error con cliente {current_api_index}: {response.text}")
+        rotate_client()
+    raise RuntimeError("Todos los clientes Spotify fallaron")
+def make_request_with_retry(url, headers, params=None):
+    """GET ``url`` with up to three attempts, rotating Spotify clients as needed.
+
+    Args:
+        url: Endpoint to request.
+        headers: Request headers. This dict is mutated in place: its
+            ``Authorization`` entry is replaced whenever a new token is
+            obtained, so the caller's later requests use the fresh token.
+        params: Optional query parameters.
+
+    Returns:
+        The response object on HTTP 200, or ``None`` if all three attempts
+        failed.
+
+    Raises:
+        Whatever ``get_token()`` raises when no client can supply a token.
+    """
+    global request_counter
+    for _ in range(3):  # three attempts in total for this call
+        try:
+            response = requests.get(url, headers=headers, params=params, timeout=30)
+        except requests.RequestException as exc:
+            # Treat network failures like any other failed attempt.
+            print(f"Error de red: {exc}")
+            time.sleep(2)
+            continue
+        request_counter += 1
+        if response.status_code == 429:
+            print(f"Rate limit alcanzado. Cliente actual: {current_api_index}")
+            # Fall back to the default wait if the header is missing or is
+            # not a plain integer number of seconds.
+            try:
+                retry_after = int(response.headers.get('Retry-After', 10))
+            except (TypeError, ValueError):
+                retry_after = 10
+            rotate_client()
+            headers['Authorization'] = f'Bearer {get_token()}'
+            time.sleep(max(retry_after, 0))
+            continue
+        if response.status_code == 401:
+            # The token was rejected (e.g. expired); retrying with the same
+            # token cannot succeed, so fetch a new one first.
+            print(f"Error {response.status_code}: {response.text}")
+            headers['Authorization'] = f'Bearer {get_token()}'
+            continue
+        if response.status_code == 200:
+            if request_counter >= MAX_REQUESTS_PER_CLIENT:
+                # Spread load: switch clients for the caller's next request.
+                rotate_client()
+                headers['Authorization'] = f'Bearer {get_token()}'
+            return response
+        print(f"Error {response.status_code}: {response.text}")
+        time.sleep(2)
+    return None
+# ... (keep your existing functions like get_playlist_tracks, get_album_tracks, etc.) ...
 def interface(project_name, spotify_urls, include_all_info=True):
+    """Collect tracks from newline-separated Spotify URLs (Gradio callback).
+
+    Resets the client-rotation state, classifies each non-empty line of
+    ``spotify_urls`` as a playlist, album, or track URL, obtains a token, and
+    gathers the tracks for every recognised URL into ``all_tracks``.
+
+    Returns a ``(gr.Dataframe, None)`` pair carrying an error message when no
+    usable URL was supplied.
+
+    NOTE(review): the processing after track collection is elided in this
+    listing (see the placeholder comment at the end), so as shown the function
+    falls off the end and returns None. ``project_name`` and
+    ``include_all_info`` are not used in the visible portion. The helpers
+    ``get_playlist_tracks``, ``get_album_tracks`` and ``get_track_info`` are
+    not defined in the visible new-side code - confirm they exist in the file.
+    """
+    global current_api_index, request_counter
+    current_api_index = 0
+    request_counter = 0
+    # Validate the URLs and tag each one with its resource type
+    urls_list = []
+    for url in spotify_urls.strip().split('\n'):
+        url = url.strip()
+        if not url: continue
+        # Determine the resource type from the URL path segment.
+        # Lines that do not contain "spotify.com" are dropped without a message.
+        if "spotify.com" in url:
+            if "/playlist/" in url:
+                url_type = "playlist"
+            elif "/album/" in url:
+                url_type = "album"
+            elif "/track/" in url:
+                url_type = "track"
+            else:
+                print(f"URL no soportada: {url}")
+                continue
+            urls_list.append((url, url_type))
     if not urls_list:
+        return gr.Dataframe(value=pd.DataFrame({"Error": ["No valid URLs provided"]})), None
+    # get_token() is called outside the try block below, so an exception it
+    # raises propagates to the caller.
+    token = get_token()
     all_tracks = []
+    for url, url_type in urls_list:
         try:
+            if url_type == "playlist":
+                print(f"Procesando playlist: {url}")
+                tracks = get_playlist_tracks(token, url)
+                all_tracks.extend(tracks)
+            elif url_type == "album":
+                print(f"Procesando álbum: {url}")
+                tracks = get_album_tracks(token, url)
                 all_tracks.extend(tracks)
+            elif url_type == "track":
+                print(f"Procesando track: {url}")
+                track = get_track_info(token, url)
                 if track:
                     all_tracks.extend(track)
         except Exception as e:
+            # Best effort: log the failure and move on to the next URL.
+            print(f"Error procesando {url}: {str(e)}")
             continue
+    # ... (rest of the processing unchanged) ...
+# Gradio interface configuration.
+# The commit left a literal `gr.Interface(...)` placeholder here, which passes
+# Ellipsis instead of the callback and components. The configuration below is
+# the one the previous revision of this file used, restored as the original
+# "keep the Gradio interface the same" note asked.
+iface = gr.Interface(
+    fn=interface,
+    inputs=[
+        gr.Textbox(label="Project Name", placeholder="Enter a name for your export"),
+        gr.Textbox(
+            label="Spotify URLs (Tracks, Albums or Playlists)",
+            placeholder="Enter one Spotify URL per line (tracks, albums or playlists)",
+            lines=5
+        ),
+        gr.Checkbox(label="Include All Track Information", value=True)
+    ],
+    outputs=[
+        gr.Dataframe(),
+        gr.File(label="Download Excel")
+    ],
+    title="Spotify Track Collector",
+    description="Extract tracks from multiple Spotify playlists, albums, and tracks into a single Excel file.",
+    examples=[
+        ["Pop Collection", "https://open.spotify.com/playlist/37i9dQZF1DXcBWIGoYBM5M\nhttps://open.spotify.com/track/4cOdK2wGLETKBW3PvgPWqT", True],
+        ["Rock Collection", "https://open.spotify.com/playlist/37i9dQZF1DWXRqgorJj26U", False],
+        ["Album Tracks", "https://open.spotify.com/album/1R5BORZZxNUg8QMgbqt0nd", True]
+    ],
+    allow_flagging="never"
+)
+# Launch only when run as a script so importing this module has no side effect.
+if __name__ == "__main__":
+    iface.launch()