Mthrfkr committed on
Commit
3307ce1
·
verified ·
1 Parent(s): 956787c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +235 -182
app.py CHANGED
@@ -1,190 +1,243 @@
1
- import gradio as gr
2
- import requests
 
 
3
  import pandas as pd
4
- from tempfile import NamedTemporaryFile
5
- from openpyxl import Workbook
6
- import shutil
7
-
8
# Spotify API credential pairs, kept together as (client_id, client_secret).
# NOTE(security): these secrets are committed in plain text; they should be
# rotated and loaded from configuration instead.
_SPOTIFY_CREDENTIALS = (
    ('b4a2add66ffb4f1198b94b087b365c65', '8045eacf956a477299d2bc41752f1f73'),
    ('9df51caba5d247dc921b21de35a47c44', '0e39502ec7e74fe99bb74245678d5f0d'),
)
client_ids = [client_id for client_id, _ in _SPOTIFY_CREDENTIALS]
client_secrets = [client_secret for _, client_secret in _SPOTIFY_CREDENTIALS]
# Index of the credential pair currently in use.
current_api_index = 0
12
-
13
- # Funciones para Spotify
14
def obtener_token(client_id, client_secret):
    """Request a Spotify access token with the client-credentials grant.

    Returns the ``access_token`` field of the JSON response, or ``None`` when
    the response does not contain one.
    """
    print(f"Obteniendo token de Spotify con client_id {client_id}...")
    respuesta = requests.post(
        'https://accounts.spotify.com/api/token',
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
        data={'grant_type': 'client_credentials'},
        auth=(client_id, client_secret),
    )
    cuerpo = respuesta.json()
    return cuerpo.get('access_token')
21
-
22
def cambiar_api_key():
    """Switch to the next credential pair (wrapping around) and return a token for it."""
    global current_api_index
    siguiente = (current_api_index + 1) % len(client_ids)
    current_api_index = siguiente
    nuevo_id = client_ids[siguiente]
    nuevo_secreto = client_secrets[siguiente]
    return obtener_token(nuevo_id, nuevo_secreto)
26
-
27
def buscar_playlists_spotify(token, query, limit=50):
    """Search Spotify for playlists matching ``query``.

    Args:
        token: Bearer token used for the search requests.
        query: Free-text search string.
        limit: Total number of playlists wanted; fetched in pages of at most
            50. Coerced to ``int`` because UI number widgets may deliver floats.

    Returns:
        A list of ``{'playlist_id': ..., 'playlist_name': ...}`` dicts. Errors
        during the search are printed and whatever was collected so far is
        returned.
    """
    print("Buscando playlists en Spotify...")
    url = 'https://api.spotify.com/v1/search'
    playlists = []
    restantes = int(limit)
    offset = 0

    try:
        # A single paging loop covers both the "<= 50" and "> 50" cases.
        while restantes > 0:
            pagina = min(50, restantes)
            params = {'q': query, 'type': 'playlist', 'limit': pagina, 'offset': offset}
            response = requests.get(url, headers={'Authorization': f'Bearer {token}'}, params=params)
            if response.status_code == 429:  # Rate limit reached
                # Keep the rotated token so later pages use it too.
                token = cambiar_api_key()
                response = requests.get(url, headers={'Authorization': f'Bearer {token}'}, params=params)
            items = response.json().get('playlists', {}).get('items', [])
            if not items:
                break  # No more results; avoid requesting further empty pages.
            playlists.extend(items)
            restantes -= pagina
            offset += 50
    except Exception as e:
        print(f"Error al buscar playlists: {e}")

    # Skip null entries so one missing item does not raise here, outside the try.
    return [
        {'playlist_id': playlist['id'], 'playlist_name': playlist['name']}
        for playlist in playlists
        if playlist
    ]
56
-
57
def _cancion_desde_track(token, track, playlist_name):
    """Build the flat song record for one Spotify track object."""
    track_id = track.get('id')
    # Tracks without an id cannot be looked up; fall back to empty payloads.
    audio_features = obtener_caracteristicas_audio(token, track_id) if track_id else {}
    audio_analysis = obtener_analisis_audio(token, track_id) if track_id else {}

    artistas = track.get('artists') or []
    album = track.get('album') or {}
    album_id = album.get('id')
    release_date = album.get('release_date')
    # ``or [{}]`` also covers an empty segments list, which would otherwise
    # raise IndexError on ``[0]``.
    segmentos = audio_analysis.get('segments') or [{}]

    return {
        'playlist_name': playlist_name,
        'artista': artistas[0]['name'] if artistas else 'Desconocido',
        'titulo': track.get('name'),
        'isrc': (track.get('external_ids') or {}).get('isrc', 'No disponible'),
        'popularity': track.get('popularity', 'No disponible'),
        'valence': audio_features.get('valence', 'No disponible'),
        'danceability': audio_features.get('danceability', 'No disponible'),
        'energy': audio_features.get('energy', 'No disponible'),
        'tempo': audio_features.get('tempo', 'No disponible'),
        'speechiness': audio_features.get('speechiness', 'No disponible'),
        'instrumentalness': audio_features.get('instrumentalness', 'No disponible'),
        'duration': track.get('duration_ms', 'No disponible'),
        'release_year': release_date.split('-')[0] if release_date else 'No disponible',
        'loudness': audio_analysis.get('track', {}).get('loudness', 'No disponible'),
        'timbre': segmentos[0].get('timbre', 'No disponible'),
        'acousticness': audio_features.get('acousticness', 'No disponible'),
        'liveness': audio_features.get('liveness', 'No disponible'),
        'key': obtener_clave(audio_analysis),
        'link': (track.get('external_urls') or {}).get('spotify', 'No disponible'),
        'record_label': obtener_record_label_spotify(album_id, token) if album_id else 'No disponible',
        'source': 'Spotify',
    }


def obtener_canciones_playlist_spotify(token, playlist_id, playlist_name):
    """Fetch the tracks of a playlist and enrich each with audio metadata.

    Args:
        token: Bearer token for the Spotify requests.
        playlist_id: Identifier of the playlist to read.
        playlist_name: Name stored on every returned song record.

    Returns:
        A list of song dicts (one per track that could be processed). A failed
        playlist request yields an empty list; a failure on a single track is
        printed and that track is skipped instead of aborting the playlist.
    """
    print(f"Obteniendo canciones de la playlist {playlist_id} ({playlist_name}) de Spotify...")
    url = f'https://api.spotify.com/v1/playlists/{playlist_id}/tracks'
    canciones = []

    try:
        response = requests.get(url, headers={'Authorization': f'Bearer {token}'})
        if response.status_code == 429:  # Rate limit reached
            token = cambiar_api_key()
            response = requests.get(url, headers={'Authorization': f'Bearer {token}'})
        if response.status_code != 200:
            return canciones
        items = response.json().get('items') or []
    except Exception as e:
        print(f"Error al obtener canciones de la playlist: {e}")
        return canciones

    for item in items:
        track = (item or {}).get('track')
        if not track:
            continue
        try:
            canciones.append(_cancion_desde_track(token, track, playlist_name))
        except Exception as e:
            # Isolate failures per track so the rest of the playlist survives.
            print(f"Error al obtener canciones de la playlist: {e}")

    return canciones
103
-
104
def obtener_caracteristicas_audio(token, track_id):
    """Fetch the audio-features payload for a track.

    Retries once with the next credential pair on HTTP 429. Returns the JSON
    body when the final response is HTTP 200, otherwise an empty dict.
    """
    endpoint = f'https://api.spotify.com/v1/audio-features/{track_id}'
    respuesta = requests.get(endpoint, headers={'Authorization': f'Bearer {token}'})
    if respuesta.status_code == 429:
        # Rate limited: rotate credentials and try again.
        token = cambiar_api_key()
        respuesta = requests.get(endpoint, headers={'Authorization': f'Bearer {token}'})
    if respuesta.status_code != 200:
        return {}
    return respuesta.json()
112
-
113
def obtener_analisis_audio(token, track_id):
    """Fetch the audio-analysis payload for a track.

    Retries once with the next credential pair on HTTP 429. Returns the JSON
    body when the final response is HTTP 200, otherwise an empty dict.
    """
    endpoint = f'https://api.spotify.com/v1/audio-analysis/{track_id}'
    respuesta = requests.get(endpoint, headers={'Authorization': f'Bearer {token}'})
    if respuesta.status_code == 429:
        # Rate limited: rotate credentials and try again.
        token = cambiar_api_key()
        respuesta = requests.get(endpoint, headers={'Authorization': f'Bearer {token}'})
    if respuesta.status_code != 200:
        return {}
    return respuesta.json()
121
-
122
def obtener_clave(audio_analysis):
    """Translate the numeric ``track.key`` of an audio analysis into a note name.

    A missing key is treated as -1 ('No Key'); any value outside -1..11 maps
    to 'Unknown'.
    """
    nombres = (
        'No Key',
        'C', 'C#/Db', 'D', 'D#/Eb', 'E', 'F',
        'F#/Gb', 'G', 'G#/Ab', 'A', 'A#/Bb', 'B',
    )
    # Pair -1..11 with the names above, in order.
    tabla = dict(zip(range(-1, 12), nombres))
    pista = audio_analysis.get('track', {})
    numero = pista.get('key', -1)
    return tabla.get(numero, 'Unknown')
140
-
141
def obtener_record_label_spotify(album_id, token):
    """Return the ``label`` field of an album, or 'No disponible'.

    Retries once with the next credential pair on HTTP 429; any final status
    other than 200 is treated as an album with no information.
    """
    endpoint = f'https://api.spotify.com/v1/albums/{album_id}'
    respuesta = requests.get(endpoint, headers={'Authorization': f'Bearer {token}'})
    if respuesta.status_code == 429:
        # Rate limited: rotate credentials and try again.
        token = cambiar_api_key()
        respuesta = requests.get(endpoint, headers={'Authorization': f'Bearer {token}'})
    if respuesta.status_code == 200:
        album = respuesta.json()
    else:
        album = {}
    return album.get('label', 'No disponible')
150
-
151
- # Función principal de la interfaz
152
def interface(project_name, query, num_spotify_playlists=50):
    """Search playlists, collect their songs and export them to Excel.

    Args:
        project_name: Base name for the exported ``.xlsx`` file. Only the final
            path component is used; an empty name falls back to 'proyecto'.
        query: Search keywords passed to the playlist search.
        num_spotify_playlists: How many playlists to look up.

    Returns:
        ``(df, project_file_name)``: the songs DataFrame (sorted by popularity,
        highest first, when that column exists) and the path of the Excel file.
    """
    import os  # Local import: this module does not import os at file level.

    token_spotify = obtener_token(client_ids[current_api_index], client_secrets[current_api_index])
    playlists_spotify = buscar_playlists_spotify(token_spotify, query, num_spotify_playlists)
    canciones_spotify = []
    for playlist in playlists_spotify:
        canciones_spotify.extend(
            obtener_canciones_playlist_spotify(token_spotify, playlist['playlist_id'], playlist['playlist_name'])
        )

    df = pd.DataFrame(canciones_spotify)
    df.rename(columns={'isrc': 'ISRCs'}, inplace=True)

    # With no songs the frame has no columns, so sorting would raise KeyError.
    if 'popularity' in df.columns:
        # Coerce to numbers for ordering: the column may mix ints with the
        # 'No disponible' placeholder, which cannot be compared directly.
        df.sort_values(
            by='popularity',
            key=lambda serie: pd.to_numeric(serie, errors='coerce'),
            ascending=False,
            na_position='last',
            inplace=True,
        )

    # Keep only the last path component so user input cannot redirect the
    # output to another directory.
    nombre = os.path.basename(str(project_name or '').strip()) or 'proyecto'
    project_file_name = f"{nombre}.xlsx"
    # Write straight to the destination; the former temp file was left open
    # and then moved, which leaked the handle.
    df.to_excel(project_file_name, index=False)

    return df, project_file_name
177
-
178
- # Configuración de Gradio
179
# Gradio wiring: three inputs (project name, keywords, playlist count) feed
# ``interface``; the outputs are the songs table and the Excel download.
_entradas = [
    gr.Textbox(label="Nombre del Proyecto"),
    gr.Textbox(label="Keywords - Palabras Clave para tu búsqueda"),
    gr.Number(
        label="Numero de Playlists que vamos a buscar con estas Keywords",
        value=50,
        minimum=1,
        maximum=1000,
    ),
]
_salidas = [gr.Dataframe(), gr.File(label="Download Excel")]

iface = gr.Interface(
    fn=interface,
    inputs=_entradas,
    outputs=_salidas,
    title="Spotify Playlist Fetcher",
    description="Enter a search query to fetch playlists and their songs from Spotify. Client credentials are pre-configured.",
)
iface.launch()
 
1
+ import os
2
+ import threading
3
+ import tkinter as tk
4
+ from tkinter import simpledialog, messagebox, filedialog, ttk
5
  import pandas as pd
6
+ from collections import defaultdict, deque
7
+ from selenium import webdriver
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.common.keys import Keys
10
+ from selenium.webdriver.chrome.service import Service
11
+ from selenium.webdriver.support.ui import WebDriverWait
12
+ from selenium.webdriver.support import expected_conditions as EC
13
+ from webdriver_manager.chrome import ChromeDriverManager
14
+ import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
# API keys for Last.fm and Genius. Each can be overridden through an
# environment variable; the literals remain as defaults for compatibility.
# NOTE(security): the default values are committed in plain text and should be
# rotated and removed once the environment variables are configured.
api_key_lastfm = os.environ.get('LASTFM_API_KEY', '463d87b43ca23d29f3a8d182d3f9763e')
GENIUS_API_KEY = os.environ.get(
    'GENIUS_API_KEY',
    "4RpTijlKsKPeObCICn_ZK2eCYucI_pxiHyFOM-NxBIv1a3wF0jZg9MUGeRgU5i8T",
)
18
+
19
def iniciar_sesion(driver, usuario, contrasena, progress, root):
    """Open https://producer.x5music.com/ and submit the login form.

    Types ``usuario`` and ``contrasena`` into the two form inputs, presses
    RETURN on the password field, then sets ``progress`` to 20 and refreshes
    ``root``. On any exception an error dialog is shown and ``root`` is
    destroyed; nothing is returned or re-raised.
    """
    def esperar_campo(xpath):
        # Wait up to 10 seconds for the element to be present in the DOM.
        return WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, xpath)))

    try:
        driver.get("https://producer.x5music.com/")
        campo_usuario = esperar_campo("/html/body/div[2]/div/div/div[1]/form/div/input[1]")
        campo_usuario.send_keys(usuario)
        campo_clave = esperar_campo("/html/body/div[2]/div/div/div[1]/form/div/input[2]")
        campo_clave.send_keys(contrasena)
        campo_clave.send_keys(Keys.RETURN)
        progress.set(20)
        root.update_idletasks()
    except Exception as error:
        messagebox.showerror("Error", f"Error al iniciar sesión: {str(error)}")
        root.destroy()
34
+
35
def obtener_generos_lastfm(isrc):
    """Look up a track's top tags on Last.fm and return them comma-separated.

    Args:
        isrc: Identifier sent as the ``mbid`` parameter of ``track.getInfo``.
            NOTE(review): ``mbid`` is named after MusicBrainz IDs; confirm
            that Last.fm resolves ISRC values passed through it.

    Returns:
        The tag names joined with ', ', or 'Unknown' when the request fails,
        the response is not usable JSON, or no tags are present.
    """
    params = {
        'method': 'track.getInfo',
        'api_key': api_key_lastfm,
        'format': 'json',
        'mbid': isrc,
    }
    try:
        # HTTPS keeps the API key off the wire in clear text; the timeout
        # prevents one stalled lookup from blocking the whole batch.
        response = requests.get("https://ws.audioscrobbler.com/2.0/", params=params, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError):
        return 'Unknown'

    track = data.get('track') if isinstance(data, dict) else None
    toptags = track.get('toptags') if isinstance(track, dict) else None
    if not isinstance(toptags, dict):
        return 'Unknown'

    tags = toptags.get('tag') or []
    if isinstance(tags, dict):
        # Tolerate a single tag delivered as an object instead of a list.
        tags = [tags]
    genres = [tag['name'] for tag in tags if isinstance(tag, dict) and tag.get('name')]
    return ', '.join(genres) if genres else 'Unknown'
43
+
44
def obtener_letras_genius(artist, title):
    """Search Genius for a song and return its plain lyrics if available.

    The first search hit whose primary artist name contains ``artist``
    (case-insensitive) is selected and its song resource is fetched.

    Args:
        artist: Artist name used in the query and to pick the matching hit.
        title: Song title used in the query.

    Returns:
        ``song['lyrics']['plain']`` from the song resource when present,
        otherwise 'Lyrics not found' (also on any request or parsing error).
        NOTE(review): confirm the Genius song payload actually carries a
        ``lyrics`` field; if it does not, this always returns the fallback.
    """
    base_url = "https://api.genius.com"
    headers = {'Authorization': f'Bearer {GENIUS_API_KEY}'}
    no_encontrado = 'Lyrics not found'

    try:
        # The query must travel in the URL (``params``); ``data`` would put it
        # in the body of the GET request, where the search would not see it.
        response = requests.get(
            f"{base_url}/search",
            params={'q': f"{artist} {title}"},
            headers=headers,
            timeout=10,
        )
        if response.status_code != 200:
            return no_encontrado

        hits = response.json().get('response', {}).get('hits', [])
        artista = str(artist).lower()  # str() tolerates non-string cells (e.g. NaN)
        song_info = next(
            (hit for hit in hits
             if artista in hit['result']['primary_artist']['name'].lower()),
            None,
        )
        if song_info is None:
            return no_encontrado

        song_response = requests.get(
            f"{base_url}{song_info['result']['api_path']}",
            headers=headers,
            timeout=10,
        )
        song = song_response.json().get('response', {}).get('song', {})
        lyrics = song.get('lyrics')
        if isinstance(lyrics, dict) and 'plain' in lyrics:
            return lyrics['plain']
    except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError) as e:
        # Best-effort enrichment: report and fall through to the placeholder.
        print(f"Error al obtener letras de Genius: {e}")

    return no_encontrado
65
+
66
def _isrcs_en_tabla(df, isrc_list):
    """Return the submitted ISRCs that appear in any cell of the scraped table."""
    enviados = set(isrc_list)
    celdas = {str(valor).strip() for valor in df.to_numpy().ravel() if valor is not None}
    return enviados & celdas


def procesar_isrc(driver, file_path, output_folder, progress, root):
    """Submit the ISRCs from an Excel file to the web form and export the matches.

    Steps: read the ``ISRCs`` column of ``file_path``; send the values in
    batches of 1000 to the ``isrcs`` input and scrape the result table; drop
    duplicate Title/Artist rows; keep rows whose 8th column is 'Global'; select
    the rows of the original sheet whose ISRC appears in the remaining scraped
    rows; add ``Genres`` and ``Lyrics`` columns; save via
    ``guardar_datos_en_excel``.

    Args:
        driver: Selenium WebDriver, already logged in.
        file_path: Path of the Excel file; must contain an ``ISRCs`` column and
            the ``Artist``/``Title`` columns used for the lyrics lookup.
        output_folder: Folder where the result workbook is written.
        progress: Tk variable updated as batches complete.
        root: Tk root, refreshed after each batch and destroyed on error.

    Returns:
        The resulting DataFrame, or ``None`` after an error (an error dialog is
        shown and ``root`` is destroyed in that case).
    """
    tbody_xpath = "/html/body/div[3]/div[2]/div/ul/li/div[2]/div/div[2]/div[3]/div[1]/table/tbody"
    try:
        # Read the sheet once and reuse it for the final selection.
        df_base = pd.read_excel(file_path, engine='openpyxl')
        # str() so non-string cells can still be joined and compared.
        isrc_list = [str(isrc).strip() for isrc in df_base['ISRCs'].dropna()]

        # Split the ISRC list into groups of 1000
        isrc_groups = [isrc_list[i:i + 1000] for i in range(0, len(isrc_list), 1000)]
        all_data = []

        for index, group in enumerate(isrc_groups):
            print(f"Enviando grupo {index+1} de {len(group)} ISRCs.")

            isrc_input = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, "//*[@id='isrcs']")))
            isrc_input.clear()
            isrc_input.send_keys(','.join(group))
            isrc_input.send_keys(Keys.RETURN)

            # Wait for the result table.
            # NOTE(review): presence alone may match the previous batch's
            # table if the page updates in place; confirm against the site.
            tbody = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.XPATH, tbody_xpath)))
            rows = tbody.find_elements(By.XPATH, ".//tr")
            all_data.extend(
                [cell.get_attribute('textContent') for cell in row.find_elements(By.XPATH, ".//td")]
                for row in rows
            )

            progress.set(10 + 90 * (index + 1) // len(isrc_groups))
            root.update_idletasks()

        df = pd.DataFrame(all_data)
        if len(df.columns) >= 3:
            df.columns = ['Column1', 'Title', 'Artist'] + df.columns[3:].tolist()
            print(df.columns)  # For verification
            # Only deduplicate once the columns exist; with fewer than three
            # columns the subset lookup would raise KeyError.
            df = df.drop_duplicates(subset=['Title', 'Artist'], keep='first')

        # Keep only songs marked "Global" in column H (index 7)
        if len(df.columns) > 7:
            print(f"Filtrando canciones no 'Global', total antes del filtro: {len(df)}")
            df = df[df.iloc[:, 7].str.strip() == 'Global']
            print(f"Total de canciones después del filtro: {len(df)}")
        else:
            print("La columna en la posición 7 no está presente en los datos.")

        # The scraped table has no 'ISRCs' column, so match the submitted
        # codes against every scraped cell instead of indexing df['ISRCs'].
        encontrados = _isrcs_en_tabla(df, isrc_list)
        coincide = df_base['ISRCs'].astype(str).str.strip().isin(encontrados)
        # .copy() so the new columns are added to an independent frame.
        df_final = df_base[coincide].copy()

        # Add genre and lyrics columns
        df_final['Genres'] = [obtener_generos_lastfm(isrc) for isrc in df_final['ISRCs']]
        # Built as a list so an empty selection still yields a plain column.
        df_final['Lyrics'] = [
            obtener_letras_genius(artista, titulo)
            for artista, titulo in zip(df_final['Artist'], df_final['Title'])
        ]

        guardar_datos_en_excel(df_final, output_folder, os.path.basename(file_path))
        num_songs = len(df_final)
        messagebox.showinfo("Información", f"Se encontraron {num_songs} canciones únicas en total.")
        return df_final

    except Exception as e:
        messagebox.showerror("Error", f"Error al procesar ISRCs: {str(e)}")
        root.destroy()
        return None
130
+
131
def guardar_datos_en_excel(df, output_folder, file_name):
    """Write ``df`` to ``resultados-<name>.xlsx`` in ``output_folder`` and notify the user.

    Args:
        df: DataFrame to export.
        output_folder: Destination directory.
        file_name: Name of the source file; its extension is replaced by
            ``.xlsx`` so the output format can be written whatever the input
            was called (a ``.xls`` or extensionless name would otherwise leave
            the writer without a usable engine).
    """
    nombre_base, _ = os.path.splitext(file_name)
    output_file_path = os.path.join(output_folder, f"resultados-{nombre_base}.xlsx")
    df.to_excel(output_file_path, index=False)
    messagebox.showinfo("Información", f"Datos extraídos y guardados en '{output_file_path}'.")
135
+
136
def verificar_canciones_suficientes(df, num_playlists, min_songs):
    """Check whether ``df`` holds enough rows for the requested playlists.

    Returns a tuple ``(enough, available, required)`` where ``required`` is
    ``num_playlists * min_songs`` and ``available`` is ``len(df)``.
    """
    disponibles = len(df)
    necesarias = num_playlists * min_songs
    hay_suficientes = not (disponibles < necesarias)
    return hay_suficientes, disponibles, necesarias
142
+
143
def create_playlists(data, output_folder, num_playlists, min_songs):
    """Build shuffled playlists from ``data`` and save them to one workbook.

    Songs are shuffled once. Each playlist takes songs in that order, skipping
    a song when its artist already has 3 songs in the playlist or is among the
    last 4 artists added. A playlist stops growing at ``min_songs`` songs.
    Songs used by earlier playlists are excluded until fewer than
    ``min_songs`` remain available, at which point the pool is reset.

    Args:
        data: DataFrame with at least an ``Artist`` column.
        output_folder: Directory where ``Playlists_final.xlsx`` is written.
        num_playlists: Number of playlists (sheets) to create.
        min_songs: Target number of songs per playlist.

    Returns:
        The list of playlist DataFrames. When ``num_playlists`` is not
        positive nothing is written and an empty list is returned, because a
        workbook cannot be saved without sheets.
    """
    if num_playlists <= 0:
        print("No playlists requested; nothing was saved.")
        return []

    max_artist_songs = 3
    playlists = []
    all_songs = data.sample(frac=1).reset_index(drop=True)
    used_songs = set()

    for _ in range(num_playlists):
        playlist = []
        artist_count = defaultdict(int)
        # Last 4 artists added; maxlen discards the oldest automatically so
        # the same artist is kept at least 4 rows apart.
        artist_queue = deque(maxlen=4)
        available_songs = all_songs[~all_songs.index.isin(used_songs)]

        for index, row in available_songs.iterrows():
            artist = row['Artist']
            if artist_count[artist] < max_artist_songs and artist not in artist_queue:
                playlist.append(index)
                artist_count[artist] += 1
                artist_queue.append(artist)
                if len(playlist) == min_songs:
                    break

        used_songs.update(playlist)
        playlists.append(all_songs.loc[playlist])

        if len(available_songs) < min_songs:
            # Not enough unused songs for another playlist: allow reuse.
            used_songs.clear()

    output_filepath = os.path.join(output_folder, "Playlists_final.xlsx")
    with pd.ExcelWriter(output_filepath) as writer:
        for i, pl in enumerate(playlists, 1):
            pl.to_excel(writer, sheet_name=f'Playlist {i}', index=False)

    print("Playlists created and saved successfully.")
    return playlists
179
+
180
def _ventana_activa(root):
    """Return True while the Tk root still exists (False once destroyed)."""
    try:
        return bool(root.winfo_exists())
    except tk.TclError:
        return False


def run_app():
    """Run the interactive workflow: log in, process ISRCs, build playlists.

    Asks for credentials, the input Excel file and the output folder, starts
    Chrome, logs in, processes the ISRCs and, when songs were found, asks how
    many playlists of how many songs to create. The browser is always closed
    at the end. Any cancelled dialog or failed step ends the run early.
    """
    root = tk.Tk()
    root.withdraw()

    progress = tk.DoubleVar()
    progress_bar = ttk.Progressbar(root, variable=progress, maximum=100)
    progress_bar.pack(fill=tk.X, expand=True)
    root.deiconify()

    usuario = simpledialog.askstring("Iniciar Sesión", "Usuario:", parent=root)
    contrasena = simpledialog.askstring("Iniciar Sesión", "Contraseña:", parent=root, show='*')
    if not usuario or not contrasena:
        messagebox.showerror("Error", "Usuario o contraseña no proporcionados.")
        root.destroy()
        return

    file_path = filedialog.askopenfilename(title="Selecciona el archivo Excel con ISRCs", filetypes=[("Excel files", "*.xlsx *.xls")])
    if not file_path:
        messagebox.showerror("Error", "No se seleccionó ningún archivo.")
        root.destroy()
        return

    output_folder = filedialog.askdirectory(title="Selecciona la carpeta de salida para los datos de Excel")
    if not output_folder:
        messagebox.showerror("Error", "No se seleccionó ninguna carpeta de salida.")
        root.destroy()
        return

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)

    try:
        iniciar_sesion(driver, usuario, contrasena, progress, root)
        # iniciar_sesion destroys the window when the login fails; stop here
        # instead of driving a browser that is not logged in.
        if not _ventana_activa(root):
            return

        all_data = procesar_isrc(driver, file_path, output_folder, progress, root)
        # None means the error was already reported and the window destroyed.
        if all_data is None:
            return

        if not all_data.empty:
            # minvalue=1 rules out zero/negative values, which would otherwise
            # cause a division by zero below.
            num_playlists = simpledialog.askinteger("Número de Playlists", "Introduce el número de playlists:", parent=root, minvalue=1)
            min_songs = simpledialog.askinteger("Mínimo de Canciones", "Introduce el mínimo de canciones por playlist:", parent=root, minvalue=1)
            if num_playlists is None or min_songs is None:
                # A cancelled dialog returns None.
                root.destroy()
                return

            suficientes_canciones, total_canciones, requeridas_canciones = verificar_canciones_suficientes(all_data, num_playlists, min_songs)

            if not suficientes_canciones:
                respuesta = messagebox.askyesnocancel("Canciones Insuficientes", f"No hay suficientes canciones únicas para crear {num_playlists} playlists de {min_songs} canciones cada una.\n"
                                                      f"Total de canciones únicas: {total_canciones}\n"
                                                      f"Canciones requeridas: {requeridas_canciones}\n\n"
                                                      "¿Deseas continuar con las playlists completas posibles y una incompleta (Sí)?\n"
                                                      "¿Deseas continuar solo con las playlists completas posibles (No)?\n"
                                                      "¿Deseas cancelar la operación (Cancelar)?")

                if respuesta is None:
                    root.destroy()
                    return

                completas, sobrantes = divmod(total_canciones, min_songs)
                # "Yes" adds one extra, incomplete playlist for the leftover
                # songs; "No" keeps only the complete ones.
                num_playlists = completas + 1 if (respuesta and sobrantes) else completas

            if num_playlists > 0:
                create_playlists(all_data, output_folder, num_playlists, min_songs)
            else:
                messagebox.showwarning("Canciones Insuficientes", "No hay canciones suficientes para crear ninguna playlist completa.")
    finally:
        driver.quit()

    root.mainloop()


if __name__ == "__main__":
    run_app()