Update app.py
Browse files
app.py
CHANGED
|
@@ -4,81 +4,88 @@ import zipfile
|
|
| 4 |
import requests
|
| 5 |
import tempfile
|
| 6 |
import subprocess
|
| 7 |
-
from bs4 import BeautifulSoup
|
| 8 |
import gradio as gr
|
| 9 |
|
| 10 |
-
# Install the third-party dependencies on the fly when bs4 cannot be imported.
try:
    import bs4
except ImportError:
    import sys

    # Run pip through the current interpreter so the packages are installed
    # into the environment this script runs in, not whichever "pip" happens
    # to be first on PATH.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "gradio", "beautifulsoup4", "requests"],
        check=False,
    )
|
| 15 |
|
| 16 |
def sanitize_filename(name):
    """Return *name* turned into a filename-safe string of at most 50 characters.

    Surrounding whitespace is stripped, every character that is not a word
    character, hyphen, underscore or dot is replaced by ``_``, and the result
    is cut to its first 50 characters.
    """
    trimmed = name.strip()
    cleaned = re.sub(r"[^\w\-_.]", "_", trimmed)
    return cleaned[:50]
|
| 18 |
|
| 19 |
-
def
|
| 20 |
-
|
| 21 |
-
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
try:
|
| 32 |
response = requests.get(url)
|
| 33 |
response.raise_for_status()
|
| 34 |
except Exception as e:
|
| 35 |
-
return f"Erreur de téléchargement : {e}", None
|
| 36 |
|
| 37 |
html_text = response.text
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
if not titles_links:
|
| 42 |
-
# fallback brut si les titres ne sont pas extraits
|
| 43 |
-
urls = extract_audio_links_from_html(html_text)
|
| 44 |
-
titles_links = [(f"track_{i+1:02d}", u) for i, u in enumerate(urls)]
|
| 45 |
-
|
| 46 |
-
if not titles_links:
|
| 47 |
-
return "Aucun fichier audio trouvé avec ITEMA dans l'URL", None
|
| 48 |
|
| 49 |
-
#
|
| 50 |
with tempfile.TemporaryDirectory() as tmpdir:
|
| 51 |
-
zip_path = os.path.join(tmpdir, "
|
| 52 |
with zipfile.ZipFile(zip_path, "w") as zipf:
|
| 53 |
-
for idx, (
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
| 56 |
filepath = os.path.join(tmpdir, filename)
|
| 57 |
try:
|
| 58 |
-
|
|
|
|
| 59 |
audio_resp.raise_for_status()
|
|
|
|
|
|
|
|
|
|
| 60 |
with open(filepath, "wb") as f:
|
| 61 |
f.write(audio_resp.content)
|
| 62 |
zipf.write(filepath, arcname=filename)
|
| 63 |
except Exception as e:
|
| 64 |
-
print(f"Erreur téléchargement {
|
| 65 |
|
| 66 |
-
return "Téléchargement terminé avec succès", zip_path
|
| 67 |
|
| 68 |
def gradio_interface(url):
|
| 69 |
-
message, zip_file =
|
| 70 |
return message, zip_file
|
| 71 |
|
| 72 |
# Interface Gradio
|
| 73 |
demo = gr.Interface(
|
| 74 |
fn=gradio_interface,
|
| 75 |
-
inputs=gr.Textbox(label="URL de la page
|
| 76 |
outputs=[
|
| 77 |
gr.Textbox(label="Message"),
|
| 78 |
-
gr.File(label="Fichier ZIP
|
| 79 |
],
|
| 80 |
-
title="
|
| 81 |
-
description="Collez une URL
|
| 82 |
)
|
| 83 |
|
| 84 |
if __name__ == "__main__":
|
|
|
|
| 4 |
import requests
|
| 5 |
import tempfile
|
| 6 |
import subprocess
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
|
| 9 |
+
# Install the third-party dependencies on the fly when bs4 cannot be imported.
try:
    import bs4
except ImportError:
    import sys

    # Run pip through the current interpreter so the packages are installed
    # into the environment this script runs in, not whichever "pip" happens
    # to be first on PATH.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "gradio", "beautifulsoup4", "requests"],
        check=False,
    )
|
| 14 |
|
| 15 |
def sanitize_filename(name):
    """Return *name* turned into a filename-safe string of at most 50 characters.

    Surrounding whitespace is stripped, every character that is not a word
    character, hyphen, underscore or dot is replaced by ``_``, and the result
    is cut to its first 50 characters.
    """
    trimmed = name.strip()
    cleaned = re.sub(r"[^\w\-_.]", "_", trimmed)
    return cleaned[:50]
|
| 18 |
|
| 19 |
+
def extract_mp3_links_and_titles(html_text):
    """Find the MP3 URLs in *html_text* and pair each with a title when one is found.

    Args:
        html_text: Raw text of the page to scan.

    Returns:
        A list of ``(url, title)`` tuples, one per distinct MP3 URL, in the
        order the URLs first appear. ``title`` is ``None`` when no
        ``title:"..."`` / ``url:"..."`` pair was matched for that URL.
    """
    # Any http(s) URL ending in ".mp3".
    mp3_pattern = re.compile(r'https?://[^\s"\'<>]+\.mp3')
    # A URL repeated in the page must be reported once only, otherwise the
    # same file is downloaded several times; dict.fromkeys keeps the
    # first-seen order while dropping the repeats.
    mp3_links = list(dict.fromkeys(mp3_pattern.findall(html_text)))

    # Heuristic: a title:"..." field followed, possibly much later (DOTALL),
    # by the url:"..." field holding the MP3 address.
    item_pattern = re.compile(
        r'title:"\\?"([^"]+)\\?".*?url:"(https?://[^\s"\'<>]+\.mp3)"',
        re.DOTALL,
    )
    titled_links = {}
    for title, link in item_pattern.findall(html_text):
        # The greedy title group swallows the backslash of a closing \" before
        # the optional-backslash part of the pattern can consume it; drop it
        # so it does not leak into the title.
        if title.endswith("\\"):
            title = title[:-1]
        titled_links[link] = title

    return [(link, titled_links.get(link)) for link in mp3_links]
|
| 36 |
+
|
| 37 |
+
def download_and_zip_mp3s(url, timeout=30):
    """Download every MP3 linked from the page at *url* and bundle them in a ZIP.

    Args:
        url: Address of the web page to scan for ``.mp3`` links.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A ``(message, zip_path)`` tuple. ``zip_path`` is ``None`` when the
        page cannot be fetched, contains no MP3 link, or no file could be
        archived. Otherwise it is the path of the archive, stored in a fresh
        temporary directory that is NOT removed automatically, so the file
        still exists when the caller reads it.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except Exception as e:
        # Broad on purpose: any failure here is reported to the UI as a message.
        return f"Erreur de téléchargement de la page : {e}", None

    mp3_entries = extract_mp3_links_and_titles(response.text)
    if not mp3_entries:
        return "Aucun lien .mp3 trouvé sur cette page.", None

    # The archive must outlive this function. A TemporaryDirectory context
    # manager would delete it on exit, before the caller could open the
    # returned path, so a persistent temporary directory is used instead.
    out_dir = tempfile.mkdtemp()
    zip_path = os.path.join(out_dir, "episodes_radiofrance.zip")

    archived = 0
    with zipfile.ZipFile(zip_path, "w") as zipf:
        for idx, (mp3_url, title) in enumerate(mp3_entries, 1):
            if title:
                filename = f"{idx:02d}-{sanitize_filename(title)}.mp3"
            else:
                filename = f"{idx:02d}-episode.mp3"
            try:
                print(f"Téléchargement : {mp3_url}")
                audio_resp = requests.get(mp3_url, timeout=timeout)
                audio_resp.raise_for_status()
                # Skip payloads under 30 000 bytes.
                if len(audio_resp.content) < 30_000:
                    print(f"Fichier trop petit, ignoré : {mp3_url}")
                    continue
                # Write the bytes straight into the archive; no intermediate
                # file on disk is needed.
                zipf.writestr(filename, audio_resp.content)
                archived += 1
            except Exception as e:
                # Best effort: one failed episode must not abort the others.
                print(f"Erreur lors du téléchargement de {mp3_url} : {e}")

    if archived == 0:
        # Do not hand back an empty archive as a success; clean it up.
        os.remove(zip_path)
        os.rmdir(out_dir)
        return "Aucun fichier MP3 n'a pu être téléchargé.", None

    return "Téléchargement terminé avec succès.", zip_path
|
| 74 |
|
| 75 |
def gradio_interface(url):
    """Gradio callback: run the download for *url* and return (message, ZIP path)."""
    status, archive = download_and_zip_mp3s(url)
    return status, archive
|
| 78 |
|
| 79 |
# Gradio interface: one URL text box in, a status message and the ZIP file out.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(label="URL de la page contenant des MP3"),
    outputs=[gr.Textbox(label="Message"), gr.File(label="Fichier ZIP")],
    title="Extracteur MP3 Radio France (ou autre)",
    description=(
        "Collez une URL contenant des fichiers MP3, et récupérez-les dans un ZIP avec titres et numérotation."
    ),
)
|
| 90 |
|
| 91 |
if __name__ == "__main__":
|