Update app.py
Browse files
app.py
CHANGED
|
@@ -32,29 +32,65 @@ def slugify(text, max_length=50):
|
|
| 32 |
text = re.sub(r'[-\s]+', '_', text)
|
| 33 |
return text[:max_length].strip('_')
|
| 34 |
|
| 35 |
-
def
|
| 36 |
-
|
|
|
|
| 37 |
response.raise_for_status()
|
| 38 |
soup = BeautifulSoup(response.text, 'html.parser')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
podcast_title = slugify(title_tag.get_text()) if title_tag else "podcast"
|
| 43 |
-
|
| 44 |
-
# Liens MP3
|
| 45 |
-
mp3_links = [a['href'] for a in soup.find_all('a', href=True) if a['href'].endswith('.mp3')]
|
| 46 |
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
def download_and_zip_podcast(url):
|
| 50 |
-
try:
|
| 51 |
-
podcast_title, mp3_links = extract_mp3_links_and_title(url)
|
| 52 |
if not mp3_links:
|
| 53 |
-
return "Aucun fichier MP3
|
| 54 |
|
| 55 |
temp_dir = tempfile.mkdtemp()
|
| 56 |
for i, mp3_url in enumerate(mp3_links, start=1):
|
| 57 |
-
filename = f"{
|
| 58 |
filepath = os.path.join(temp_dir, filename)
|
| 59 |
with requests.get(mp3_url, stream=True) as r:
|
| 60 |
r.raise_for_status()
|
|
@@ -62,29 +98,29 @@ def download_and_zip_podcast(url):
|
|
| 62 |
for chunk in r.iter_content(chunk_size=8192):
|
| 63 |
f.write(chunk)
|
| 64 |
|
| 65 |
-
zip_path = os.path.join(temp_dir, f"{
|
| 66 |
shutil.make_archive(zip_path.replace('.zip', ''), 'zip', temp_dir)
|
| 67 |
|
| 68 |
-
return f"{len(mp3_links)}
|
| 69 |
|
| 70 |
except Exception as e:
|
| 71 |
return f"Erreur : {str(e)}", None
|
| 72 |
|
| 73 |
# === INTERFACE GRADIO ===
|
| 74 |
with gr.Blocks() as app:
|
| 75 |
-
gr.Markdown("# Téléchargeur de Podcasts MP3")
|
| 76 |
with gr.Row():
|
| 77 |
-
url_input = gr.Textbox(label="URL de la
|
| 78 |
download_button = gr.Button("Télécharger et compresser")
|
| 79 |
output_text = gr.Textbox(label="Message")
|
| 80 |
file_output = gr.File(label="Fichier ZIP", file_types=[".zip"])
|
| 81 |
|
| 82 |
def process(url):
|
| 83 |
-
message, zip_file =
|
| 84 |
return message, zip_file
|
| 85 |
|
| 86 |
download_button.click(fn=process, inputs=[url_input], outputs=[output_text, file_output])
|
| 87 |
|
| 88 |
-
# === LANCEMENT
|
| 89 |
if __name__ == "__main__":
|
| 90 |
-
app.launch(share=True)
|
|
|
|
| 32 |
text = re.sub(r'[-\s]+', '_', text)
|
| 33 |
return text[:max_length].strip('_')
|
| 34 |
|
| 35 |
+
def get_episode_links(main_url):
    """Collect the episode page URLs linked from a podcast's main page.

    Downloads ``main_url``, scans every ``<a href>`` element and keeps the
    links whose href contains ``/podcasts/``.  Each kept href is resolved
    against ``main_url`` so relative, protocol-relative and absolute links
    all come out as absolute URLs.  Links that point back to the main page
    itself (with or without a trailing slash or a ``#fragment``) are dropped.

    Args:
        main_url: Absolute URL of the podcast series page.

    Returns:
        A list of absolute URLs, without duplicates, in document order.

    Raises:
        requests.RequestException: If the page cannot be fetched, the
            request times out, or the server answers with an HTTP error.
    """
    from urllib.parse import urldefrag, urljoin

    # A timeout keeps the UI from hanging forever on an unresponsive server.
    response = requests.get(main_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    own_url = urldefrag(main_url)[0].rstrip('/')
    # dict keys give O(1) de-duplication while preserving insertion order.
    links = {}
    for a in soup.find_all('a', href=True):
        href = a['href']
        if "/podcasts/" not in href:
            continue
        # The fragment never changes which document is fetched, so drop it
        # before comparing or de-duplicating.
        full_url = urldefrag(urljoin(main_url, href))[0]
        if full_url.rstrip('/') == own_url:
            continue
        links.setdefault(full_url, None)

    return list(links)
|
| 50 |
+
|
| 51 |
+
def extract_mp3_from_episode(url):
    """Return the MP3 URL embedded in an episode page, or ``None``.

    Fetches ``url`` and inspects the first ``<audio>`` element: its own
    ``src`` attribute first, then the ``src`` of any nested ``<source>``
    elements.  A candidate is accepted when the *path* part of its URL ends
    in ``.mp3`` (case-insensitive), so links carrying a query string are
    recognised too.  The result is resolved against ``url`` so a relative
    ``src`` is returned as an absolute URL.

    This is best-effort: any failure while fetching or parsing the page is
    logged as a warning and reported as ``None`` instead of being raised, so
    one bad episode page does not abort a whole series.

    Args:
        url: Absolute URL of the episode page.

    Returns:
        The absolute MP3 URL, or ``None`` when none was found or the page
        could not be processed.
    """
    import logging
    from urllib.parse import urljoin, urlparse

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        audio_tag = soup.find("audio")
    except Exception as exc:
        # Deliberately broad (see docstring), but no longer silent.
        logging.getLogger(__name__).warning(
            "Could not read episode page %s: %s", url, exc
        )
        return None

    if audio_tag is None:
        return None

    candidates = [audio_tag.get("src")]
    candidates.extend(source.get("src") for source in audio_tag.find_all("source"))
    for src in candidates:
        # Test the URL path only, so "file.mp3?token=..." still matches.
        if src and urlparse(src).path.lower().endswith(".mp3"):
            return urljoin(url, src)
    return None
|
| 63 |
+
|
| 64 |
+
def get_podcast_title(url):
    """Return a slugified title for the podcast page at ``url``.

    The title is taken from the page's first ``<h1>``, falling back to
    ``<title>``, and passed through ``slugify``.  The result is only used to
    name the downloaded files, so this function never raises: on any failure
    (network error, HTTP error status, missing tags, or a title that
    slugifies to an empty string) it returns the literal ``"podcast"``.

    Args:
        url: Absolute URL of the podcast series page.

    Returns:
        A non-empty slug suitable for use in a file name.
    """
    try:
        response = requests.get(url, timeout=30)
        # Without this, the heading of an error page would become the title.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        title_tag = soup.find('h1') or soup.find('title')
        if title_tag is None:
            return "podcast"
        # slugify can strip a title down to "", which would produce file
        # names such as "_01.mp3"; fall back to the default in that case.
        return slugify(title_tag.get_text()) or "podcast"
    except Exception:
        # The title is cosmetic; a failure here must not block the download.
        return "podcast"
|
| 73 |
+
|
| 74 |
+
def download_and_zip_podcast_series(main_url):
    """Download every episode MP3 of a podcast series and zip them.

    Steps: resolve the series title, list the episode pages linked from
    ``main_url``, extract one MP3 URL per episode page, stream each MP3 into
    a temporary folder, then pack that folder into ``<title>.zip``.

    The MP3 files are written to a sub-folder of the temporary directory and
    the archive is created next to that sub-folder, so the zip never sits
    inside the tree it is archiving.  After zipping, the raw MP3 folder is
    removed; if anything fails, the whole temporary directory is removed.

    Args:
        main_url: URL of the podcast series page.  Surrounding whitespace is
            ignored.

    Returns:
        A ``(message, zip_path)`` tuple.  ``zip_path`` is the path of the
        created archive on success and ``None`` otherwise; ``message`` is a
        user-facing status or error text.  Exceptions are not propagated.
    """
    if not main_url or not main_url.strip():
        return "Veuillez saisir une URL.", None
    main_url = main_url.strip()

    temp_dir = None
    try:
        title = get_podcast_title(main_url)
        episode_pages = get_episode_links(main_url)

        if not episode_pages:
            return "Aucune page d’épisode trouvée.", None

        mp3_links = [
            mp3 for mp3 in map(extract_mp3_from_episode, episode_pages) if mp3
        ]

        if not mp3_links:
            return "Aucun fichier MP3 trouvé dans les épisodes.", None

        temp_dir = tempfile.mkdtemp()
        audio_dir = os.path.join(temp_dir, "episodes")
        os.mkdir(audio_dir)
        for i, mp3_url in enumerate(mp3_links, start=1):
            filename = f"{title}_{i:02}.mp3"
            filepath = os.path.join(audio_dir, filename)
            with requests.get(mp3_url, stream=True, timeout=30) as r:
                r.raise_for_status()
                # NOTE(review): this `open` line falls in a gap of the
                # reviewed diff and was reconstructed from the surrounding
                # code (`filepath`, `f.write`) -- confirm against the file.
                with open(filepath, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)

        # make_archive appends ".zip" itself and returns the final path.
        zip_path = shutil.make_archive(
            os.path.join(temp_dir, title), 'zip', audio_dir
        )
        # Only the archive is handed back; free the space used by the MP3s.
        shutil.rmtree(audio_dir, ignore_errors=True)

        return f"{len(mp3_links)} épisode(s) téléchargé(s).", zip_path

    except Exception as e:
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return f"Erreur : {str(e)}", None
|
| 108 |
|
| 109 |
# === GRADIO INTERFACE ===
# NOTE(review): indentation was lost in the reviewed diff; the nesting below
# (URL box and button inside the Row, message and file outputs under it) is
# an assumption -- confirm against the real file.
with gr.Blocks() as app:
    gr.Markdown("# Téléchargeur de Podcasts MP3 - France Culture")
    with gr.Row():
        url_input = gr.Textbox(label="URL de la série", placeholder="https://www.radiofrance.fr/...")
        download_button = gr.Button("Télécharger et compresser")
    output_text = gr.Textbox(label="Message")
    file_output = gr.File(label="Fichier ZIP", file_types=[".zip"])

    def process(url):
        """Run the series download for ``url`` and return ``(message, zip_file)``."""
        message, zip_file = download_and_zip_podcast_series(url)
        return message, zip_file

    # The two returned values fill the message box and the file output, in that order.
    download_button.click(fn=process, inputs=[url_input], outputs=[output_text, file_output])

# === LAUNCH ===
if __name__ == "__main__":
    # NOTE(review): share=True presumably asks Gradio for a publicly
    # reachable link -- confirm this exposure is intended.
    app.launch(share=True)
|