Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,11 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
import torch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
# Global cache for pipelines to avoid reloading models
|
| 6 |
pipelines = {}
|
|
@@ -26,42 +31,95 @@ def get_pipeline(model_id):
|
|
| 26 |
)
|
| 27 |
return pipelines[model_id]
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
def
|
| 31 |
-
if
|
| 32 |
-
return "Please
|
| 33 |
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
-
#
|
| 42 |
-
|
| 43 |
-
audio_file,
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
else:
|
| 62 |
-
return output["text"]
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
# Create the Gradio app with a colorful, responsive theme
|
| 67 |
theme = gr.themes.Soft(
|
|
@@ -75,7 +133,7 @@ with gr.Blocks(theme=theme, title="MP3 to Text Transcriber") as demo:
|
|
| 75 |
gr.Markdown(
|
| 76 |
"""
|
| 77 |
# π€ MP3 to Text Transcription Tool
|
| 78 |
-
Upload an MP3 (or any audio file)
|
| 79 |
Supports long files up to hoursβhandles 45+ minutes effortlessly!
|
| 80 |
Choose a model for speed vs. accuracy trade-off.
|
| 81 |
""",
|
|
@@ -84,6 +142,7 @@ with gr.Blocks(theme=theme, title="MP3 to Text Transcriber") as demo:
|
|
| 84 |
|
| 85 |
with gr.Row(variant="panel", elem_classes=["max-w-4xl mx-auto"]):
|
| 86 |
with gr.Column(scale=1):
|
|
|
|
| 87 |
audio_input = gr.Audio(
|
| 88 |
sources="upload",
|
| 89 |
type="filepath",
|
|
@@ -91,6 +150,13 @@ with gr.Blocks(theme=theme, title="MP3 to Text Transcriber") as demo:
|
|
| 91 |
elem_classes=["w-full"]
|
| 92 |
)
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
model_dropdown = gr.Dropdown(
|
| 95 |
choices=MODEL_OPTIONS,
|
| 96 |
value=MODEL_OPTIONS[1], # Default to base
|
|
@@ -111,12 +177,15 @@ with gr.Blocks(theme=theme, title="MP3 to Text Transcriber") as demo:
|
|
| 111 |
value=False,
|
| 112 |
info="Adds [start - end] tags to the transcript."
|
| 113 |
)
|
| 114 |
-
|
| 115 |
-
transcribe_btn = gr.Button("π Transcribe Audio", variant="primary", size="lg", elem_classes=["w-full"])
|
| 116 |
|
| 117 |
with gr.Column(scale=1):
|
| 118 |
status_output = gr.Markdown("Ready to transcribe! π¬", elem_classes=["text-center"])
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
transcript_output = gr.Textbox(
|
| 121 |
label="π Transcript",
|
| 122 |
lines=15,
|
|
@@ -130,11 +199,23 @@ with gr.Blocks(theme=theme, title="MP3 to Text Transcriber") as demo:
|
|
| 130 |
def update_status(msg):
|
| 131 |
return gr.Markdown(f"**{msg}**")
|
| 132 |
|
|
|
|
| 133 |
transcribe_btn.click(
|
| 134 |
-
fn=transcribe_speech,
|
| 135 |
inputs=[audio_input, model_dropdown, language_dropdown, timestamps_checkbox],
|
| 136 |
-
outputs=transcript_output,
|
| 137 |
-
show_progress=True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
).then(
|
| 139 |
fn=lambda: update_status("Transcription complete! π"),
|
| 140 |
outputs=status_output
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
import torch
|
| 4 |
+
import requests
|
| 5 |
+
import re
|
| 6 |
+
import tempfile
|
| 7 |
+
import os
|
| 8 |
+
from io import BytesIO
|
| 9 |
|
| 10 |
# Global cache for pipelines to avoid reloading models
|
| 11 |
pipelines = {}
|
|
|
|
| 31 |
)
|
| 32 |
return pipelines[model_id]
|
| 33 |
|
| 34 |
+
# Function to fetch MP3 from Apple Podcasts episode URL
def fetch_podcast_mp3(podcast_url):
    """Resolve an Apple Podcasts episode page to its MP3 and download it.

    Parameters:
        podcast_url: An Apple Podcasts episode URL (must contain
            "podcasts.apple.com").

    Returns:
        (path, message) tuple. On success, ``path`` is the filesystem path of
        a temporary ``.mp3`` file (the caller is responsible for deleting it)
        and ``message`` is a human-readable status. On failure, ``path`` is
        ``None`` and ``message`` describes the error.
    """
    if not podcast_url or "podcasts.apple.com" not in podcast_url:
        return None, "Invalid Apple Podcasts URL. Please use a valid episode link (e.g., https://podcasts.apple.com/...)."

    headers = {"User-Agent": "Mozilla/5.0 (compatible; PodcastTranscriber/1.0)"}
    try:
        # Fetch the episode page HTML. A timeout is essential: without one a
        # dead host would hang this request (and the whole app) indefinitely.
        response = requests.get(podcast_url, headers=headers, timeout=30)
        response.raise_for_status()
        html = response.text

        # Extract assetUrl (MP3) using regex - looks like "assetUrl":"https://...mp3"
        match = re.search(r'"assetUrl"\s*:\s*"([^"]+\.mp3[^"]*)"', html)
        if not match:
            return None, "Could not find MP3 URL. The episode might be private or the page structure changed."

        mp3_url = match.group(1)

        # Stream the MP3 to a temp file in 1 MiB chunks. Long episodes can be
        # hundreds of MB, so buffering the whole body in memory (.content)
        # risks OOM on a small Space.
        with requests.get(mp3_url, headers=headers, timeout=60, stream=True) as mp3_response:
            mp3_response.raise_for_status()
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
                for chunk in mp3_response.iter_content(chunk_size=1 << 20):
                    tmp_file.write(chunk)
                temp_path = tmp_file.name

        return temp_path, f"Downloaded episode: {os.path.getsize(temp_path) / (1024*1024):.1f} MB"

    except Exception as e:
        # Network, HTTP-status, or filesystem failure - report, don't crash the UI.
        return None, f"Error fetching MP3: {str(e)}"
|
| 64 |
+
|
| 65 |
+
# Transcription function with chunking for long audio
def transcribe_speech(audio_input, model_id, language="english", return_timestamps=False, podcast_url=None):
    """Transcribe an uploaded audio file or a fetched podcast episode.

    Parameters:
        audio_input: Filepath of an uploaded audio file, or None.
        model_id: Whisper model identifier passed to get_pipeline().
        language: Target language for the "transcribe" task.
        return_timestamps: When True, format output as "[start - end] text" lines.
        podcast_url: Optional Apple Podcasts episode URL; when given it takes
            precedence over audio_input and the MP3 is downloaded first.

    Returns:
        (transcript, status) tuple of strings. On failure both elements carry
        the error description, matching what the UI binds to its two outputs.
    """
    audio_file = None
    is_temp = False  # True only for a downloaded file we must delete afterwards

    # If podcast URL provided, fetch MP3 first
    if podcast_url:
        audio_file, status_msg = fetch_podcast_mp3(podcast_url)
        if not audio_file:
            return status_msg, status_msg  # Error message
        is_temp = True
    else:
        # Use uploaded file
        if audio_input is None:
            return "Please upload an audio file or provide a podcast URL.", "Ready to transcribe! π¬"
        audio_file = audio_input

    try:
        pipe = get_pipeline(model_id)

        # Generate kwargs for transcription. NOTE: return_timestamps is passed
        # only as a top-level pipeline kwarg below - passing it inside
        # generate_kwargs as well conflicts in transformers' ASR pipeline.
        generate_kwargs = {"task": "transcribe", "language": language}

        # Transcribe with chunking for long files
        output = pipe(
            audio_file,
            max_new_tokens=128,  # Per chunk for stability
            generate_kwargs=generate_kwargs,
            chunk_length_s=30,
            stride_length_s=5,  # Overlap for smooth transitions
            # Smaller models (tiny/base) fit larger batches; bigger models get 8.
            batch_size=16 if ("tiny" in model_id or "base" in model_id) else 8,
            return_timestamps=return_timestamps,
        )

        if return_timestamps and "chunks" in output:
            # Format with timestamps if requested
            formatted = []
            for chunk in output["chunks"]:
                start = f"{chunk['timestamp'][0]:.2f}s" if chunk['timestamp'][0] is not None else "0.00s"
                end = f"{chunk['timestamp'][1]:.2f}s" if chunk['timestamp'][1] is not None else "?.?s"
                formatted.append(f"[{start} - {end}] {chunk['text']}")
            return "\n".join(formatted), "Transcription complete with timestamps! π"
        return output["text"], "Transcription complete! π"

    except Exception as e:
        return f"Transcription error: {str(e)}", f"Error: {str(e)}"
    finally:
        # Always remove a downloaded temp file, on success or error. The
        # audio_file guard prevents os.path.exists(None) raising TypeError.
        if is_temp and audio_file and os.path.exists(audio_file):
            os.unlink(audio_file)
|
| 123 |
|
| 124 |
# Create the Gradio app with a colorful, responsive theme
|
| 125 |
theme = gr.themes.Soft(
|
|
|
|
| 133 |
gr.Markdown(
|
| 134 |
"""
|
| 135 |
# π€ MP3 to Text Transcription Tool
|
| 136 |
+
Upload an MP3 (or any audio file) **or** paste an Apple Podcasts episode URL to fetch and transcribe it automatically!
|
| 137 |
Supports long files up to hoursβhandles 45+ minutes effortlessly!
|
| 138 |
Choose a model for speed vs. accuracy trade-off.
|
| 139 |
""",
|
|
|
|
| 142 |
|
| 143 |
with gr.Row(variant="panel", elem_classes=["max-w-4xl mx-auto"]):
|
| 144 |
with gr.Column(scale=1):
|
| 145 |
+
# Option 1: File upload
|
| 146 |
audio_input = gr.Audio(
|
| 147 |
sources="upload",
|
| 148 |
type="filepath",
|
|
|
|
| 150 |
elem_classes=["w-full"]
|
| 151 |
)
|
| 152 |
|
| 153 |
+
# Option 2: Podcast URL
|
| 154 |
+
podcast_input = gr.Textbox(
|
| 155 |
+
label="π Apple Podcasts Episode URL (optional)",
|
| 156 |
+
placeholder="e.g., https://podcasts.apple.com/us/podcast/example/id123?i=456",
|
| 157 |
+
elem_classes=["w-full"]
|
| 158 |
+
)
|
| 159 |
+
|
| 160 |
model_dropdown = gr.Dropdown(
|
| 161 |
choices=MODEL_OPTIONS,
|
| 162 |
value=MODEL_OPTIONS[1], # Default to base
|
|
|
|
| 177 |
value=False,
|
| 178 |
info="Adds [start - end] tags to the transcript."
|
| 179 |
)
|
|
|
|
|
|
|
| 180 |
|
| 181 |
with gr.Column(scale=1):
|
| 182 |
status_output = gr.Markdown("Ready to transcribe! π¬", elem_classes=["text-center"])
|
| 183 |
|
| 184 |
+
# Buttons
|
| 185 |
+
with gr.Row(elem_classes=["w-full"]):
|
| 186 |
+
transcribe_btn = gr.Button("π Transcribe Uploaded File", variant="secondary", elem_classes=["flex-1"])
|
| 187 |
+
podcast_btn = gr.Button("π‘ Fetch & Transcribe Podcast", variant="primary", elem_classes=["flex-1"])
|
| 188 |
+
|
| 189 |
transcript_output = gr.Textbox(
|
| 190 |
label="π Transcript",
|
| 191 |
lines=15,
|
|
|
|
| 199 |
def update_status(msg):
|
| 200 |
return gr.Markdown(f"**{msg}**")
|
| 201 |
|
| 202 |
+
# For uploaded file
|
| 203 |
transcribe_btn.click(
|
| 204 |
+
fn=lambda audio, model, lang, ts: transcribe_speech(audio, model, lang, ts, None),
|
| 205 |
inputs=[audio_input, model_dropdown, language_dropdown, timestamps_checkbox],
|
| 206 |
+
outputs=[transcript_output, status_output],
|
| 207 |
+
show_progress=True
|
| 208 |
+
).then(
|
| 209 |
+
fn=lambda: update_status("Transcription complete! π"),
|
| 210 |
+
outputs=status_output
|
| 211 |
+
)
|
| 212 |
+
|
| 213 |
+
# For podcast URL
|
| 214 |
+
podcast_btn.click(
|
| 215 |
+
fn=lambda url, model, lang, ts: transcribe_speech(None, model, lang, ts, url),
|
| 216 |
+
inputs=[podcast_input, model_dropdown, language_dropdown, timestamps_checkbox],
|
| 217 |
+
outputs=[transcript_output, status_output],
|
| 218 |
+
show_progress=True
|
| 219 |
).then(
|
| 220 |
fn=lambda: update_status("Transcription complete! π"),
|
| 221 |
outputs=status_output
|