Spaces:

PPloychor
/

Youtubetranscript

Sleeping

App Files Files Community

PPloychor committed on Nov 3, 2025

Commit

6c69ff9

verified ·

1 Parent(s): 6785e76

Create app.py

Browse files

Files changed (1) hide show

app.py +148 -0

app.py ADDED Viewed

	@@ -0,0 +1,148 @@

+import spaces
+import torch
+import gradio as gr
+import yt_dlp
+from transformers import pipeline
+from transformers.pipelines.audio_utils import ffmpeg_read
+import tempfile
+import os
+import time
+import glob
+# --------------------------------------------------
+# CONFIG
+# --------------------------------------------------
+# Model checkpoints: one for speech recognition, one for summarization.
+ASR_MODEL = "openai/whisper-large-v3"
+SUM_MODEL = "google/flan-t5-large"
+# Batch size handed to the ASR pipeline on every call.
+BATCH_SIZE = 8
+# Longest accepted video, in seconds (one hour).
+YT_LENGTH_LIMIT_S = 60 * 60
+# Use CUDA device 0 in half precision when available; otherwise CPU in float32.
+HAS_CUDA = torch.cuda.is_available()
+if HAS_CUDA:
+    DEVICE, DTYPE = 0, torch.float16
+else:
+    DEVICE, DTYPE = "cpu", torch.float32
+# Speech-to-text pipeline, configured with a 30-second chunk length.
+asr_pipe = pipeline(
+    "automatic-speech-recognition",
+    model=ASR_MODEL,
+    torch_dtype=DTYPE,
+    device=DEVICE,
+    chunk_length_s=30,
+)
+# Summarization pipeline on the same device (no explicit dtype is passed).
+sum_pipe = pipeline(task="summarization", model=SUM_MODEL, device=DEVICE)
+# --------------------------------------------------
+# HELPERS
+# --------------------------------------------------
+def _format_hms(sec: int) -> str:
+    """Format a duration given in seconds as ``HH:MM:SS``.
+
+    The hour field is not wrapped at 24, so durations of a day or more are
+    reported faithfully (90000 seconds becomes ``25:00:00``). Negative input
+    is clamped to zero.
+    """
+    total = max(int(sec), 0)
+    minutes, seconds = divmod(total, 60)
+    hours, minutes = divmod(minutes, 60)
+    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+def _embed(video_id: str) -> str:
+    """Return centered HTML that embeds the YouTube player for *video_id*.
+
+    The id is percent-encoded before it is placed in the ``src`` attribute so
+    that quotes, angle brackets or slashes in it cannot break out of the
+    attribute or change the URL path. Ids made only of letters, digits,
+    ``-``, ``_``, ``.`` and ``~`` are emitted unchanged.
+    """
+    from urllib.parse import quote
+    safe_id = quote(video_id, safe="")
+    return (
+        f'<center><iframe width="500" height="320" '
+        f'src="https://www.youtube.com/embed/{safe_id}" '
+        f'frameborder="0" allowfullscreen></iframe></center>'
+    )
+def _download_audio(yt_url: str, out_dir: str) -> tuple[str, dict]:
+    """Download the best available audio track of a single video into *out_dir*.
+
+    The URL is probed first without downloading so that the reported duration
+    can be checked against ``YT_LENGTH_LIMIT_S``; a missing duration counts
+    as zero. The audio is then saved as ``audio.<ext>`` inside *out_dir*.
+
+    Returns:
+        A ``(path, info)`` tuple: the downloaded file's path and the metadata
+        dict returned by the probe.
+
+    Raises:
+        gr.Error: if the URL cannot be probed or downloaded, if it resolves
+            to a playlist, if the video exceeds the length limit, or if no
+            audio file is found after the download.
+    """
+    # "noplaylist" makes a video URL that also carries a playlist id resolve
+    # to that one video instead of the whole playlist.
+    base_opts = {"quiet": True, "noplaylist": True}
+    try:
+        with yt_dlp.YoutubeDL(base_opts) as ydl:
+            info = ydl.extract_info(yt_url, download=False)
+    except yt_dlp.utils.DownloadError as err:
+        raise gr.Error(f"Cannot access YouTube URL: {err}") from err
+    # A playlist has no top-level duration, so it would slip past the length
+    # check below and every entry would be written to the same output file.
+    if info.get("_type") == "playlist":
+        raise gr.Error("Playlists are not supported; paste the URL of a single video.")
+    duration = int(info.get("duration") or 0)
+    if duration > YT_LENGTH_LIMIT_S:
+        raise gr.Error(
+            f"Video too long: {_format_hms(duration)} > {_format_hms(YT_LENGTH_LIMIT_S)}"
+        )
+    outtmpl = os.path.join(out_dir, "audio.%(ext)s")
+    opts = {**base_opts, "format": "bestaudio/best", "outtmpl": outtmpl, "noprogress": True}
+    # Report download failures the same way as probe failures instead of
+    # letting the raw yt-dlp exception reach the UI.
+    try:
+        with yt_dlp.YoutubeDL(opts) as ydl:
+            ydl.download([yt_url])
+    except yt_dlp.utils.DownloadError as err:
+        raise gr.Error(f"Failed to download audio track: {err}") from err
+    # The extension depends on the chosen format, so locate the file by glob.
+    matches = glob.glob(os.path.join(out_dir, "audio.*"))
+    if not matches:
+        raise gr.Error("Failed to download audio track.")
+    return matches[0], info
+# --------------------------------------------------
+# MAIN FUNCTIONS
+# --------------------------------------------------
+@spaces.GPU
+def transcribe_local(inputs, task):
+    """Run the ASR pipeline on a local audio file and return the recognized text.
+
+    *inputs* is the path of the uploaded or recorded file (``None`` when the
+    user supplied nothing); *task* is forwarded to generation as the
+    ``"task"`` keyword.
+
+    Raises:
+        gr.Error: if no audio file was provided.
+    """
+    if inputs is None:
+        raise gr.Error("Please upload or record an audio file.")
+    rate = asr_pipe.feature_extractor.sampling_rate
+    # Decode the file to a waveform at the rate the feature extractor expects.
+    with open(inputs, "rb") as audio_file:
+        waveform = ffmpeg_read(audio_file.read(), rate)
+    result = asr_pipe(
+        {"array": waveform, "sampling_rate": rate},
+        batch_size=BATCH_SIZE,
+        generate_kwargs={"task": task},
+        return_timestamps=True,
+    )
+    return result["text"]
+@spaces.GPU
+def transcribe_youtube(yt_url, task):
+    """Download a video's audio, transcribe it and return preview, text and file.
+
+    Returns:
+        A ``(html, text, txt_path)`` tuple: embed HTML for the video (empty
+        when the metadata has no id), the transcript text, and the path of a
+        UTF-8 ``.txt`` file containing the transcript.
+
+    Raises:
+        gr.Error: if *yt_url* is empty, or if the download helper rejects it.
+    """
+    if not yt_url:
+        raise gr.Error("Paste a valid YouTube URL.")
+    rate = asr_pipe.feature_extractor.sampling_rate
+    # The downloaded audio is only needed until it is decoded into memory,
+    # so the scratch directory can be removed before transcription starts.
+    with tempfile.TemporaryDirectory() as tmpdir:
+        audio_path, info = _download_audio(yt_url, tmpdir)
+        with open(audio_path, "rb") as f:
+            audio = ffmpeg_read(f.read(), rate)
+    out = asr_pipe(
+        {"array": audio, "sampling_rate": rate},
+        batch_size=BATCH_SIZE,
+        generate_kwargs={"task": task},
+        return_timestamps=True,
+    )
+    text = out["text"]
+    # The transcript must outlive this function: the returned path is read
+    # after the call returns. Writing it inside the TemporaryDirectory above
+    # would hand back a path that has already been deleted, so it goes to a
+    # persistent temp file instead (delete=False; it is not cleaned up here).
+    with tempfile.NamedTemporaryFile(
+        "w", encoding="utf-8", prefix="transcript_", suffix=".txt", delete=False
+    ) as f:
+        f.write(text)
+        txt_path = f.name
+    vid = info.get("id", "")
+    html = _embed(vid) if vid else ""
+    return html, text, txt_path
+# Decorated like the two transcription functions because sum_pipe is built on
+# the same DEVICE as asr_pipe.
+# NOTE(review): assumes the Space runs on ZeroGPU hardware, where GPU work must
+# happen inside a @spaces.GPU function - confirm against the Space settings.
+@spaces.GPU
+def summarize_text(text):
+    """Summarize *text* chunk by chunk and return the joined summaries.
+
+    The text is cut into consecutive 2000-character slices, each slice is
+    summarized independently, and the summaries are joined with single
+    spaces in their original order.
+
+    Raises:
+        gr.Error: if *text* is ``None``, empty or whitespace only.
+    """
+    # Guard against None as well as blank input; calling .strip() on None
+    # would raise AttributeError instead of showing a readable message.
+    if not text or not text.strip():
+        raise gr.Error("No transcript provided.")
+    chunk_chars = 2000
+    chunks = [text[i:i + chunk_chars] for i in range(0, len(text), chunk_chars)]
+    summaries = [sum_pipe(ch)[0]["summary_text"] for ch in chunks]
+    return " ".join(summaries)
+# --------------------------------------------------
+# UI
+# --------------------------------------------------
+# Three-tab interface: microphone, uploaded file and YouTube URL. The YouTube
+# tab additionally offers a transcript download and a summarization step.
+with gr.Blocks(title="YouTube → Transcript → Summary") as demo:
+    gr.Markdown("## 🎬 Whisper V3 + Flan-T5 – YouTube Transcriber & Summarizer")
+    # Tab 1: transcribe audio recorded from the microphone.
+    with gr.Tab("🎙️ Microphone"):
+        mic_audio = gr.Audio(sources="microphone", type="filepath")
+        mic_task = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
+        mic_out = gr.Textbox(label="Transcript")
+        mic_button = gr.Button("Run")
+        mic_button.click(
+            fn=transcribe_local,
+            inputs=[mic_audio, mic_task],
+            outputs=mic_out,
+        )
+    # Tab 2: transcribe an uploaded audio file with the same handler.
+    with gr.Tab("📁 Audio file"):
+        file_audio = gr.Audio(sources="upload", type="filepath")
+        file_task = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
+        file_out = gr.Textbox(label="Transcript")
+        file_button = gr.Button("Run")
+        file_button.click(
+            fn=transcribe_local,
+            inputs=[file_audio, file_task],
+            outputs=file_out,
+        )
+    # Tab 3: transcribe a YouTube video, then optionally summarize the result.
+    with gr.Tab("🎬 YouTube"):
+        yt_url = gr.Textbox(lines=1, placeholder="Paste YouTube URL here", label="YouTube URL")
+        yt_task = gr.Radio(["transcribe", "translate"], value="transcribe", label="Task")
+        yt_video = gr.HTML(label="Video Preview")
+        yt_text = gr.Textbox(label="Transcript", lines=10)
+        yt_file = gr.File(label="Download Transcript (.txt)")
+        yt_button = gr.Button("Transcribe")
+        yt_button.click(
+            fn=transcribe_youtube,
+            inputs=[yt_url, yt_task],
+            outputs=[yt_video, yt_text, yt_file],
+        )
+        gr.Markdown("---")
+        gr.Markdown("### 🧠 Summarize Transcript")
+        sum_out = gr.Textbox(label="Summary", lines=6)
+        # The summary is computed from whatever is in the transcript box.
+        sum_button = gr.Button("Summarize Text")
+        sum_button.click(fn=summarize_text, inputs=yt_text, outputs=sum_out)
+# Enable the request queue, then start the server.
+demo.queue()
+demo.launch()