Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# YouTubeSummerizer.py — full, updated script (yt-dlp captions + DistilBART summary + Gradio UI)
|
| 2 |
+
|
| 3 |
+
import requests
|
| 4 |
+
import webvtt
|
| 5 |
+
from yt_dlp import YoutubeDL
|
| 6 |
+
import gradio as gr
|
| 7 |
+
import torch
|
| 8 |
+
from transformers import pipeline
|
| 9 |
+
|
| 10 |
+
# ---------------- Summarizer ----------------
# NOTE: Uses CPU by default. If you hit RAM errors, restart and it will stream-load weights.
# DistilBART (CNN/DailyMail checkpoint); bfloat16 keeps the weight footprint small.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
|
| 17 |
+
|
| 18 |
+
def summarize_text(text: str) -> str:
    """Summarize *text* with the module-level DistilBART pipeline.

    Long inputs are split into ~350-word chunks, each chunk is summarized
    independently, and the partial summaries are joined. If the joined
    result is still long (> 320 words) it gets one more summarization pass.
    """
    words = text.split()
    step = 350
    # Summarize each ~350-word slice independently.
    partial = [
        text_summary(
            " ".join(words[start:start + step]),
            max_length=180,
            min_length=60,
            do_sample=False,
        )[0]["summary_text"]
        for start in range(0, len(words), step)
    ]
    merged = " ".join(partial)
    if len(merged.split()) <= 320:
        return merged
    # Second pass: compress the concatenated partial summaries.
    return text_summary(merged, max_length=200, min_length=80, do_sample=False)[0]["summary_text"]
|
| 32 |
+
|
| 33 |
+
# ---------------- Helpers to get captions via yt-dlp ----------------
|
| 34 |
+
def _pick_caption_url(info, preferred=("en", "en-US", "en-GB")):
|
| 35 |
+
subs = info.get("subtitles") or {}
|
| 36 |
+
autos = info.get("automatic_captions") or {}
|
| 37 |
+
# Prefer manual → auto → any
|
| 38 |
+
for d in (subs, autos):
|
| 39 |
+
for code in preferred:
|
| 40 |
+
if code in d and d[code]:
|
| 41 |
+
return d[code][0]["url"]
|
| 42 |
+
for d in (subs, autos):
|
| 43 |
+
for tracks in d.values():
|
| 44 |
+
if tracks:
|
| 45 |
+
return tracks[0]["url"]
|
| 46 |
+
return None
|
| 47 |
+
|
| 48 |
+
def _fetch_caption_text(video_url: str) -> str | None:
    """
    Try multiple ways to fetch a captions VTT URL, without spamming errors.
    Order: Edge -> Firefox -> Chrome (Default/Profile 1) -> no cookies.
    """
    base_opts = {"skip_download": True, "quiet": True}
    cookie_sources = [
        ("edge",),
        ("firefox",),
        ("chrome", "Default"),
        ("chrome", "Profile 1"),
        None,  # final attempt: no browser cookies at all
    ]
    for source in cookie_sources:
        opts = dict(base_opts)
        if source is not None:
            opts["cookiesfrombrowser"] = source
        try:
            with YoutubeDL(opts) as ydl:
                info = ydl.extract_info(video_url, download=False)

            vtt_url = _pick_caption_url(info)
            if not vtt_url:
                continue

            resp = requests.get(vtt_url, timeout=20)
            resp.raise_for_status()

            # Parse WebVTT in-memory: one whitespace-joined line per cue.
            cues = [
                cue.text.strip().replace("\n", " ")
                for cue in webvtt.from_string(resp.text)
                if cue.text.strip()
            ]
            transcript = " ".join(cues).strip()
            if transcript:
                return transcript
        except Exception:
            # Any attempt may fail (locked browser profile, blocked endpoint,
            # network error); swallow and try the next cookie source.
            continue
    return None
|
| 84 |
+
|
| 85 |
+
# ---------------- Main function used by Gradio ----------------
def get_youtube_transcript(video_url: str) -> str:
    """Fetch captions for *video_url* and return their summary.

    Never raises: every failure mode is reported as a human-readable string
    so the Gradio output textbox always has something to display.
    """
    captions = _fetch_caption_text(video_url)
    if captions:
        try:
            return summarize_text(captions)
        except Exception as exc:
            return f"Summarizer error: {exc}"
    return "No captions available or captions endpoint blocked. Try another video, network, or export cookies to a cookies.txt file."
|
| 94 |
+
|
| 95 |
+
# ---------------- Gradio UI ----------------
# Close any Gradio servers left over from a previous launch (helps on reloads).
gr.close_all()
# Single text-in / text-out interface wrapping get_youtube_transcript.
demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[gr.Textbox(label="Input YouTube Url to summarize", lines=1, placeholder="https://www.youtube.com/watch?v=...")],
    outputs=[gr.Textbox(label="Summarized text", lines=10)],
    title="@Sahibhim GenAI Project 2: YouTube Script Summarizer",
    description="Paste a YouTube link. App fetches captions (manual or auto) and summarizes them."
)

# Launch the UI only when executed as a script.
if __name__ == "__main__":
    demo.launch()
|