Spaces:

KaiserShultz
/

Ankelodon_AI_Multi_task_agentic_system

Sleeping

App Files Files Community

KaiserShultz committed on Sep 23, 2025

Commit

8d9faa6

verified ·

1 Parent(s): c03e4a6

Update src/tools/youtube_transcript.py

Browse files

Files changed (1) hide show

src/tools/youtube_transcript.py +72 -0

src/tools/youtube_transcript.py CHANGED Viewed

	@@ -0,0 +1,72 @@

+from langchain.tools import tool
+try:
+    from youtube_transcript_api import YouTubeTranscriptApi
+except Exception:
+    YouTubeTranscriptApi = None
+import re
+from urllib.parse import urlparse, parse_qs
+def _extract_video_id(url_or_id: str) -> str | None:
+    s = (url_or_id or "").strip()
+    if re.fullmatch(r"[A-Za-z0-9_-]{11}", s):
+        return s
+    u = urlparse(s)
+    # youtu.be/<id>
+    if u.netloc.endswith("youtu.be"):
+        vid = u.path.strip("/").split("/")[0]
+        return vid if re.fullmatch(r"[A-Za-z0-9_-]{11}", vid) else None
+    # watch?v=<id>
+    qs = parse_qs(u.query or "")
+    if "v" in qs:
+        vid = qs["v"][0]
+        return vid if re.fullmatch(r"[A-Za-z0-9_-]{11}", vid) else None
+    # /embed/<id>, /shorts/<id>, /v/<id>
+    for pref in ("/embed/", "/shorts/", "/v/"):
+        if u.path.startswith(pref):
+            vid = u.path[len(pref):].split("/")[0]
+            return vid if re.fullmatch(r"[A-Za-z0-9_-]{11}", vid) else None
+    return None
@tool
def extract_youtube_transcript(url: str, chars: int = 1000) -> str:
    """
    Simple YouTube transcript fetcher.
    Input:
      - url: Regular YouTube URL (or the 11-char video_id).
      - chars: Return the first `chars` characters of the transcript
        (negative values are treated as 0).
    Output:
      - String with the transcript (trimmed to `chars`), or an error string:
        "yt_error:<reason>"
    """
    # NOTE: the docstring above is also what the @tool decorator exposes as the
    # tool description, so it is kept short and in the original format.
    if YouTubeTranscriptApi is None:
        return "yt_error:missing_dependency"
    try:
        # Everything that can fail lives inside the try so the tool always
        # honours its "string or yt_error:<reason>" contract.
        vid = _extract_video_id(url)
        if not vid:
            return "yt_error:id_not_found"
        # Validate `chars` before doing any network work.
        limit = max(0, int(chars))

        api = YouTubeTranscriptApi()
        fetch = getattr(api, "fetch", None)
        if callable(fetch):
            # Newer API: instance method returning snippet objects.
            snippets = fetch(vid)
        else:
            # Older API: class-level call returning a list of dicts.
            snippets = YouTubeTranscriptApi.get_transcript(vid)

        parts = []
        for snippet in snippets:
            # Support both object (new) and dict (old) snippet shapes.
            text = getattr(snippet, "text", None)
            if text is None and isinstance(snippet, dict):
                text = snippet.get("text")
            if not text:
                continue
            cleaned = text.replace("\n", " ").strip()
            if cleaned:
                parts.append(cleaned)
        return " ".join(parts)[:limit]
    except Exception as e:
        # Tool boundary: report failures as data rather than raising.
        return f"yt_error:{type(e).__name__}:{e}"