Spaces:

gabejavitt
/

agentCourse

Sleeping

App Files Files Community

gabejavitt committed on Nov 5, 2025

Commit

d63a5de

verified ·

1 Parent(s): ed23d35

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -8

app.py CHANGED Viewed

@@ -23,6 +23,8 @@ from bs4 import BeautifulSoup
 import requests
 from PIL import Image
 import base64
 # LangChain & LangGraph
 from langgraph.graph.message import add_messages
@@ -555,13 +557,14 @@ class YoutubeInput(BaseModel):
 @tool(args_schema=YoutubeInput)
 def get_youtube_transcript(video_url: str) -> str:
-    """Fetches YouTube video transcript."""
     if not video_url:
         return "Error: Invalid URL."
     print(f"📺 YouTube transcript: {video_url}")
     try:
         video_id = None
         if "watch?v=" in video_url:
             video_id = video_url.split("v=")[1].split("&")[0]
@@ -570,15 +573,54 @@ def get_youtube_transcript(video_url: str) -> str:
         if not video_id:
             return f"Error: Could not extract video ID."
-        # FIXED: Use get_transcript instead of list_transcripts
-        transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
-        if not transcript_list:
-            return "Error: No transcript found."
-        full_transcript = " ".join([item["text"] for item in transcript_list])
         return f"Transcript:\n{truncate_if_needed(full_transcript)}"
     except Exception as e:
         return f"Transcript error: {str(e)}"

 import requests
 from PIL import Image
 import base64
+from googleapiclient.discovery import build
+from googleapiclient.errors import HttpError
 # LangChain & LangGraph
 from langgraph.graph.message import add_messages
 @tool(args_schema=YoutubeInput)
 def get_youtube_transcript(video_url: str) -> str:
+    """Fetches YouTube video transcript using official API."""
     if not video_url:
         return "Error: Invalid URL."
     print(f"📺 YouTube transcript: {video_url}")
     try:
+        # Extract video ID
         video_id = None
         if "watch?v=" in video_url:
             video_id = video_url.split("v=")[1].split("&")[0]
         if not video_id:
             return f"Error: Could not extract video ID."
+        # Get API key
+        YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
+        if not YOUTUBE_API_KEY:
+            return "Error: YOUTUBE_API_KEY not set in environment."
+        # Build YouTube API client
+        youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)
+        # Get captions list
+        captions_response = youtube.captions().list(
+            part='snippet',
+            videoId=video_id
+        ).execute()
+        if not captions_response.get('items'):
+            return "Error: No captions available for this video."
+        # Find English caption track
+        caption_id = None
+        for caption in captions_response['items']:
+            if caption['snippet']['language'] == 'en':
+                caption_id = caption['id']
+                break
+        if not caption_id:
+            # Try first available caption
+            caption_id = captions_response['items'][0]['id']
+        # Download caption
+        caption_download = youtube.captions().download(
+            id=caption_id,
+            tfmt='srt'  # or 'vtt'
+        ).execute()
+        # Parse SRT format to plain text
+        import re
+        text_lines = []
+        for line in caption_download.decode('utf-8').split('\n'):
+            # Skip timestamp lines and sequence numbers
+            if not re.match(r'^\d+$', line) and not re.match(r'\d{2}:\d{2}:\d{2}', line) and line.strip():
+                text_lines.append(line.strip())
+        full_transcript = " ".join(text_lines)
         return f"Transcript:\n{truncate_if_needed(full_transcript)}"
+    except HttpError as e:
+        return f"YouTube API error: {e}"
     except Exception as e:
         return f"Transcript error: {str(e)}"