Agents_Course_Final_Assignment

Sleeping

App Files Files Community

SerotoninRonin committed on Jul 8, 2025

Commit

da66358

1 Parent(s): 4ad672b

Add YouTube transcript extraction tool and update imports

Browse files

Files changed (3) hide show

agents.py +4 -2
requirements.txt +2 -1
tools.py +31 -1

agents.py CHANGED Viewed

@@ -11,7 +11,8 @@ from tools import (
     webpage_extraction_tool,
     brave_web_search,
     python_code_interpreter_tool,
-    audio_file_transcriber
 )
 class AgentState(TypedDict):
@@ -26,7 +27,8 @@ tools = [
     webpage_extraction_tool,
     brave_web_search,
     python_code_interpreter_tool,
-    audio_file_transcriber
 ]
 rate_limiter = InMemoryRateLimiter(

     webpage_extraction_tool,
     brave_web_search,
     python_code_interpreter_tool,
+    audio_file_transcriber,
+    get_youtube_transcript
 )
 class AgentState(TypedDict):
     webpage_extraction_tool,
     brave_web_search,
     python_code_interpreter_tool,
+    audio_file_transcriber,
+    get_youtube_transcript
 ]
 rate_limiter = InMemoryRateLimiter(

requirements.txt CHANGED Viewed

@@ -8,4 +8,5 @@ assemblyai
 openpyxl
 langchain-community
 pandas
-rizaio

 openpyxl
 langchain-community
 pandas
+rizaio
+youtube_transcript_api

tools.py CHANGED Viewed

@@ -7,7 +7,7 @@ from langchain_core.messages import HumanMessage
 from langchain_community.tools.riza.command import ExecPython
 from langchain_community.tools import BraveSearch
 from langchain_community.document_loaders import AssemblyAIAudioTranscriptLoader
-import requests
 import base64
 import pandas as pd
@@ -67,6 +67,36 @@ def audio_file_transcriber(file_path: str) -> str :
     docs = loader.load()
     return docs[0].page_content
 brave_web_search = BraveSearch.from_search_kwargs({"max_results": 4})

 from langchain_community.tools.riza.command import ExecPython
 from langchain_community.tools import BraveSearch
 from langchain_community.document_loaders import AssemblyAIAudioTranscriptLoader
+from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
 import base64
 import pandas as pd
     docs = loader.load()
     return docs[0].page_content
+@tool
+def get_youtube_transcript(video_url: str, lang: Optional[str] = 'en') -> str:
+    """Extracts and returns the transcript of a YouTube video.
+    Args:
+        video_url (str): The full YouTube video URL.
+        lang (Optional[str]): The language of the transcript. Defaults to 'en'.
+    Returns:
+        str: The full transcript as a string, or a message if not available.
+    """
+    # NOTE(review): if `tool` is LangChain's decorator, the docstring above
+    # presumably doubles as the tool description given to the model --
+    # confirm before rewording it.
+    import re
+    # Capture the 11-character ID ([\w-]{11}) that follows either `v=` or
+    # `youtu.be/`. The `.` in `youtu.be` is unescaped, so it matches any
+    # single character at that position. URLs containing neither form fall
+    # through to the "Invalid YouTube URL" return below.
+    video_id_match = re.search(r"(?:v=|youtu.be/)([\w-]{11})", video_url)
+    if not video_id_match:
+        return "Invalid YouTube URL"
+    video_id = video_id_match.group(1)
+    try:
+        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+        # NOTE(review): `lang` is typed Optional; a None would be passed on
+        # as [None] -- confirm callers always supply a language code.
+        transcript = transcript_list.find_transcript([lang])
+        # Join the text of every fetched entry with single spaces.
+        # NOTE(review): assumes fetch() yields items subscriptable by 'text';
+        # verify against the installed youtube_transcript_api version
+        # (the requirements entry is unpinned).
+        transcript_text = " ".join([entry['text'] for entry in transcript.fetch()])
+        return transcript_text
+    except TranscriptsDisabled:
+        return "Transcript is disabled for this video."
+    except NoTranscriptFound:
+        return f"No transcript found in language: {lang}"
+    except Exception as e:
+        # Catch-all: any other failure is reported to the caller as a string
+        # instead of being raised, so every path returns a str.
+        return f"Error retrieving transcript: {str(e)}"
 brave_web_search = BraveSearch.from_search_kwargs({"max_results": 4})