grshot committed on
Commit
153b523
·
1 Parent(s): 3c69bee

update youtube

Browse files
Files changed (2) hide show
  1. agent.py +8 -2
  2. requirements.txt +1 -1
agent.py CHANGED
@@ -1,7 +1,8 @@
1
  import os
2
  from typing import Annotated
3
 
4
- from langchain_community.document_loaders import WikipediaLoader, YouTubeLoader
 
5
 
6
  # --- Langchain / Langraph ---
7
  from langchain_community.tools.tavily_search import TavilySearchResults
@@ -60,7 +61,12 @@ def search_wikipedia(query: str) -> dict:
60
  def extract_youtube_transcript(video_url: str) -> dict:
61
  """Extract transcript from a YouTube video given its URL using LangChain's YouTubeLoader."""
62
  try:
63
- loader = YouTubeLoader(video_urls=[video_url])
 
 
 
 
 
64
  docs = loader.load()
65
  if docs:
66
  formatted_docs = "\n\n---\n\n".join(
 
1
  import os
2
  from typing import Annotated
3
 
4
+ from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
5
+ from langchain_community.document_loaders.youtube import TranscriptFormat
6
 
7
  # --- Langchain / Langraph ---
8
  from langchain_community.tools.tavily_search import TavilySearchResults
 
61
  def extract_youtube_transcript(video_url: str) -> dict:
62
  """Extract transcript from a YouTube video given its URL using LangChain's YouTubeLoader."""
63
  try:
64
+ loader = YoutubeLoader.from_youtube_url(
65
+ video_url,
66
+ add_video_info=True,
67
+ transcript_format=TranscriptFormat.CHUNKS,
68
+ chunk_size_seconds=30,
69
+ )
70
  docs = loader.load()
71
  if docs:
72
  formatted_docs = "\n\n---\n\n".join(
requirements.txt CHANGED
@@ -10,4 +10,4 @@ langchain-tavily
10
  langgraph
11
  tavily-python
12
  wikipedia
13
- youtube-transcript-api
 
10
  langgraph
11
  tavily-python
12
  wikipedia
13
+ youtube-transcript-api==0.6.3