cowrycode committed on
Commit
f8a91e9
·
verified ·
1 Parent(s): 4793736

Update youtube_tool.py

Browse files
Files changed (1) hide show
  1. youtube_tool.py +8 -45
youtube_tool.py CHANGED
@@ -14,46 +14,6 @@ def extract_video_id(url: str) -> str:
14
  Returns:
15
  str: The extracted video ID or raises ValueError.
16
  """
17
- patterns = [
18
- r"youtube\.com/watch\?v=([a-zA-Z0-9_-]{11})",
19
- r"youtu\.be/([a-zA-Z0-9_-]{11})"
20
- ]
21
- for pattern in patterns:
22
- match = re.search(pattern, url)
23
- if match:
24
- return match.group(1)
25
- raise ValueError("Invalid YouTube URL or unable to extract video ID.")
26
-
27
- def get_youtube_transcript(url: str) -> str:
28
- """
29
- Fetches the transcript text for a given YouTube video.
30
- Args:
31
- url (str): The YouTube video URL.
32
- Returns:
33
- str: Combined transcript text or an error message.
34
- """
35
- try:
36
- video_id = extract_video_id(url)
37
- transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
38
- full_text = " ".join([entry["text"] for entry in transcript_list])
39
- return full_text.strip()[:2000] # Truncate to 2000 chars to prevent token overflow
40
- except TranscriptsDisabled:
41
- return "This video has transcripts disabled."
42
- except NoTranscriptFound:
43
- return "No transcript was found for this video."
44
- except Exception as e:
45
- return f"Transcript error: {str(e)}"
46
-
47
- youtube_tool = FunctionTool.from_defaults(get_youtube_transcript)
48
-
49
-
50
- def extract_video_id(url: str) -> str:
51
- """
52
- Handles typical YouTube URLs:
53
- - https://www.youtube.com/watch?v=VIDEO_ID
54
- - https://youtu.be/VIDEO_ID
55
- - with extra query params
56
- """
57
  parsed = urlparse(url)
58
  if parsed.hostname in {"www.youtube.com", "youtube.com"}:
59
  qs = parse_qs(parsed.query)
@@ -62,10 +22,14 @@ def extract_video_id(url: str) -> str:
62
  # fallback for youtu.be or raw IDs
63
  return parsed.path.lstrip("/")
64
 
 
65
  def fetch_youtube_transcript(video_url: str) -> str:
66
  """
67
- Fetch YouTube transcript text for the given URL.
68
- In English language.
 
 
 
69
  """
70
  video_id = extract_video_id(video_url)
71
 
@@ -77,9 +41,8 @@ def fetch_youtube_transcript(video_url: str) -> str:
77
  )
78
 
79
  # From the transcript data, you can create an object holding each transcript snippet and its time.
80
- arr = [snippet.text for snippet in transcript_data]
81
- return " ".join(arr)
82
- #return " ".join(entry["text"] for entry in arr)
83
  except Exception as e:
84
  return f"Error fetching video details: {str(e)}"
85
 
 
14
  Returns:
15
  str: The extracted video ID or raises ValueError.
16
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  parsed = urlparse(url)
18
  if parsed.hostname in {"www.youtube.com", "youtube.com"}:
19
  qs = parse_qs(parsed.query)
 
22
  # fallback for youtu.be or raw IDs
23
  return parsed.path.lstrip("/")
24
 
25
+
26
  def fetch_youtube_transcript(video_url: str) -> str:
27
  """
28
+ Fetches the transcript text for a given YouTube video.
29
+ Args:
30
+ video_url (str): The YouTube video URL.
31
+ Returns:
32
+ str: Combined transcript text or an error message.
33
  """
34
  video_id = extract_video_id(video_url)
35
 
 
41
  )
42
 
43
  # From the transcript data, you can create an object holding each transcript snippet and its time.
44
+ arr = [ {"text": snippet.text} for snippet in transcript_data]
45
+ return " ".join(f"{entry['text']}" for entry in arr)
 
46
  except Exception as e:
47
  return f"Error fetching video details: {str(e)}"
48