arcticaurora committed on
Commit
c91b4f8
·
verified ·
1 Parent(s): 93b570e

Update tools/youtube.py

Browse files
Files changed (1) hide show
  1. tools/youtube.py +25 -5
tools/youtube.py CHANGED
@@ -53,7 +53,9 @@ def generate_random_headers():
53
 
54
  def extract_video_id(url_or_id: str) -> str:
55
  """Extract video ID from YouTube URL or return if already an ID."""
56
- if len(url_or_id) == 11 and not ('youtube.com' in url_or_id or 'youtu.be' in url_or_id):
 
 
57
  return url_or_id
58
 
59
  patterns = [
@@ -69,7 +71,10 @@ def extract_video_id(url_or_id: str) -> str:
69
  if match:
70
  return match.group(1)
71
 
72
- return url_or_id
 
 
 
73
 
74
  @mcp.tool()
75
  def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool = False) -> str:
@@ -80,7 +85,7 @@ def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool
80
  include_timestamps: Include timestamps (default: False)
81
 
82
  Returns:
83
- Transcript text with optional timestamps
84
  """
85
  video_id = extract_video_id(video_url_or_id)
86
 
@@ -101,6 +106,12 @@ def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool
101
  if data.get('code') != 100000:
102
  raise ValueError(f"API error: {data.get('message', 'Unknown error')}")
103
 
 
 
 
 
 
 
104
  transcripts = data.get('data', {}).get('transcripts', {})
105
 
106
  transcript_entries = None
@@ -116,15 +127,24 @@ def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool
116
  if not transcript_entries:
117
  raise ValueError("No transcript available")
118
 
 
 
 
 
 
 
 
119
  if include_timestamps:
120
  formatted_transcript = []
121
  for entry in transcript_entries:
122
  timestamp = f"[{entry['start']}]"
123
  text = entry['text']
124
  formatted_transcript.append(f"{timestamp} {text}")
125
- return "\n\n".join(formatted_transcript)
126
  else:
127
- return " ".join(entry['text'] for entry in transcript_entries)
 
 
128
 
129
  except requests.exceptions.HTTPError as e:
130
  if e.response.status_code == 404:
 
53
 
54
  def extract_video_id(url_or_id: str) -> str:
55
  """Extract video ID from YouTube URL or return if already an ID."""
56
+ id_pattern = r'^[\w-]{11}$'
57
+
58
+ if re.match(id_pattern, url_or_id) and not ('youtube.com' in url_or_id or 'youtu.be' in url_or_id):
59
  return url_or_id
60
 
61
  patterns = [
 
71
  if match:
72
  return match.group(1)
73
 
74
+ if re.match(id_pattern, url_or_id):
75
+ return url_or_id
76
+
77
+ raise ValueError(f"Invalid YouTube URL or video ID: {url_or_id}")
78
 
79
  @mcp.tool()
80
  def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool = False) -> str:
 
85
  include_timestamps: Include timestamps (default: False)
86
 
87
  Returns:
88
+ Video title, channel name, and transcript text with optional timestamps
89
  """
90
  video_id = extract_video_id(video_url_or_id)
91
 
 
106
  if data.get('code') != 100000:
107
  raise ValueError(f"API error: {data.get('message', 'Unknown error')}")
108
 
109
+ # Extract video info
110
+ video_info = data.get('data', {}).get('videoInfo', {})
111
+ video_title = video_info.get('name', 'Unknown Title')
112
+ channel_name = video_info.get('author', 'Unknown Channel')
113
+
114
+ # Extract transcripts
115
  transcripts = data.get('data', {}).get('transcripts', {})
116
 
117
  transcript_entries = None
 
127
  if not transcript_entries:
128
  raise ValueError("No transcript available")
129
 
130
+ # Format the transcript
131
+ result_parts = [
132
+ f"Title: {video_title}",
133
+ f"Channel: {channel_name}",
134
+ "\n---\n"
135
+ ]
136
+
137
  if include_timestamps:
138
  formatted_transcript = []
139
  for entry in transcript_entries:
140
  timestamp = f"[{entry['start']}]"
141
  text = entry['text']
142
  formatted_transcript.append(f"{timestamp} {text}")
143
+ result_parts.append("\n\n".join(formatted_transcript))
144
  else:
145
+ result_parts.append(" ".join(entry['text'] for entry in transcript_entries))
146
+
147
+ return "\n".join(result_parts)
148
 
149
  except requests.exceptions.HTTPError as e:
150
  if e.response.status_code == 404: