Spaces:

arcticaurora
/

One-MCP

Sleeping

App Files Community

arcticaurora committed on Jun 24, 2025

Commit

c91b4f8

verified ·

1 Parent(s): 93b570e

Update tools/youtube.py

Browse files

Files changed (1) hide show

tools/youtube.py +25 -5

tools/youtube.py CHANGED Viewed

@@ -53,7 +53,9 @@ def generate_random_headers():
 def extract_video_id(url_or_id: str) -> str:
     """Extract video ID from YouTube URL or return if already an ID."""
-    if len(url_or_id) == 11 and not ('youtube.com' in url_or_id or 'youtu.be' in url_or_id):
         return url_or_id
     patterns = [
@@ -69,7 +71,10 @@ def extract_video_id(url_or_id: str) -> str:
         if match:
             return match.group(1)
-    return url_or_id
 @mcp.tool()
 def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool = False) -> str:
@@ -80,7 +85,7 @@ def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool
         include_timestamps: Include timestamps (default: False)
     Returns:
-        Transcript text with optional timestamps
     """
     video_id = extract_video_id(video_url_or_id)
@@ -101,6 +106,12 @@ def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool
         if data.get('code') != 100000:
             raise ValueError(f"API error: {data.get('message', 'Unknown error')}")
         transcripts = data.get('data', {}).get('transcripts', {})
         transcript_entries = None
@@ -116,15 +127,24 @@ def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool
         if not transcript_entries:
             raise ValueError("No transcript available")
         if include_timestamps:
             formatted_transcript = []
             for entry in transcript_entries:
                 timestamp = f"[{entry['start']}]"
                 text = entry['text']
                 formatted_transcript.append(f"{timestamp} {text}")
-            return "\n\n".join(formatted_transcript)
         else:
-            return " ".join(entry['text'] for entry in transcript_entries)
     except requests.exceptions.HTTPError as e:
         if e.response.status_code == 404:

 def extract_video_id(url_or_id: str) -> str:
     """Extract video ID from YouTube URL or return if already an ID."""
+    id_pattern = r'^[\w-]{11}$'
+    if re.match(id_pattern, url_or_id) and not ('youtube.com' in url_or_id or 'youtu.be' in url_or_id):
         return url_or_id
     patterns = [
         if match:
             return match.group(1)
+    if re.match(id_pattern, url_or_id):
+        return url_or_id
+    raise ValueError(f"Invalid YouTube URL or video ID: {url_or_id}")
 @mcp.tool()
 def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool = False) -> str:
         include_timestamps: Include timestamps (default: False)
     Returns:
+        Video title, channel name, and transcript text with optional timestamps
     """
     video_id = extract_video_id(video_url_or_id)
         if data.get('code') != 100000:
             raise ValueError(f"API error: {data.get('message', 'Unknown error')}")
+        # Extract video info
+        video_info = data.get('data', {}).get('videoInfo', {})
+        video_title = video_info.get('name', 'Unknown Title')
+        channel_name = video_info.get('author', 'Unknown Channel')
+        # Extract transcripts
         transcripts = data.get('data', {}).get('transcripts', {})
         transcript_entries = None
         if not transcript_entries:
             raise ValueError("No transcript available")
+        # Format the transcript
+        result_parts = [
+            f"Title: {video_title}",
+            f"Channel: {channel_name}",
+            "\n---\n"
+        ]
         if include_timestamps:
             formatted_transcript = []
             for entry in transcript_entries:
                 timestamp = f"[{entry['start']}]"
                 text = entry['text']
                 formatted_transcript.append(f"{timestamp} {text}")
+            result_parts.append("\n\n".join(formatted_transcript))
         else:
+            result_parts.append(" ".join(entry['text'] for entry in transcript_entries))
+        return "\n".join(result_parts)
     except requests.exceptions.HTTPError as e:
         if e.response.status_code == 404: