Spaces:

arcticaurora
/

One-MCP

Paused

App Files Files Community

arcticaurora committed on Jul 25, 2025

Commit

a4ac4ba

verified ·

1 Parent(s): afdc948

Update tools/youtube.py

Browse files

Files changed (1) hide show

tools/youtube.py +87 -42

tools/youtube.py CHANGED Viewed

@@ -4,6 +4,7 @@ import random
 import time
 import uuid
 import re
 mcp = FastMCP("Youtube")
@@ -55,6 +56,9 @@ def extract_video_id(url_or_id: str) -> str:
     """Extract video ID from YouTube URL or return if already an ID."""
     id_pattern = r'^[\w-]{11}$'
     if re.match(id_pattern, url_or_id) and not ('youtube.com' in url_or_id or 'youtu.be' in url_or_id):
         return url_or_id
@@ -76,42 +80,27 @@ def extract_video_id(url_or_id: str) -> str:
     raise ValueError(f"Invalid YouTube URL or video ID: {url_or_id}")
-@mcp.tool()
-def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool = False):
-    """Get transcript text from a YouTube video.
-    Args:
-        video_url_or_id: YouTube URL or 11-char video ID
-        include_timestamps: Include timestamps (default: False)
-    """
-    video_id = extract_video_id(video_url_or_id)
     api_url = 'https://notegpt.io/api/v2/video-transcript'
-    params = {
-        'platform': 'youtube',
-        'video_id': video_id,
-    }
-    headers = generate_random_headers()
     try:
         response = requests.get(api_url, params=params, headers=headers, timeout=30)
         response.raise_for_status()
         data = response.json()
         if data.get('code') != 100000:
-            raise ValueError(f"API error: {data.get('message', 'Unknown error')}")
-        # Extract video info
         video_info = data.get('data', {}).get('videoInfo', {})
         video_title = video_info.get('name', 'Unknown Title')
         channel_name = video_info.get('author', 'Unknown Channel')
-        # Extract transcripts
         transcripts = data.get('data', {}).get('transcripts', {})
         transcript_entries = None
         for lang_code in ['en', 'en_auto']:
             if lang_code in transcripts:
@@ -123,36 +112,92 @@ def get_youtube_video_transcript(video_url_or_id: str, include_timestamps: bool
             transcript_entries = first_lang.get('custom', [])
         if not transcript_entries:
-            raise ValueError("No transcript available")
-        # Format the transcript
-        result_parts = [
-            f"Title: {video_title}",
-            f"Channel: {channel_name}",
-            "\n---\n"
-        ]
         if include_timestamps:
-            formatted_transcript = []
-            for entry in transcript_entries:
-                timestamp = f"[{entry['start']}]"
-                text = entry['text']
-                formatted_transcript.append(f"{timestamp} {text}")
-            result_parts.append("\n\n".join(formatted_transcript))
         else:
             result_parts.append(" ".join(entry['text'] for entry in transcript_entries))
         return "\n".join(result_parts)
     except requests.exceptions.HTTPError as e:
-        if e.response.status_code == 404:
-            raise ValueError(f"Video not found: {video_id}")
-        else:
-            raise ValueError(f"HTTP error: {e}")
     except requests.exceptions.RequestException as e:
-        raise ValueError(f"Network error: {e}")
     except Exception as e:
-        raise ValueError(f"Error: {e}")
 if __name__ == "__main__":
     mcp.run()

 import time
 import uuid
 import re
+import concurrent.futures
 mcp = FastMCP("Youtube")
     """Extract video ID from YouTube URL or return if already an ID."""
     id_pattern = r'^[\w-]{11}$'
+    # Clean up input before matching
+    url_or_id = url_or_id.strip()
     if re.match(id_pattern, url_or_id) and not ('youtube.com' in url_or_id or 'youtu.be' in url_or_id):
         return url_or_id
     raise ValueError(f"Invalid YouTube URL or video ID: {url_or_id}")
+def _fetch_single_transcript(video_id: str, include_timestamps: bool) -> str:
+    """Helper function to fetch and format a single video transcript."""
     api_url = 'https://notegpt.io/api/v2/video-transcript'
+    params = {'platform': 'youtube', 'video_id': video_id}
     try:
+        # Each call gets its own unique headers
+        headers = generate_random_headers()
         response = requests.get(api_url, params=params, headers=headers, timeout=30)
         response.raise_for_status()
         data = response.json()
         if data.get('code') != 100000:
+            return f"Error for video {video_id}: API error - {data.get('message', 'Unknown error')}"
         video_info = data.get('data', {}).get('videoInfo', {})
         video_title = video_info.get('name', 'Unknown Title')
         channel_name = video_info.get('author', 'Unknown Channel')
         transcripts = data.get('data', {}).get('transcripts', {})
         transcript_entries = None
         for lang_code in ['en', 'en_auto']:
             if lang_code in transcripts:
             transcript_entries = first_lang.get('custom', [])
         if not transcript_entries:
+            return f"Error for video {video_id}: No transcript available."
+        result_parts = [f"Title: {video_title}", f"Channel: {channel_name}", f"Video ID: {video_id}", "\n---"]
         if include_timestamps:
+            formatted_transcript = "\n\n".join([f"[{entry['start']}] {entry['text']}" for entry in transcript_entries])
+            result_parts.append(formatted_transcript)
         else:
             result_parts.append(" ".join(entry['text'] for entry in transcript_entries))
         return "\n".join(result_parts)
     except requests.exceptions.HTTPError as e:
+        return f"Error for video {video_id}: HTTP error - {e}"
     except requests.exceptions.RequestException as e:
+        return f"Error for video {video_id}: Network error - {e}"
     except Exception as e:
+        return f"Error for video {video_id}: An unexpected error occurred - {e}"
+@mcp.tool()
+def get_youtube_video_transcript(video_urls_or_ids: str, include_timestamps: bool = False):
+    """Get transcript text from one or more YouTube videos in parallel.
+    Args:
+        video_urls_or_ids: A single YouTube URL or 11-character video ID,
+                           OR a comma-separated string of multiple URLs or IDs.
+        include_timestamps: Include timestamps in the output (default: False).
+    Returns:
+        A string containing the formatted transcript(s). If multiple videos
+        are processed, their transcripts are concatenated and separated by a
+        clear delimiter. Errors for individual videos are reported inline.
+    Examples:
+        # 1. Single URL
+        get_youtube_video_transcript(video_urls_or_ids="https://www.youtube.com/watch?v=dQw4w9WgXcQ")
+        # 2. Multiple URLs (comma-separated)
+        get_youtube_video_transcript(
+            video_urls_or_ids="https://www.youtube.com/watch?v=dQw4w9WgXcQ,https://youtu.be/L_Guz73e6fw"
+        )
+        # 3. Multiple Video IDs (comma-separated)
+        get_youtube_video_transcript(
+            video_urls_or_ids="dQw4w9WgXcQ,L_Guz73e6fw,QH2-TGUlwu4"
+        )
+        # 4. Mix of URLs and IDs with timestamps
+        get_youtube_video_transcript(
+            video_urls_or_ids="https://www.youtube.com/watch?v=dQw4w9WgXcQ,L_Guz73e6fw",
+            include_timestamps=True
+        )
+    """
+    inputs = [item.strip() for item in video_urls_or_ids.split(',')]
+    video_ids = []
+    errors = []
+    for item in inputs:
+        try:
+            video_ids.append(extract_video_id(item))
+        except ValueError as e:
+            errors.append(str(e))
+    all_results = []
+    if errors:
+        all_results.append("--- INPUT ERRORS ---\n" + "\n".join(errors))
+    if not video_ids:
+        if not errors:
+            return "Error: No valid video URLs or IDs were provided."
+        return "\n".join(all_results)
+    # Use ThreadPoolExecutor to fetch transcripts in parallel
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        # map() ensures that each video_id is passed to the helper function
+        # A lambda is used to pass the include_timestamps argument as well
+        future_to_video = {
+            executor.submit(_fetch_single_transcript, vid, include_timestamps): vid for vid in video_ids
+        }
+        # Process results as they complete
+        for future in concurrent.futures.as_completed(future_to_video):
+            all_results.append(future.result())
+    # Join all individual results with a clear separator
+    return "\n\n--- --- ---\n\n".join(all_results)
 if __name__ == "__main__":
     mcp.run()