Spaces:
Sleeping
Sleeping
"""MCP Server for Podcast Recommendations - YouTube Integration"""
import random
import socket
import time as time_module
from typing import List, Dict, Any

# yt-dlp is an optional dependency: when it cannot be imported the flag below
# is False and a warning is printed once at import time.
try:
    import yt_dlp
    YT_DLP_AVAILABLE = True
except ImportError:
    YT_DLP_AVAILABLE = False
    print("⚠️ yt-dlp not available for podcast search")
class PodcastMCPServer:
    """MCP Server for podcast discovery and recommendations via YouTube.

    Searches YouTube through yt-dlp when the module-level flag
    ``YT_DLP_AVAILABLE`` is true and the YouTube host name resolves, and
    falls back to a small built-in demo catalogue otherwise.  Progress and
    error messages are reported with ``print``.
    """

    # Retry policy for the YouTube search; the delay doubles after each retry.
    _MAX_RETRIES = 3
    _INITIAL_RETRY_DELAY = 2  # seconds
    # Number of search hits requested from YouTube before filtering.
    _SEARCH_POOL_SIZE = 10
    # Substrings of an error message that mark it as a DNS/network failure.
    _NETWORK_ERROR_MARKERS = (
        "Failed to resolve",
        "No address associated",
        "NameResolutionError",
        "gaierror",
    )
    # Category whose demo entries are used when the requested one is unknown.
    _DEFAULT_CATEGORY = "technology"
    # Search phrase used for each known category by get_trending_podcasts.
    _SEARCH_TERMS = {
        "technology": "tech podcast 2024",
        "business": "business podcast interview",
        "comedy": "comedy podcast funny",
        "education": "educational podcast learn",
        "news": "news podcast analysis",
        "true-crime": "true crime podcast",
    }
    # Fallback catalogue; entries are copied before being handed to callers.
    _DEMO_PODCASTS = {
        "technology": [
            {
                "title": "The AI Revolution",
                "description": "Exploring the latest in artificial intelligence and machine learning",
                "host": "Dr. Sarah Chen",
                "duration": "45 min",
                "category": "technology",
                "rating": 4.8,
                "source": "demo",
            },
            {
                "title": "Code & Coffee",
                "description": "Daily dose of programming tips and tech news",
                "host": "Alex Rodriguez",
                "duration": "30 min",
                "category": "technology",
                "rating": 4.6,
                "source": "demo",
            },
        ],
        "business": [
            {
                "title": "Startup Stories",
                "description": "Interviews with successful entrepreneurs",
                "host": "Michael Zhang",
                "duration": "50 min",
                "category": "business",
                "rating": 4.7,
                "source": "demo",
            },
        ],
        "comedy": [
            {
                "title": "Daily Laughs",
                "description": "Your daily dose of comedy and humor",
                "host": "Jenny Smith",
                "duration": "35 min",
                "category": "comedy",
                "rating": 4.5,
                "source": "demo",
            },
        ],
        "education": [
            {
                "title": "Learn Something New",
                "description": "Fascinating facts and educational content",
                "host": "Prof. David Lee",
                "duration": "40 min",
                "category": "education",
                "rating": 4.9,
                "source": "demo",
            },
        ],
        "news": [
            {
                "title": "World Today",
                "description": "Daily news analysis and commentary",
                "host": "Maria Garcia",
                "duration": "25 min",
                "category": "news",
                "rating": 4.6,
                "source": "demo",
            },
        ],
    }

    def __init__(self):
        self.name = "podcast_server"
        self.description = "Provides podcast recommendations from YouTube"
        # video_id -> bool result of earlier embeddability checks
        self._embed_cache = {}
        # Rate limiting
        self._last_youtube_call = 0
        self._min_call_interval = 3.0  # Minimum 3 seconds between YouTube calls
        # Track recently played podcasts
        self._recently_played = []
        self._max_recent = 10

    def _check_youtube_available(self) -> bool:
        """Check if YouTube is accessible via DNS.

        Returns:
            True when ``www.youtube.com`` resolves, False on any socket-level
            failure (``socket.gaierror`` is a subclass of ``OSError``).
        """
        try:
            socket.gethostbyname('www.youtube.com')
        except OSError:
            return False
        return True

    def _is_network_error(self, error: Exception) -> bool:
        """Return True when the error text contains a known DNS/network marker."""
        message = str(error)
        return any(marker in message for marker in self._NETWORK_ERROR_MARKERS)

    def _rate_limit_youtube(self):
        """Enforce rate limiting for YouTube API calls.

        Sleeps until at least ``_min_call_interval`` seconds have passed since
        the previously recorded call, then records the current time.
        """
        elapsed = time_module.time() - self._last_youtube_call
        if elapsed < self._min_call_interval:
            sleep_time = self._min_call_interval - elapsed
            print(f"⏳ Podcast rate limiting: waiting {sleep_time:.1f}s...")
            time_module.sleep(sleep_time)
        self._last_youtube_call = time_module.time()

    @staticmethod
    def _is_embeddable_info(video_id: str, info) -> bool:
        """Decide from yt-dlp metadata whether a video can be embedded."""
        if not info:
            return False
        # Check availability
        availability = info.get('availability')
        if availability and availability != 'public':
            print(f"⚠️ Podcast {video_id}: not public ({availability})")
            return False
        # Check age restriction (the key may be present with a None value)
        if (info.get('age_limit') or 0) > 0:
            print(f"⚠️ Podcast {video_id}: age restricted")
            return False
        # Check embed playability
        if info.get('playable_in_embed') is False:
            print(f"⚠️ Podcast {video_id}: not playable in embed")
            return False
        return True

    def check_video_embeddable(self, video_id: str) -> bool:
        """Check if a YouTube video is embeddable.

        Results are cached per video id.  A check that fails with a
        DNS/network error returns False but is NOT cached, so a temporary
        outage does not mark the video as non-embeddable for good.

        Args:
            video_id: YouTube video id

        Returns:
            True if the video looks embeddable (or yt-dlp is unavailable and
            the check cannot be made), False otherwise
        """
        if video_id in self._embed_cache:
            return self._embed_cache[video_id]
        if not YT_DLP_AVAILABLE:
            return True  # Assume embeddable if we can't check

        ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'skip_download': True,
        }
        url = f"https://www.youtube.com/watch?v={video_id}"
        # Best-effort boundary: any failure means "not embeddable" for now.
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
            embeddable = self._is_embeddable_info(video_id, info)
        except Exception as e:
            print(f"⚠️ Podcast {video_id}: check failed - {e}")
            if not self._is_network_error(e):
                self._embed_cache[video_id] = False
            return False

        self._embed_cache[video_id] = embeddable
        return embeddable

    @staticmethod
    def _format_podcast_entry(entry: Dict[str, Any], category: str) -> Dict[str, Any]:
        """Convert one yt-dlp search entry into the podcast dict returned to callers."""
        video_id = entry.get('id', '')
        # ``or`` also covers keys that are present but hold None.
        title = entry.get('title') or 'Unknown Podcast'
        channel = entry.get('uploader') or entry.get('channel') or 'Unknown Host'
        try:
            # Durations may arrive as floats; whole seconds keep "N min" clean.
            seconds = int(entry.get('duration') or 0)
        except (TypeError, ValueError):
            seconds = 0
        return {
            "title": title,
            "description": f"Podcast episode about {category}",
            "host": channel,
            "duration": f"{seconds // 60} min" if seconds else "Unknown",
            "duration_seconds": seconds,
            "category": category,
            "rating": round(random.uniform(4.0, 5.0), 1),
            "source": "youtube",
            "youtube_id": video_id,
            "url": f"https://www.youtube.com/watch?v={video_id}"
        }

    def _search_once(self, search_query: str, category: str, limit: int) -> List[Dict[str, Any]]:
        """Run a single YouTube search and return up to ``limit`` embeddable podcasts."""
        print(f"🎙️ Searching YouTube for podcast: {search_query}")
        ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'extract_flat': True,
            'default_search': f'ytsearch{self._SEARCH_POOL_SIZE}',
            'socket_timeout': 30,  # Increase timeout for network issues
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(
                f"ytsearch{self._SEARCH_POOL_SIZE}:{search_query}", download=False
            )
        if not result:
            print("❌ No podcast results found")
            return []
        entries = [entry for entry in (result.get('entries') or []) if entry is not None]
        if not entries:
            print("❌ No podcast entries found")
            return []

        random.shuffle(entries)  # Shuffle first for variety
        podcasts = []
        for entry in entries:
            if len(podcasts) >= limit:
                break
            video_id = entry.get('id', '')
            if not video_id:
                continue
            podcast = self._format_podcast_entry(entry, category)
            title = podcast["title"]
            if not self.check_video_embeddable(video_id):
                print(f" ✗ Skipping non-embeddable podcast: {title[:50]}")
                continue
            podcasts.append(podcast)
            print(f" ✓ Found embeddable podcast: {title[:50]}")
        return podcasts

    def search_youtube_podcast(self, query: str, category: str = "technology", limit: int = 5) -> List[Dict[str, Any]]:
        """
        Search YouTube for podcasts with retry logic for network issues

        DNS/network failures are retried with exponential backoff; any other
        error, an empty result, or exhausted retries fall back to the demo
        podcasts, so this method does not raise for search failures.

        Args:
            query: Search query
            category: Podcast category for context
            limit: Number of results to return
        Returns:
            List of podcast information with YouTube URLs
        """
        if not YT_DLP_AVAILABLE:
            print("⚠️ yt-dlp not available, using demo podcasts")
            return self._get_demo_podcasts(category, limit)
        # Check if YouTube is accessible before attempting search
        if not self._check_youtube_available():
            print("⚠️ YouTube is not accessible (DNS/network issue). Using demo podcasts.")
            return self._get_demo_podcasts(category, limit)
        # Apply rate limiting
        self._rate_limit_youtube()

        # Build search query for podcasts
        search_query = f"{query} podcast {category}"
        max_retries = self._MAX_RETRIES
        retry_delay = self._INITIAL_RETRY_DELAY
        for attempt in range(1, max_retries + 1):
            can_retry = attempt < max_retries

            # Re-check DNS on every attempt (helps diagnose network issues)
            if not self._check_youtube_available():
                if not can_retry:
                    print(f"❌ DNS resolution failed after {max_retries} attempts. Using demo podcasts.")
                    return self._get_demo_podcasts(category, limit)
                print(f"⚠️ DNS resolution failed (attempt {attempt}/{max_retries}), retrying in {retry_delay}s...")
                time_module.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff
                continue

            try:
                podcasts = self._search_once(search_query, category, limit)
            except Exception as e:
                if not self._is_network_error(e):
                    # Other errors, don't retry
                    print(f"❌ Error searching YouTube for podcasts: {e}")
                    return self._get_demo_podcasts(category, limit)
                if not can_retry:
                    print(f"❌ Network error after {max_retries} attempts. Using demo podcasts.")
                    return self._get_demo_podcasts(category, limit)
                error_str = str(e)
                print(f"⚠️ Network/DNS error (attempt {attempt}/{max_retries}): {error_str[:100]}...")
                print(f" Retrying in {retry_delay}s...")
                time_module.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff
                continue

            if podcasts:
                print(f"✅ Found {len(podcasts)} embeddable podcasts on YouTube")
                return podcasts
            return self._get_demo_podcasts(category, limit)

        # Only reachable when no attempt was made at all (_MAX_RETRIES <= 0).
        return self._get_demo_podcasts(category, limit)

    def _get_demo_podcasts(self, category: str, limit: int) -> List[Dict[str, Any]]:
        """Get demo podcasts as fallback.

        Args:
            category: Podcast category; unknown categories use "technology"
            limit: Maximum number of entries; negative values yield none
        Returns:
            Fresh copies of the matching demo entries
        """
        catalogue = self._DEMO_PODCASTS
        selected = catalogue.get(category, catalogue[self._DEFAULT_CATEGORY])
        if limit is not None:
            limit = max(limit, 0)
        return [dict(podcast) for podcast in selected[:limit]]

    def get_trending_podcasts(self, category: str = "technology", limit: int = 5) -> List[Dict[str, Any]]:
        """
        Get trending podcasts by category from YouTube

        Args:
            category: Podcast category (technology, business, comedy, education, news, true-crime)
            limit: Number of podcasts to return
        Returns:
            List of podcast information
        """
        # Search YouTube for trending podcasts in this category
        query = self._SEARCH_TERMS.get(category, f"{category} podcast")
        return self.search_youtube_podcast(query, category, limit)

    def get_personalized_podcasts(self, user_preferences: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Get personalized podcast recommendations from YouTube

        Args:
            user_preferences: User's podcast preferences; the
                "podcast_interests" entry may be a list of categories or a
                single category string, and defaults to ["technology"] when
                missing or None
        Returns:
            List of recommended podcasts (one per interest, at most two interests)
        """
        interests = (user_preferences or {}).get("podcast_interests", [self._DEFAULT_CATEGORY])
        if interests is None:
            interests = [self._DEFAULT_CATEGORY]
        elif isinstance(interests, str):
            # A bare string would otherwise be iterated character by character.
            interests = [interests]

        recommendations = []
        for interest in list(interests)[:2]:  # Limit to 2 categories for speed
            recommendations.extend(self.get_trending_podcasts(category=interest, limit=1))
        return recommendations

    def get_tools_definition(self) -> List[Dict[str, Any]]:
        """Return MCP tools definition for this server"""
        return [
            {
                "name": "get_trending_podcasts",
                "description": "Get trending podcasts by category from YouTube",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "category": {
                            "type": "string",
                            "description": "Podcast category (technology, business, comedy, education, news, true-crime)"
                        },
                        "limit": {
                            "type": "integer",
                            "description": "Number of podcasts to return"
                        }
                    },
                    "required": ["category"]
                }
            },
            {
                "name": "get_personalized_podcasts",
                "description": "Get personalized podcast recommendations from YouTube",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "user_preferences": {
                            "type": "object",
                            "description": "User's podcast preferences"
                        }
                    },
                    "required": ["user_preferences"]
                }
            },
            {
                "name": "search_youtube_podcast",
                "description": "Search YouTube for specific podcasts",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query for podcasts"
                        },
                        "category": {
                            "type": "string",
                            "description": "Podcast category for context"
                        },
                        "limit": {
                            "type": "integer",
                            "description": "Number of results to return"
                        }
                    },
                    "required": ["query"]
                }
            }
        ]