"""MCP Server for Podcast Recommendations - YouTube Integration""" from typing import List, Dict, Any import random import socket import time as time_module # Try to import yt-dlp try: import yt_dlp YT_DLP_AVAILABLE = True except ImportError: YT_DLP_AVAILABLE = False print("⚠️ yt-dlp not available for podcast search") class PodcastMCPServer: """MCP Server for podcast discovery and recommendations via YouTube""" def __init__(self): self.name = "podcast_server" self.description = "Provides podcast recommendations from YouTube" self._embed_cache = {} # Rate limiting self._last_youtube_call = 0 self._min_call_interval = 3.0 # Minimum 3 seconds between YouTube calls # Track recently played podcasts self._recently_played = [] self._max_recent = 10 def _check_youtube_available(self) -> bool: """Check if YouTube is accessible via DNS""" try: socket.gethostbyname('www.youtube.com') return True except socket.gaierror: return False def _rate_limit_youtube(self): """Enforce rate limiting for YouTube API calls""" import time as time_module current_time = time_module.time() elapsed = current_time - self._last_youtube_call if elapsed < self._min_call_interval: sleep_time = self._min_call_interval - elapsed print(f"⏳ Podcast rate limiting: waiting {sleep_time:.1f}s...") time_module.sleep(sleep_time) self._last_youtube_call = time_module.time() def check_video_embeddable(self, video_id: str) -> bool: """Check if a YouTube video is embeddable""" if video_id in self._embed_cache: return self._embed_cache[video_id] if not YT_DLP_AVAILABLE: return True # Assume embeddable if we can't check try: ydl_opts = { 'quiet': True, 'no_warnings': True, 'skip_download': True, } with yt_dlp.YoutubeDL(ydl_opts) as ydl: url = f"https://www.youtube.com/watch?v={video_id}" info = ydl.extract_info(url, download=False) if not info: self._embed_cache[video_id] = False return False # Check availability availability = info.get('availability') if availability and availability != 'public': print(f"⚠️ Podcast {video_id}: not public ({availability})") self._embed_cache[video_id] = False return False # Check age restriction if info.get('age_limit', 0) > 0: print(f"⚠️ Podcast {video_id}: age restricted") self._embed_cache[video_id] = False return False # Check embed playability if info.get('playable_in_embed') is False: print(f"⚠️ Podcast {video_id}: not playable in embed") self._embed_cache[video_id] = False return False self._embed_cache[video_id] = True return True except Exception as e: print(f"⚠️ Podcast {video_id}: check failed - {e}") self._embed_cache[video_id] = False return False def search_youtube_podcast(self, query: str, category: str = "technology", limit: int = 5) -> List[Dict[str, Any]]: """ Search YouTube for podcasts with retry logic for network issues Args: query: Search query category: Podcast category for context limit: Number of results to return Returns: List of podcast information with YouTube URLs """ if not YT_DLP_AVAILABLE: print("⚠️ yt-dlp not available, using demo podcasts") return self._get_demo_podcasts(category, limit) # Check if YouTube is accessible before attempting search if not self._check_youtube_available(): print("⚠️ YouTube is not accessible (DNS/network issue). Using demo podcasts.") return self._get_demo_podcasts(category, limit) # Apply rate limiting self._rate_limit_youtube() max_retries = 3 retry_delay = 2 # Start with 2 seconds for attempt in range(max_retries): try: # Try to resolve DNS first (helps diagnose network issues) try: socket.gethostbyname('www.youtube.com') except socket.gaierror as dns_error: if attempt < max_retries - 1: print(f"⚠️ DNS resolution failed (attempt {attempt + 1}/{max_retries}), retrying in {retry_delay}s...") time_module.sleep(retry_delay) retry_delay *= 2 # Exponential backoff continue else: print(f"❌ DNS resolution failed after {max_retries} attempts. Using demo podcasts.") return self._get_demo_podcasts(category, limit) # Build search query for podcasts search_query = f"{query} podcast {category}" print(f"🎙️ Searching YouTube for podcast: {search_query}") ydl_opts = { 'quiet': True, 'no_warnings': True, 'extract_flat': True, 'default_search': 'ytsearch10', 'socket_timeout': 30, # Increase timeout for network issues } with yt_dlp.YoutubeDL(ydl_opts) as ydl: result = ydl.extract_info(f"ytsearch10:{search_query}", download=False) if not result: print("❌ No podcast results found") return self._get_demo_podcasts(category, limit) entries = result.get('entries', []) if not entries: print("❌ No podcast entries found") return self._get_demo_podcasts(category, limit) # Filter and format results, checking embeddability podcasts = [] random.shuffle(entries) # Shuffle first for variety for entry in entries: if entry is None: continue if len(podcasts) >= limit: break video_id = entry.get('id', '') title = entry.get('title', 'Unknown Podcast') channel = entry.get('uploader', entry.get('channel', 'Unknown Host')) duration = entry.get('duration', 0) if video_id: # Check if video is embeddable if not self.check_video_embeddable(video_id): print(f" ✗ Skipping non-embeddable podcast: {title[:50]}") continue podcasts.append({ "title": title, "description": f"Podcast episode about {category}", "host": channel, "duration": f"{duration // 60} min" if duration else "Unknown", "duration_seconds": duration or 0, "category": category, "rating": round(random.uniform(4.0, 5.0), 1), "source": "youtube", "youtube_id": video_id, "url": f"https://www.youtube.com/watch?v={video_id}" }) print(f" ✓ Found embeddable podcast: {title[:50]}") if podcasts: print(f"✅ Found {len(podcasts)} embeddable podcasts on YouTube") return podcasts else: return self._get_demo_podcasts(category, limit) # Success! Break out of retry loop break except (yt_dlp.utils.DownloadError, Exception) as e: error_str = str(e) # Check for DNS/network errors if any(keyword in error_str for keyword in ["Failed to resolve", "No address associated", "NameResolutionError", "gaierror"]): if attempt < max_retries - 1: print(f"⚠️ Network/DNS error (attempt {attempt + 1}/{max_retries}): {error_str[:100]}...") print(f" Retrying in {retry_delay}s...") time_module.sleep(retry_delay) retry_delay *= 2 # Exponential backoff continue else: print(f"❌ Network error after {max_retries} attempts. Using demo podcasts.") return self._get_demo_podcasts(category, limit) else: # Other errors, don't retry print(f"❌ Error searching YouTube for podcasts: {e}") return self._get_demo_podcasts(category, limit) return self._get_demo_podcasts(category, limit) def _get_demo_podcasts(self, category: str, limit: int) -> List[Dict[str, Any]]: """Get demo podcasts as fallback""" demo_podcasts = { "technology": [ { "title": "The AI Revolution", "description": "Exploring the latest in artificial intelligence and machine learning", "host": "Dr. Sarah Chen", "duration": "45 min", "category": "technology", "rating": 4.8, "source": "demo" }, { "title": "Code & Coffee", "description": "Daily dose of programming tips and tech news", "host": "Alex Rodriguez", "duration": "30 min", "category": "technology", "rating": 4.6, "source": "demo" } ], "business": [ { "title": "Startup Stories", "description": "Interviews with successful entrepreneurs", "host": "Michael Zhang", "duration": "50 min", "category": "business", "rating": 4.7, "source": "demo" } ], "comedy": [ { "title": "Daily Laughs", "description": "Your daily dose of comedy and humor", "host": "Jenny Smith", "duration": "35 min", "category": "comedy", "rating": 4.5, "source": "demo" } ], "education": [ { "title": "Learn Something New", "description": "Fascinating facts and educational content", "host": "Prof. David Lee", "duration": "40 min", "category": "education", "rating": 4.9, "source": "demo" } ], "news": [ { "title": "World Today", "description": "Daily news analysis and commentary", "host": "Maria Garcia", "duration": "25 min", "category": "news", "rating": 4.6, "source": "demo" } ] } podcasts = demo_podcasts.get(category, demo_podcasts["technology"]) return podcasts[:limit] def get_trending_podcasts(self, category: str = "technology", limit: int = 5) -> List[Dict[str, Any]]: """ Get trending podcasts by category from YouTube Args: category: Podcast category (technology, business, comedy, education, news) limit: Number of podcasts to return Returns: List of podcast information """ # Search YouTube for trending podcasts in this category search_terms = { "technology": "tech podcast 2024", "business": "business podcast interview", "comedy": "comedy podcast funny", "education": "educational podcast learn", "news": "news podcast analysis", "true-crime": "true crime podcast" } query = search_terms.get(category, f"{category} podcast") return self.search_youtube_podcast(query, category, limit) def get_personalized_podcasts(self, user_preferences: Dict[str, Any]) -> List[Dict[str, Any]]: """ Get personalized podcast recommendations from YouTube Args: user_preferences: User's podcast preferences Returns: List of recommended podcasts """ interests = user_preferences.get("podcast_interests", ["technology"]) recommendations = [] for interest in interests[:2]: # Limit to 2 categories for speed podcasts = self.get_trending_podcasts(category=interest, limit=1) recommendations.extend(podcasts) return recommendations def get_tools_definition(self) -> List[Dict[str, Any]]: """Return MCP tools definition for this server""" return [ { "name": "get_trending_podcasts", "description": "Get trending podcasts by category from YouTube", "parameters": { "type": "object", "properties": { "category": { "type": "string", "description": "Podcast category (technology, business, comedy, education, news, true-crime)" }, "limit": { "type": "integer", "description": "Number of podcasts to return" } }, "required": ["category"] } }, { "name": "get_personalized_podcasts", "description": "Get personalized podcast recommendations from YouTube", "parameters": { "type": "object", "properties": { "user_preferences": { "type": "object", "description": "User's podcast preferences" } }, "required": ["user_preferences"] } }, { "name": "search_youtube_podcast", "description": "Search YouTube for specific podcasts", "parameters": { "type": "object", "properties": { "query": { "type": "string", "description": "Search query for podcasts" }, "category": { "type": "string", "description": "Podcast category for context" }, "limit": { "type": "integer", "description": "Number of results to return" } }, "required": ["query"] } } ]