Spaces:

MCP-1st-Birthday
/

AI-RADIO

Sleeping

AI-RADIO / src /mcp_servers /podcast_server.py

Nikita Makarov

Add retry logic and DNS error handling for YouTube searches

2d5a12f 3 months ago

16.8 kB

	"""MCP Server for Podcast Recommendations - YouTube Integration"""
	from typing import List, Dict, Any
	import random
	import socket
	import time as time_module

	# Try to import yt-dlp
	try:
	import yt_dlp
	YT_DLP_AVAILABLE = True
	except ImportError:
	YT_DLP_AVAILABLE = False
	print("⚠️ yt-dlp not available for podcast search")


	class PodcastMCPServer:
	    """MCP server for podcast discovery and recommendations via YouTube.

	    Searches are performed with yt-dlp when it is importable and the YouTube
	    hostname resolves. In every other case (missing dependency, DNS failure,
	    search error, no embeddable hit) entries from a small built-in demo
	    catalogue are returned instead, so callers always receive a list.
	    """

	    # Hostname resolved to decide whether YouTube is reachable.
	    _YOUTUBE_HOST = "www.youtube.com"

	    # How many search hits are requested from YouTube per query.
	    _SEARCH_POOL_SIZE = 10

	    # Error-message fragments that mark a DNS/network failure worth retrying.
	    _NETWORK_ERROR_MARKERS = (
	        "Failed to resolve",
	        "No address associated",
	        "NameResolutionError",
	        "gaierror",
	    )

	    # Search phrase per category used by get_trending_podcasts().
	    _TRENDING_SEARCH_TERMS = {
	        "technology": "tech podcast 2024",
	        "business": "business podcast interview",
	        "comedy": "comedy podcast funny",
	        "education": "educational podcast learn",
	        "news": "news podcast analysis",
	        "true-crime": "true crime podcast",
	    }

	    # Fallback catalogue keyed by category; unknown categories use "technology".
	    _DEMO_PODCASTS = {
	        "technology": [
	            {
	                "title": "The AI Revolution",
	                "description": "Exploring the latest in artificial intelligence and machine learning",
	                "host": "Dr. Sarah Chen",
	                "duration": "45 min",
	                "category": "technology",
	                "rating": 4.8,
	                "source": "demo",
	            },
	            {
	                "title": "Code & Coffee",
	                "description": "Daily dose of programming tips and tech news",
	                "host": "Alex Rodriguez",
	                "duration": "30 min",
	                "category": "technology",
	                "rating": 4.6,
	                "source": "demo",
	            },
	        ],
	        "business": [
	            {
	                "title": "Startup Stories",
	                "description": "Interviews with successful entrepreneurs",
	                "host": "Michael Zhang",
	                "duration": "50 min",
	                "category": "business",
	                "rating": 4.7,
	                "source": "demo",
	            },
	        ],
	        "comedy": [
	            {
	                "title": "Daily Laughs",
	                "description": "Your daily dose of comedy and humor",
	                "host": "Jenny Smith",
	                "duration": "35 min",
	                "category": "comedy",
	                "rating": 4.5,
	                "source": "demo",
	            },
	        ],
	        "education": [
	            {
	                "title": "Learn Something New",
	                "description": "Fascinating facts and educational content",
	                "host": "Prof. David Lee",
	                "duration": "40 min",
	                "category": "education",
	                "rating": 4.9,
	                "source": "demo",
	            },
	        ],
	        "news": [
	            {
	                "title": "World Today",
	                "description": "Daily news analysis and commentary",
	                "host": "Maria Garcia",
	                "duration": "25 min",
	                "category": "news",
	                "rating": 4.6,
	                "source": "demo",
	            },
	        ],
	    }

	    def __init__(self):
	        self.name = "podcast_server"
	        self.description = "Provides podcast recommendations from YouTube"
	        # video_id -> bool verdict from check_video_embeddable()
	        self._embed_cache = {}
	        # Rate limiting
	        self._last_youtube_call = 0
	        self._min_call_interval = 3.0  # Minimum 3 seconds between YouTube calls
	        # Track recently played podcasts (not consulted by any method of this class)
	        self._recently_played = []
	        self._max_recent = 10

	    def _check_youtube_available(self) -> bool:
	        """Return True when the YouTube hostname resolves via DNS."""
	        try:
	            socket.gethostbyname(self._YOUTUBE_HOST)
	        except OSError:
	            # socket.gaierror is an OSError; any resolver failure means "unreachable".
	            return False
	        return True

	    def _rate_limit_youtube(self) -> None:
	        """Sleep as needed so YouTube searches are at least `_min_call_interval` seconds apart."""
	        elapsed = time_module.time() - self._last_youtube_call
	        if elapsed < self._min_call_interval:
	            sleep_time = self._min_call_interval - elapsed
	            print(f"⏳ Podcast rate limiting: waiting {sleep_time:.1f}s...")
	            time_module.sleep(sleep_time)
	        self._last_youtube_call = time_module.time()

	    @staticmethod
	    def _embed_verdict(video_id: str, info: Any) -> bool:
	        """Decide from a yt-dlp info dict whether the video can be embedded.

	        Args:
	            video_id: YouTube video id, used only in the diagnostic messages.
	            info: Metadata mapping returned by yt-dlp, or a falsy value.

	        Returns:
	            False when the metadata is missing, the video is not public, it is
	            age restricted, or it is flagged as not playable in an embed.
	        """
	        if not info:
	            return False

	        availability = info.get('availability')
	        if availability and availability != 'public':
	            print(f"⚠️ Podcast {video_id}: not public ({availability})")
	            return False

	        # `or 0` guards against the key being present with a None value, which
	        # would otherwise raise TypeError in the comparison.
	        if (info.get('age_limit') or 0) > 0:
	            print(f"⚠️ Podcast {video_id}: age restricted")
	            return False

	        # Only an explicit False disqualifies; a missing flag is treated as playable.
	        if info.get('playable_in_embed') is False:
	            print(f"⚠️ Podcast {video_id}: not playable in embed")
	            return False

	        return True

	    def check_video_embeddable(self, video_id: str) -> bool:
	        """Check if a YouTube video is embeddable.

	        Verdicts are cached per video id. When yt-dlp is unavailable the video
	        is assumed embeddable and nothing is cached.

	        Args:
	            video_id: YouTube video id.

	        Returns:
	            True if the video may be embedded, False otherwise (including when
	            the metadata lookup fails).
	        """
	        if video_id in self._embed_cache:
	            return self._embed_cache[video_id]

	        if not YT_DLP_AVAILABLE:
	            return True  # Assume embeddable if we can't check

	        try:
	            ydl_opts = {
	                'quiet': True,
	                'no_warnings': True,
	                'skip_download': True,
	            }
	            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	                url = f"https://www.youtube.com/watch?v={video_id}"
	                info = ydl.extract_info(url, download=False)
	            embeddable = self._embed_verdict(video_id, info)
	        except Exception as e:
	            # Best effort: any lookup failure counts as "not embeddable".
	            print(f"⚠️ Podcast {video_id}: check failed - {e}")
	            embeddable = False

	        self._embed_cache[video_id] = embeddable
	        return embeddable

	    @classmethod
	    def _is_network_error(cls, error_text: str) -> bool:
	        """Return True when the error message looks like a DNS/network failure."""
	        return any(marker in error_text for marker in cls._NETWORK_ERROR_MARKERS)

	    @staticmethod
	    def _format_entry(entry: Dict[str, Any], category: str):
	        """Convert one search entry into a podcast record.

	        Returns:
	            The record dict, or None when the entry carries no video id.
	        """
	        video_id = entry.get('id') or ''
	        if not video_id:
	            return None

	        # `or` (rather than a .get default) also covers keys present with None.
	        title = entry.get('title') or 'Unknown Podcast'
	        channel = entry.get('uploader') or entry.get('channel') or 'Unknown Host'

	        # The duration may arrive as a float (e.g. 125.0); normalise to whole
	        # seconds so the label reads "2 min" instead of "2.0 min".
	        try:
	            seconds = int(entry.get('duration') or 0)
	        except (TypeError, ValueError, OverflowError):
	            seconds = 0

	        return {
	            "title": title,
	            "description": f"Podcast episode about {category}",
	            "host": channel,
	            "duration": f"{seconds // 60} min" if seconds else "Unknown",
	            "duration_seconds": seconds,
	            "category": category,
	            "rating": round(random.uniform(4.0, 5.0), 1),
	            "source": "youtube",
	            "youtube_id": video_id,
	            "url": f"https://www.youtube.com/watch?v={video_id}",
	        }

	    def _search_once(self, query: str, category: str, limit: int) -> List[Dict[str, Any]]:
	        """Run a single YouTube search and return up to `limit` embeddable podcasts.

	        Returns an empty list when the search yields nothing usable. Errors
	        raised by yt-dlp propagate to the caller, which owns the retry policy.
	        """
	        search_query = f"{query} podcast {category}"
	        print(f"🎙️ Searching YouTube for podcast: {search_query}")

	        search_prefix = f"ytsearch{self._SEARCH_POOL_SIZE}"
	        ydl_opts = {
	            'quiet': True,
	            'no_warnings': True,
	            'extract_flat': True,
	            'default_search': search_prefix,
	            'socket_timeout': 30,  # Increase timeout for network issues
	        }
	        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	            result = ydl.extract_info(f"{search_prefix}:{search_query}", download=False)

	        if not result:
	            print("❌ No podcast results found")
	            return []

	        # Materialise into a list (shuffle needs one) and drop empty entries.
	        entries = [entry for entry in (result.get('entries') or []) if entry]
	        if not entries:
	            print("❌ No podcast entries found")
	            return []

	        random.shuffle(entries)  # Shuffle first for variety

	        podcasts = []
	        for entry in entries:
	            if len(podcasts) >= limit:
	                break

	            record = self._format_entry(entry, category)
	            if record is None:
	                continue

	            title = record["title"]
	            if not self.check_video_embeddable(record["youtube_id"]):
	                print(f" ✗ Skipping non-embeddable podcast: {title[:50]}")
	                continue

	            podcasts.append(record)
	            print(f" ✓ Found embeddable podcast: {title[:50]}")

	        return podcasts

	    def search_youtube_podcast(self, query: str, category: str = "technology", limit: int = 5) -> List[Dict[str, Any]]:
	        """
	        Search YouTube for podcasts with retry logic for network issues

	        DNS and network failures are retried up to three times with exponential
	        backoff (2s, then 4s). Any other error, an empty result, or exhausted
	        retries fall back to the demo catalogue.

	        Args:
	            query: Search query
	            category: Podcast category for context
	            limit: Number of results to return

	        Returns:
	            List of podcast information with YouTube URLs
	        """
	        if not YT_DLP_AVAILABLE:
	            print("⚠️ yt-dlp not available, using demo podcasts")
	            return self._get_demo_podcasts(category, limit)

	        # Check if YouTube is accessible before attempting search
	        if not self._check_youtube_available():
	            print("⚠️ YouTube is not accessible (DNS/network issue). Using demo podcasts.")
	            return self._get_demo_podcasts(category, limit)

	        # Apply rate limiting
	        self._rate_limit_youtube()

	        max_retries = 3
	        retry_delay = 2  # Start with 2 seconds

	        for attempt in range(max_retries):
	            is_last_attempt = attempt == max_retries - 1

	            # Resolve DNS first (helps diagnose network issues)
	            if not self._check_youtube_available():
	                if is_last_attempt:
	                    print(f"❌ DNS resolution failed after {max_retries} attempts. Using demo podcasts.")
	                    break
	                print(f"⚠️ DNS resolution failed (attempt {attempt + 1}/{max_retries}), retrying in {retry_delay}s...")
	                time_module.sleep(retry_delay)
	                retry_delay *= 2  # Exponential backoff
	                continue

	            try:
	                podcasts = self._search_once(query, category, limit)
	            except Exception as e:
	                error_str = str(e)
	                if not self._is_network_error(error_str):
	                    # Other errors, don't retry
	                    print(f"❌ Error searching YouTube for podcasts: {e}")
	                    break
	                if is_last_attempt:
	                    print(f"❌ Network error after {max_retries} attempts. Using demo podcasts.")
	                    break
	                print(f"⚠️ Network/DNS error (attempt {attempt + 1}/{max_retries}): {error_str[:100]}...")
	                print(f" Retrying in {retry_delay}s...")
	                time_module.sleep(retry_delay)
	                retry_delay *= 2  # Exponential backoff
	                continue

	            if podcasts:
	                print(f"✅ Found {len(podcasts)} embeddable podcasts on YouTube")
	                return podcasts
	            # The search worked but produced nothing usable; retrying will not help.
	            break

	        return self._get_demo_podcasts(category, limit)

	    def _get_demo_podcasts(self, category: str, limit: int) -> List[Dict[str, Any]]:
	        """Get demo podcasts as fallback.

	        Args:
	            category: Catalogue key; unknown categories fall back to "technology".
	            limit: Maximum number of entries. Negative values yield an empty
	                list; None returns every entry of the category.

	        Returns:
	            Copies of the catalogue entries, so callers may mutate them freely.
	        """
	        catalogue = self._DEMO_PODCASTS
	        podcasts = catalogue.get(category, catalogue["technology"])
	        # A negative limit would slice from the end; clamp it to zero instead.
	        count = None if limit is None else max(limit, 0)
	        return [dict(podcast) for podcast in podcasts[:count]]

	    def get_trending_podcasts(self, category: str = "technology", limit: int = 5) -> List[Dict[str, Any]]:
	        """
	        Get trending podcasts by category from YouTube

	        Args:
	            category: Podcast category (technology, business, comedy, education, news, true-crime)
	            limit: Number of podcasts to return

	        Returns:
	            List of podcast information
	        """
	        # Categories without a curated phrase are searched as "<category> podcast".
	        query = self._TRENDING_SEARCH_TERMS.get(category, f"{category} podcast")
	        return self.search_youtube_podcast(query, category, limit)

	    def get_personalized_podcasts(self, user_preferences: Dict[str, Any]) -> List[Dict[str, Any]]:
	        """
	        Get personalized podcast recommendations from YouTube

	        One podcast is fetched for each of the first two distinct interests.

	        Args:
	            user_preferences: User's podcast preferences. The "podcast_interests"
	                key may hold a list of category names or a single name; when it
	                is missing or None, "technology" is used.

	        Returns:
	            List of recommended podcasts without duplicates
	        """
	        interests = (user_preferences or {}).get("podcast_interests")
	        if interests is None:
	            interests = ["technology"]
	        elif isinstance(interests, str):
	            # A bare string would otherwise be iterated character by character.
	            interests = [interests]

	        # Order-preserving de-duplication of the usable category names.
	        categories = list(dict.fromkeys(
	            interest for interest in interests if isinstance(interest, str) and interest
	        ))

	        recommendations = []
	        seen = set()
	        for interest in categories[:2]:  # Limit to 2 categories for speed
	            for podcast in self.get_trending_podcasts(category=interest, limit=1):
	                # Different interests can fall back to the same demo entry.
	                key = podcast.get("youtube_id") or (podcast.get("title"), podcast.get("host"))
	                if key in seen:
	                    continue
	                seen.add(key)
	                recommendations.append(podcast)

	        return recommendations

	    def get_tools_definition(self) -> List[Dict[str, Any]]:
	        """Return MCP tools definition for this server"""

	        def param(kind: str, description: str) -> Dict[str, Any]:
	            # Schema of one tool parameter.
	            return {"type": kind, "description": description}

	        def tool(name: str, description: str, properties: Dict[str, Any], required: List[str]) -> Dict[str, Any]:
	            # Schema of one tool taking an object of named parameters.
	            return {
	                "name": name,
	                "description": description,
	                "parameters": {
	                    "type": "object",
	                    "properties": properties,
	                    "required": required,
	                },
	            }

	        return [
	            tool(
	                "get_trending_podcasts",
	                "Get trending podcasts by category from YouTube",
	                {
	                    "category": param(
	                        "string",
	                        "Podcast category (technology, business, comedy, education, news, true-crime)",
	                    ),
	                    "limit": param("integer", "Number of podcasts to return"),
	                },
	                ["category"],
	            ),
	            tool(
	                "get_personalized_podcasts",
	                "Get personalized podcast recommendations from YouTube",
	                {
	                    "user_preferences": param("object", "User's podcast preferences"),
	                },
	                ["user_preferences"],
	            ),
	            tool(
	                "search_youtube_podcast",
	                "Search YouTube for specific podcasts",
	                {
	                    "query": param("string", "Search query for podcasts"),
	                    "category": param("string", "Podcast category for context"),
	                    "limit": param("integer", "Number of results to return"),
	                },
	                ["query"],
	            ),
	        ]