AI-RADIO / src /mcp_servers /podcast_server.py
Nikita Makarov
Add retry logic and DNS error handling for YouTube searches
2d5a12f
"""MCP Server for Podcast Recommendations - YouTube Integration"""
from typing import List, Dict, Any
import random
import socket
import time as time_module
# yt-dlp is an optional dependency. When it cannot be imported the module
# still loads; YT_DLP_AVAILABLE records the outcome so callers can branch on
# it instead of re-attempting the import.
try:
    import yt_dlp
    YT_DLP_AVAILABLE = True
except ImportError:
    # Keep the name bound so an accidental reference fails with a clear
    # AttributeError on None rather than a NameError.
    yt_dlp = None
    YT_DLP_AVAILABLE = False
    print("⚠️ yt-dlp not available for podcast search")
class PodcastMCPServer:
    """MCP server for podcast discovery and recommendations via YouTube.

    Searches go through yt-dlp when it is importable and YouTube resolves
    via DNS; in every other case (missing dependency, DNS failure, search
    error, no usable results) the public methods fall back to a small
    built-in catalogue of demo podcasts instead of raising.
    """

    # Substrings of an exception message that mark the failure as a
    # DNS/network problem, i.e. one that is retried with backoff.
    _NETWORK_ERROR_KEYWORDS = (
        "Failed to resolve",
        "No address associated",
        "NameResolutionError",
        "gaierror",
    )

    # Search phrase used for each known category by get_trending_podcasts.
    _SEARCH_TERMS = {
        "technology": "tech podcast 2024",
        "business": "business podcast interview",
        "comedy": "comedy podcast funny",
        "education": "educational podcast learn",
        "news": "news podcast analysis",
        "true-crime": "true crime podcast",
    }

    # Fallback catalogue. Treated as read-only: _get_demo_podcasts hands out
    # copies so callers cannot mutate these shared dicts.
    _DEMO_PODCASTS = {
        "technology": [
            {
                "title": "The AI Revolution",
                "description": "Exploring the latest in artificial intelligence and machine learning",
                "host": "Dr. Sarah Chen",
                "duration": "45 min",
                "category": "technology",
                "rating": 4.8,
                "source": "demo",
            },
            {
                "title": "Code & Coffee",
                "description": "Daily dose of programming tips and tech news",
                "host": "Alex Rodriguez",
                "duration": "30 min",
                "category": "technology",
                "rating": 4.6,
                "source": "demo",
            },
        ],
        "business": [
            {
                "title": "Startup Stories",
                "description": "Interviews with successful entrepreneurs",
                "host": "Michael Zhang",
                "duration": "50 min",
                "category": "business",
                "rating": 4.7,
                "source": "demo",
            },
        ],
        "comedy": [
            {
                "title": "Daily Laughs",
                "description": "Your daily dose of comedy and humor",
                "host": "Jenny Smith",
                "duration": "35 min",
                "category": "comedy",
                "rating": 4.5,
                "source": "demo",
            },
        ],
        "education": [
            {
                "title": "Learn Something New",
                "description": "Fascinating facts and educational content",
                "host": "Prof. David Lee",
                "duration": "40 min",
                "category": "education",
                "rating": 4.9,
                "source": "demo",
            },
        ],
        "news": [
            {
                "title": "World Today",
                "description": "Daily news analysis and commentary",
                "host": "Maria Garcia",
                "duration": "25 min",
                "category": "news",
                "rating": 4.6,
                "source": "demo",
            },
        ],
    }

    def __init__(self):
        """Set up identity fields, the embeddability cache and rate-limit state."""
        self.name = "podcast_server"
        self.description = "Provides podcast recommendations from YouTube"
        # video_id -> bool result of check_video_embeddable
        self._embed_cache = {}
        # Rate limiting: timestamp (time.time()) of the last YouTube search
        # and the minimum gap, in seconds, enforced between searches.
        self._last_youtube_call = 0
        self._min_call_interval = 3.0
        # Recently played podcasts; initialised here but not read by any
        # method of this class.
        self._recently_played = []
        self._max_recent = 10

    def _check_youtube_available(self) -> bool:
        """Return True if 'www.youtube.com' resolves via DNS, else False."""
        try:
            socket.gethostbyname('www.youtube.com')
        except socket.gaierror:
            return False
        return True

    def _rate_limit_youtube(self):
        """Sleep as needed so searches are at least _min_call_interval apart."""
        elapsed = time_module.time() - self._last_youtube_call
        if elapsed < self._min_call_interval:
            sleep_time = self._min_call_interval - elapsed
            print(f"⏳ Podcast rate limiting: waiting {sleep_time:.1f}s...")
            time_module.sleep(sleep_time)
        # Stamp after any sleep so the next interval starts from now.
        self._last_youtube_call = time_module.time()

    def check_video_embeddable(self, video_id: str) -> bool:
        """Check whether a YouTube video can be used as an embedded podcast.

        A video is rejected when yt-dlp returns no info, reports an
        availability other than 'public', a positive age limit, or
        ``playable_in_embed`` set to False. Any exception during the lookup
        also counts as "not embeddable". Results are cached per video id.

        Args:
            video_id: YouTube video id.

        Returns:
            True if the video passed every check, or if yt-dlp is not
            available and nothing can be checked; False otherwise.
        """
        if video_id in self._embed_cache:
            return self._embed_cache[video_id]
        if not YT_DLP_AVAILABLE:
            return True  # Assume embeddable if we can't check

        try:
            ydl_opts = {
                'quiet': True,
                'no_warnings': True,
                'skip_download': True,
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                url = f"https://www.youtube.com/watch?v={video_id}"
                info = ydl.extract_info(url, download=False)

            embeddable = True
            if not info:
                embeddable = False
            else:
                availability = info.get('availability')
                if availability and availability != 'public':
                    print(f"⚠️ Podcast {video_id}: not public ({availability})")
                    embeddable = False
                # `or 0`: an explicit None age limit must not raise on the
                # comparison and get the video rejected by the except below.
                elif (info.get('age_limit') or 0) > 0:
                    print(f"⚠️ Podcast {video_id}: age restricted")
                    embeddable = False
                elif info.get('playable_in_embed') is False:
                    print(f"⚠️ Podcast {video_id}: not playable in embed")
                    embeddable = False
        except Exception as e:
            # Best-effort check: a failed lookup is reported and treated as
            # "not embeddable" rather than propagated to the search.
            print(f"⚠️ Podcast {video_id}: check failed - {e}")
            embeddable = False

        self._embed_cache[video_id] = embeddable
        return embeddable

    def _entry_to_podcast(self, entry: Dict[str, Any], category: str) -> Dict[str, Any]:
        """Convert one yt-dlp search entry into this server's podcast dict."""
        video_id = entry.get('id') or ''
        # `or` chains (not .get defaults) so keys present with a None value
        # still fall through to the placeholder text.
        title = entry.get('title') or 'Unknown Podcast'
        channel = entry.get('uploader') or entry.get('channel') or 'Unknown Host'
        duration = entry.get('duration') or 0
        return {
            "title": title,
            "description": f"Podcast episode about {category}",
            "host": channel,
            # int(): the duration may arrive as a float, which would
            # otherwise render as e.g. "20.0 min".
            "duration": f"{int(duration // 60)} min" if duration else "Unknown",
            "duration_seconds": duration,
            "category": category,
            "rating": round(random.uniform(4.0, 5.0), 1),
            "source": "youtube",
            "youtube_id": video_id,
            "url": f"https://www.youtube.com/watch?v={video_id}",
        }

    def _search_embeddable_podcasts(self, query: str, category: str, limit: int) -> List[Dict[str, Any]]:
        """Run one YouTube search and return up to `limit` embeddable results.

        Returns an empty list when the search yields nothing usable.
        Exceptions raised by yt-dlp propagate to the caller.
        """
        search_query = f"{query} podcast {category}"
        print(f"🎙️ Searching YouTube for podcast: {search_query}")
        ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            'extract_flat': True,
            'default_search': 'ytsearch10',
            'socket_timeout': 30,  # Increase timeout for network issues
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(f"ytsearch10:{search_query}", download=False)
        if not result:
            print("❌ No podcast results found")
            return []

        # Materialise first: shuffle and the emptiness test both need a
        # real list, and the key may be present with a None value.
        entries = list(result.get('entries') or [])
        if not entries:
            print("❌ No podcast entries found")
            return []

        random.shuffle(entries)  # Shuffle first for variety
        podcasts = []
        for entry in entries:
            if len(podcasts) >= limit:
                break
            if entry is None:
                continue
            video_id = entry.get('id') or ''
            if not video_id:
                continue
            title = entry.get('title') or 'Unknown Podcast'
            if not self.check_video_embeddable(video_id):
                print(f" ✗ Skipping non-embeddable podcast: {title[:50]}")
                continue
            podcasts.append(self._entry_to_podcast(entry, category))
            print(f" ✓ Found embeddable podcast: {title[:50]}")
        return podcasts

    def search_youtube_podcast(self, query: str, category: str = "technology", limit: int = 5) -> List[Dict[str, Any]]:
        """
        Search YouTube for podcasts with retry logic for network issues

        DNS failures and exceptions whose message contains a known
        network-error keyword are retried up to 3 attempts with exponential
        backoff (2s, then 4s). Every other failure, and a search with no
        embeddable result, falls back to the demo podcasts.

        Args:
            query: Search query
            category: Podcast category for context
            limit: Number of results to return

        Returns:
            List of podcast information with YouTube URLs, or demo podcasts
            when YouTube could not be searched
        """
        if not YT_DLP_AVAILABLE:
            print("⚠️ yt-dlp not available, using demo podcasts")
            return self._get_demo_podcasts(category, limit)
        # Check if YouTube is accessible before attempting search
        if not self._check_youtube_available():
            print("⚠️ YouTube is not accessible (DNS/network issue). Using demo podcasts.")
            return self._get_demo_podcasts(category, limit)

        self._rate_limit_youtube()

        max_retries = 3
        retry_delay = 2  # Seconds; doubled after every failed attempt
        for attempt in range(max_retries):
            is_last_attempt = attempt >= max_retries - 1
            try:
                # Re-probe DNS on each attempt (helps diagnose network issues)
                if not self._check_youtube_available():
                    if is_last_attempt:
                        print(f"❌ DNS resolution failed after {max_retries} attempts. Using demo podcasts.")
                        return self._get_demo_podcasts(category, limit)
                    print(f"⚠️ DNS resolution failed (attempt {attempt + 1}/{max_retries}), retrying in {retry_delay}s...")
                    time_module.sleep(retry_delay)
                    retry_delay *= 2
                    continue

                podcasts = self._search_embeddable_podcasts(query, category, limit)
                if podcasts:
                    print(f"✅ Found {len(podcasts)} embeddable podcasts on YouTube")
                    return podcasts
                return self._get_demo_podcasts(category, limit)
            except Exception as e:
                # Deliberately broad: this method never raises for a failed
                # search, it degrades to demo data instead.
                error_str = str(e)
                if not any(keyword in error_str for keyword in self._NETWORK_ERROR_KEYWORDS):
                    # Other errors, don't retry
                    print(f"❌ Error searching YouTube for podcasts: {e}")
                    return self._get_demo_podcasts(category, limit)
                if is_last_attempt:
                    print(f"❌ Network error after {max_retries} attempts. Using demo podcasts.")
                    return self._get_demo_podcasts(category, limit)
                print(f"⚠️ Network/DNS error (attempt {attempt + 1}/{max_retries}): {error_str[:100]}...")
                print(f" Retrying in {retry_delay}s...")
                time_module.sleep(retry_delay)
                retry_delay *= 2

        # Safety net; every loop iteration above returns or retries.
        return self._get_demo_podcasts(category, limit)

    def _get_demo_podcasts(self, category: str, limit: int) -> List[Dict[str, Any]]:
        """Get demo podcasts as fallback

        Args:
            category: Category to look up; unknown categories get the
                "technology" demos.
            limit: Maximum number of podcasts to return. Negative values
                are treated as 0; None means no limit.

        Returns:
            Fresh copies of at most `limit` demo podcast dicts
        """
        if limit is not None:
            # A negative slice bound would drop items from the end instead
            # of returning nothing.
            limit = max(limit, 0)
        selected = self._DEMO_PODCASTS.get(category, self._DEMO_PODCASTS["technology"])
        return [dict(podcast) for podcast in selected[:limit]]

    def get_trending_podcasts(self, category: str = "technology", limit: int = 5) -> List[Dict[str, Any]]:
        """
        Get trending podcasts by category from YouTube

        Args:
            category: Podcast category (technology, business, comedy,
                education, news, true-crime); other values are searched
                as "<category> podcast"
            limit: Number of podcasts to return

        Returns:
            List of podcast information
        """
        query = self._SEARCH_TERMS.get(category, f"{category} podcast")
        return self.search_youtube_podcast(query, category, limit)

    def get_personalized_podcasts(self, user_preferences: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Get personalized podcast recommendations from YouTube

        Reads ``user_preferences["podcast_interests"]`` (default
        ``["technology"]``) and fetches one trending podcast for each of
        the first two interests.

        Args:
            user_preferences: User's podcast preferences

        Returns:
            List of recommended podcasts
        """
        interests = (user_preferences or {}).get("podcast_interests", ["technology"])
        if interests is None:
            interests = ["technology"]
        elif isinstance(interests, str):
            # A bare string is one interest, not a sequence of characters.
            interests = [interests]

        recommendations = []
        for interest in list(interests)[:2]:  # Limit to 2 categories for speed
            recommendations.extend(self.get_trending_podcasts(category=interest, limit=1))
        return recommendations

    def get_tools_definition(self) -> List[Dict[str, Any]]:
        """Return MCP tools definition for this server"""
        return [
            {
                "name": "get_trending_podcasts",
                "description": "Get trending podcasts by category from YouTube",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "category": {
                            "type": "string",
                            "description": "Podcast category (technology, business, comedy, education, news, true-crime)"
                        },
                        "limit": {
                            "type": "integer",
                            "description": "Number of podcasts to return"
                        }
                    },
                    "required": ["category"]
                }
            },
            {
                "name": "get_personalized_podcasts",
                "description": "Get personalized podcast recommendations from YouTube",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "user_preferences": {
                            "type": "object",
                            "description": "User's podcast preferences"
                        }
                    },
                    "required": ["user_preferences"]
                }
            },
            {
                "name": "search_youtube_podcast",
                "description": "Search YouTube for specific podcasts",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query for podcasts"
                        },
                        "category": {
                            "type": "string",
                            "description": "Podcast category for context"
                        },
                        "limit": {
                            "type": "integer",
                            "description": "Number of results to return"
                        }
                    },
                    "required": ["query"]
                }
            }
        ]