Nikita Makarov committed on
Commit
2d5a12f
·
1 Parent(s): 11a8acd

Add retry logic and DNS error handling for YouTube searches

Browse files
src/mcp_servers/music_server.py CHANGED
@@ -4,6 +4,8 @@ import requests
4
  import yt_dlp
5
  import os
6
  import random
 
 
7
  from typing import List, Dict, Any, Optional
8
  from dataclasses import dataclass
9
 
@@ -33,6 +35,14 @@ class MusicMCPServer:
33
  self._recently_played = [] # List of video IDs
34
  self._max_recent = 20 # Remember last 20 tracks
35
 
 
 
 
 
 
 
 
 
36
  def _rate_limit_youtube(self):
37
  """Enforce rate limiting for YouTube API calls"""
38
  import time as time_module
@@ -125,7 +135,7 @@ class MusicMCPServer:
125
 
126
  def search_youtube_music(self, query: str, limit: int = 5, fast: bool = False, check_embed: bool = False) -> List[Dict[str, Any]]:
127
  """
128
- Search for free music on YouTube
129
 
130
  Args:
131
  query: Search query (e.g., "pop music", "jazz instrumental", "song name")
@@ -139,89 +149,130 @@ class MusicMCPServer:
139
  # Apply rate limiting
140
  self._rate_limit_youtube()
141
 
 
 
 
 
 
142
  tracks = []
143
- try:
144
- # Use extract_flat for faster search (no full video info)
145
- ydl_opts = {
146
- 'quiet': True,
147
- 'no_warnings': True,
148
- 'extract_flat': True, # Fast: only get basic info
149
- 'default_search': 'ytsearch',
150
- }
151
-
152
- # Search for more results to allow for filtering and random selection
153
- # Increase limit to account for filtering out recently played
154
- search_limit = max(limit * 3, 15)
155
-
156
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
157
- # Don't add "music" if query already contains it or is specific
158
- if "music" not in query.lower() and len(query.split()) < 4:
159
- search_query = f"ytsearch{search_limit}:{query} music"
160
- else:
161
- search_query = f"ytsearch{search_limit}:{query}"
162
-
163
- print(f"🔍 YouTube search query: '{search_query}'")
164
- results = ydl.extract_info(search_query, download=False)
165
 
166
- # Handle different result formats from yt-dlp
167
- entries = None
 
 
 
 
 
 
168
 
169
- if isinstance(results, dict):
170
- if 'entries' in results:
171
- entries = results['entries']
172
- elif 'id' in results:
173
- entries = [results]
174
- elif isinstance(results, list):
175
- entries = results
176
 
177
- if entries:
178
- # Filter valid entries
179
- valid_entries = []
180
- for entry in entries:
181
- if entry and isinstance(entry, dict):
182
- video_id = entry.get('id') or entry.get('url', '')
183
- if video_id and video_id != 'None':
184
- valid_entries.append(entry)
185
 
186
- # Randomly shuffle to avoid always picking top results
187
- if len(valid_entries) > 1:
188
- random.shuffle(valid_entries)
189
 
190
- # Filter, check embeddability, avoid recently played, and take requested limit
191
- for entry in valid_entries:
192
- if len(tracks) >= limit:
193
- break
194
-
195
- video_id = entry.get('id') or entry.get('url', '')
196
- if video_id:
197
- # Skip recently played tracks
198
- if self._is_recently_played(video_id):
199
- print(f" ✗ Skipping recently played: {entry.get('title', 'Unknown')}")
200
- continue
201
-
202
- # Check if video is embeddable (optional)
203
- if check_embed and not self.check_video_embeddable(video_id):
204
- print(f" ✗ Skipping non-embeddable: {entry.get('title', 'Unknown')}")
205
- continue
206
-
207
- track = {
208
- "title": entry.get('title', 'Unknown'),
209
- "artist": entry.get('uploader', entry.get('channel', 'Unknown Artist')),
210
- "url": f"https://www.youtube.com/watch?v={video_id}",
211
- "youtube_id": video_id,
212
- "duration": entry.get('duration', 0),
213
- "genre": query.split()[0] if query else "unknown",
214
- "source": "youtube"
215
- }
216
- tracks.append(track)
217
- # Mark as recently played
218
- self._add_to_recently_played(video_id)
219
- print(f" ✓ Found: {track['title']} by {track['artist']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
- except Exception as e:
222
- print(f"❌ Error searching YouTube: {e}")
223
- import traceback
224
- traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
  return tracks
227
 
 
4
  import yt_dlp
5
  import os
6
  import random
7
+ import socket
8
+ import time as time_module
9
  from typing import List, Dict, Any, Optional
10
  from dataclasses import dataclass
11
 
 
35
  self._recently_played = [] # List of video IDs
36
  self._max_recent = 20 # Remember last 20 tracks
37
 
38
+ def _check_youtube_available(self) -> bool:
39
+ """Check if YouTube is accessible via DNS"""
40
+ try:
41
+ socket.gethostbyname('www.youtube.com')
42
+ return True
43
+ except socket.gaierror:
44
+ return False
45
+
46
  def _rate_limit_youtube(self):
47
  """Enforce rate limiting for YouTube API calls"""
48
  import time as time_module
 
135
 
136
  def search_youtube_music(self, query: str, limit: int = 5, fast: bool = False, check_embed: bool = False) -> List[Dict[str, Any]]:
137
  """
138
+ Search for free music on YouTube with retry logic for network issues
139
 
140
  Args:
141
  query: Search query (e.g., "pop music", "jazz instrumental", "song name")
 
149
  # Apply rate limiting
150
  self._rate_limit_youtube()
151
 
152
+ # Check if YouTube is accessible before attempting search
153
+ if not self._check_youtube_available():
154
+ print("⚠️ YouTube is not accessible (DNS/network issue). Skipping YouTube search.")
155
+ return []
156
+
157
  tracks = []
158
+ max_retries = 3
159
+ retry_delay = 2 # Start with 2 seconds
160
+
161
+ for attempt in range(max_retries):
162
+ try:
163
+ # Try to resolve DNS first (helps diagnose network issues)
164
+ try:
165
+ socket.gethostbyname('www.youtube.com')
166
+ except socket.gaierror as dns_error:
167
+ if attempt < max_retries - 1:
168
+ print(f"⚠️ DNS resolution failed (attempt {attempt + 1}/{max_retries}), retrying in {retry_delay}s...")
169
+ time_module.sleep(retry_delay)
170
+ retry_delay *= 2 # Exponential backoff
171
+ continue
172
+ else:
173
+ print(f" DNS resolution failed after {max_retries} attempts. YouTube may be blocked or network unavailable.")
174
+ return tracks # Return empty list, will fallback to SoundCloud
 
 
 
 
 
175
 
176
+ # Use extract_flat for faster search (no full video info)
177
+ ydl_opts = {
178
+ 'quiet': True,
179
+ 'no_warnings': True,
180
+ 'extract_flat': True, # Fast: only get basic info
181
+ 'default_search': 'ytsearch',
182
+ 'socket_timeout': 30, # Increase timeout for network issues
183
+ }
184
 
185
+ # Search for more results to allow for filtering and random selection
186
+ # Increase limit to account for filtering out recently played
187
+ search_limit = max(limit * 3, 15)
 
 
 
 
188
 
189
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
190
+ # Don't add "music" if query already contains it or is specific
191
+ if "music" not in query.lower() and len(query.split()) < 4:
192
+ search_query = f"ytsearch{search_limit}:{query} music"
193
+ else:
194
+ search_query = f"ytsearch{search_limit}:{query}"
 
 
195
 
196
+ print(f"🔍 YouTube search query: '{search_query}'")
197
+ results = ydl.extract_info(search_query, download=False)
 
198
 
199
+ # Handle different result formats from yt-dlp
200
+ entries = None
201
+
202
+ if isinstance(results, dict):
203
+ if 'entries' in results:
204
+ entries = results['entries']
205
+ elif 'id' in results:
206
+ entries = [results]
207
+ elif isinstance(results, list):
208
+ entries = results
209
+
210
+ if entries:
211
+ # Filter valid entries
212
+ valid_entries = []
213
+ for entry in entries:
214
+ if entry and isinstance(entry, dict):
215
+ video_id = entry.get('id') or entry.get('url', '')
216
+ if video_id and video_id != 'None':
217
+ valid_entries.append(entry)
218
+
219
+ # Randomly shuffle to avoid always picking top results
220
+ if len(valid_entries) > 1:
221
+ random.shuffle(valid_entries)
222
+
223
+ # Filter, check embeddability, avoid recently played, and take requested limit
224
+ for entry in valid_entries:
225
+ if len(tracks) >= limit:
226
+ break
227
+
228
+ video_id = entry.get('id') or entry.get('url', '')
229
+ if video_id:
230
+ # Skip recently played tracks
231
+ if self._is_recently_played(video_id):
232
+ print(f" ✗ Skipping recently played: {entry.get('title', 'Unknown')}")
233
+ continue
234
+
235
+ # Check if video is embeddable (optional)
236
+ if check_embed and not self.check_video_embeddable(video_id):
237
+ print(f" ✗ Skipping non-embeddable: {entry.get('title', 'Unknown')}")
238
+ continue
239
+
240
+ track = {
241
+ "title": entry.get('title', 'Unknown'),
242
+ "artist": entry.get('uploader', entry.get('channel', 'Unknown Artist')),
243
+ "url": f"https://www.youtube.com/watch?v={video_id}",
244
+ "youtube_id": video_id,
245
+ "duration": entry.get('duration', 0),
246
+ "genre": query.split()[0] if query else "unknown",
247
+ "source": "youtube"
248
+ }
249
+ tracks.append(track)
250
+ # Mark as recently played
251
+ self._add_to_recently_played(video_id)
252
+ print(f" ✓ Found: {track['title']} by {track['artist']}")
253
+
254
+ # Success! Break out of retry loop
255
+ break
256
 
257
+ except (yt_dlp.utils.DownloadError, Exception) as e:
258
+ error_str = str(e)
259
+ # Check for DNS/network errors
260
+ if any(keyword in error_str for keyword in ["Failed to resolve", "No address associated", "NameResolutionError", "gaierror"]):
261
+ if attempt < max_retries - 1:
262
+ print(f"⚠️ Network/DNS error (attempt {attempt + 1}/{max_retries}): {error_str[:100]}...")
263
+ print(f" Retrying in {retry_delay}s...")
264
+ time_module.sleep(retry_delay)
265
+ retry_delay *= 2 # Exponential backoff
266
+ continue
267
+ else:
268
+ print(f"❌ Network error after {max_retries} attempts. YouTube unavailable.")
269
+ return tracks
270
+ else:
271
+ # Other errors, don't retry
272
+ print(f"❌ Error searching YouTube: {e}")
273
+ import traceback
274
+ traceback.print_exc()
275
+ break
276
 
277
  return tracks
278
 
src/mcp_servers/podcast_server.py CHANGED
@@ -1,6 +1,8 @@
1
  """MCP Server for Podcast Recommendations - YouTube Integration"""
2
  from typing import List, Dict, Any
3
  import random
 
 
4
 
5
  # Try to import yt-dlp
6
  try:
@@ -25,6 +27,14 @@ class PodcastMCPServer:
25
  self._recently_played = []
26
  self._max_recent = 10
27
 
 
 
 
 
 
 
 
 
28
  def _rate_limit_youtube(self):
29
  """Enforce rate limiting for YouTube API calls"""
30
  import time as time_module
@@ -88,7 +98,7 @@ class PodcastMCPServer:
88
 
89
  def search_youtube_podcast(self, query: str, category: str = "technology", limit: int = 5) -> List[Dict[str, Any]]:
90
  """
91
- Search YouTube for podcasts
92
 
93
  Args:
94
  query: Search query
@@ -102,78 +112,120 @@ class PodcastMCPServer:
102
  print("⚠️ yt-dlp not available, using demo podcasts")
103
  return self._get_demo_podcasts(category, limit)
104
 
 
 
 
 
 
105
  # Apply rate limiting
106
  self._rate_limit_youtube()
107
 
108
- try:
109
- # Build search query for podcasts
110
- search_query = f"{query} podcast {category}"
111
- print(f"🎙️ Searching YouTube for podcast: {search_query}")
112
-
113
- ydl_opts = {
114
- 'quiet': True,
115
- 'no_warnings': True,
116
- 'extract_flat': True,
117
- 'default_search': 'ytsearch10',
118
- }
119
-
120
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
121
- result = ydl.extract_info(f"ytsearch10:{search_query}", download=False)
122
-
123
- if not result:
124
- print("❌ No podcast results found")
125
- return self._get_demo_podcasts(category, limit)
126
 
127
- entries = result.get('entries', [])
128
- if not entries:
129
- print(" No podcast entries found")
130
- return self._get_demo_podcasts(category, limit)
131
 
132
- # Filter and format results, checking embeddability
133
- podcasts = []
134
- random.shuffle(entries) # Shuffle first for variety
 
 
 
 
135
 
136
- for entry in entries:
137
- if entry is None:
138
- continue
 
 
 
139
 
140
- if len(podcasts) >= limit:
141
- break
 
 
142
 
143
- video_id = entry.get('id', '')
144
- title = entry.get('title', 'Unknown Podcast')
145
- channel = entry.get('uploader', entry.get('channel', 'Unknown Host'))
146
- duration = entry.get('duration', 0)
147
 
148
- if video_id:
149
- # Check if video is embeddable
150
- if not self.check_video_embeddable(video_id):
151
- print(f" ✗ Skipping non-embeddable podcast: {title[:50]}")
152
  continue
153
 
154
- podcasts.append({
155
- "title": title,
156
- "description": f"Podcast episode about {category}",
157
- "host": channel,
158
- "duration": f"{duration // 60} min" if duration else "Unknown",
159
- "duration_seconds": duration or 0,
160
- "category": category,
161
- "rating": round(random.uniform(4.0, 5.0), 1),
162
- "source": "youtube",
163
- "youtube_id": video_id,
164
- "url": f"https://www.youtube.com/watch?v={video_id}"
165
- })
166
- print(f" ✓ Found embeddable podcast: {title[:50]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
- if podcasts:
169
- print(f"✅ Found {len(podcasts)} embeddable podcasts on YouTube")
170
- return podcasts
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  else:
 
 
172
  return self._get_demo_podcasts(category, limit)
173
-
174
- except Exception as e:
175
- print(f"❌ Error searching YouTube for podcasts: {e}")
176
- return self._get_demo_podcasts(category, limit)
177
 
178
  def _get_demo_podcasts(self, category: str, limit: int) -> List[Dict[str, Any]]:
179
  """Get demo podcasts as fallback"""
 
1
  """MCP Server for Podcast Recommendations - YouTube Integration"""
2
  from typing import List, Dict, Any
3
  import random
4
+ import socket
5
+ import time as time_module
6
 
7
  # Try to import yt-dlp
8
  try:
 
27
  self._recently_played = []
28
  self._max_recent = 10
29
 
30
+ def _check_youtube_available(self) -> bool:
31
+ """Check if YouTube is accessible via DNS"""
32
+ try:
33
+ socket.gethostbyname('www.youtube.com')
34
+ return True
35
+ except socket.gaierror:
36
+ return False
37
+
38
  def _rate_limit_youtube(self):
39
  """Enforce rate limiting for YouTube API calls"""
40
  import time as time_module
 
98
 
99
  def search_youtube_podcast(self, query: str, category: str = "technology", limit: int = 5) -> List[Dict[str, Any]]:
100
  """
101
+ Search YouTube for podcasts with retry logic for network issues
102
 
103
  Args:
104
  query: Search query
 
112
  print("⚠️ yt-dlp not available, using demo podcasts")
113
  return self._get_demo_podcasts(category, limit)
114
 
115
+ # Check if YouTube is accessible before attempting search
116
+ if not self._check_youtube_available():
117
+ print("⚠️ YouTube is not accessible (DNS/network issue). Using demo podcasts.")
118
+ return self._get_demo_podcasts(category, limit)
119
+
120
  # Apply rate limiting
121
  self._rate_limit_youtube()
122
 
123
+ max_retries = 3
124
+ retry_delay = 2 # Start with 2 seconds
125
+
126
+ for attempt in range(max_retries):
127
+ try:
128
+ # Try to resolve DNS first (helps diagnose network issues)
129
+ try:
130
+ socket.gethostbyname('www.youtube.com')
131
+ except socket.gaierror as dns_error:
132
+ if attempt < max_retries - 1:
133
+ print(f"⚠️ DNS resolution failed (attempt {attempt + 1}/{max_retries}), retrying in {retry_delay}s...")
134
+ time_module.sleep(retry_delay)
135
+ retry_delay *= 2 # Exponential backoff
136
+ continue
137
+ else:
138
+ print(f"❌ DNS resolution failed after {max_retries} attempts. Using demo podcasts.")
139
+ return self._get_demo_podcasts(category, limit)
 
140
 
141
+ # Build search query for podcasts
142
+ search_query = f"{query} podcast {category}"
143
+ print(f"🎙️ Searching YouTube for podcast: {search_query}")
 
144
 
145
+ ydl_opts = {
146
+ 'quiet': True,
147
+ 'no_warnings': True,
148
+ 'extract_flat': True,
149
+ 'default_search': 'ytsearch10',
150
+ 'socket_timeout': 30, # Increase timeout for network issues
151
+ }
152
 
153
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
154
+ result = ydl.extract_info(f"ytsearch10:{search_query}", download=False)
155
+
156
+ if not result:
157
+ print("❌ No podcast results found")
158
+ return self._get_demo_podcasts(category, limit)
159
 
160
+ entries = result.get('entries', [])
161
+ if not entries:
162
+ print("❌ No podcast entries found")
163
+ return self._get_demo_podcasts(category, limit)
164
 
165
+ # Filter and format results, checking embeddability
166
+ podcasts = []
167
+ random.shuffle(entries) # Shuffle first for variety
 
168
 
169
+ for entry in entries:
170
+ if entry is None:
 
 
171
  continue
172
 
173
+ if len(podcasts) >= limit:
174
+ break
175
+
176
+ video_id = entry.get('id', '')
177
+ title = entry.get('title', 'Unknown Podcast')
178
+ channel = entry.get('uploader', entry.get('channel', 'Unknown Host'))
179
+ duration = entry.get('duration', 0)
180
+
181
+ if video_id:
182
+ # Check if video is embeddable
183
+ if not self.check_video_embeddable(video_id):
184
+ print(f" ✗ Skipping non-embeddable podcast: {title[:50]}")
185
+ continue
186
+
187
+ podcasts.append({
188
+ "title": title,
189
+ "description": f"Podcast episode about {category}",
190
+ "host": channel,
191
+ "duration": f"{duration // 60} min" if duration else "Unknown",
192
+ "duration_seconds": duration or 0,
193
+ "category": category,
194
+ "rating": round(random.uniform(4.0, 5.0), 1),
195
+ "source": "youtube",
196
+ "youtube_id": video_id,
197
+ "url": f"https://www.youtube.com/watch?v={video_id}"
198
+ })
199
+ print(f" ✓ Found embeddable podcast: {title[:50]}")
200
+
201
+ if podcasts:
202
+ print(f"✅ Found {len(podcasts)} embeddable podcasts on YouTube")
203
+ return podcasts
204
+ else:
205
+ return self._get_demo_podcasts(category, limit)
206
 
207
+ # Success! Break out of retry loop
208
+ break
209
+
210
+ except (yt_dlp.utils.DownloadError, Exception) as e:
211
+ error_str = str(e)
212
+ # Check for DNS/network errors
213
+ if any(keyword in error_str for keyword in ["Failed to resolve", "No address associated", "NameResolutionError", "gaierror"]):
214
+ if attempt < max_retries - 1:
215
+ print(f"⚠️ Network/DNS error (attempt {attempt + 1}/{max_retries}): {error_str[:100]}...")
216
+ print(f" Retrying in {retry_delay}s...")
217
+ time_module.sleep(retry_delay)
218
+ retry_delay *= 2 # Exponential backoff
219
+ continue
220
+ else:
221
+ print(f"❌ Network error after {max_retries} attempts. Using demo podcasts.")
222
+ return self._get_demo_podcasts(category, limit)
223
  else:
224
+ # Other errors, don't retry
225
+ print(f"❌ Error searching YouTube for podcasts: {e}")
226
  return self._get_demo_podcasts(category, limit)
227
+
228
+ return self._get_demo_podcasts(category, limit)
 
 
229
 
230
  def _get_demo_podcasts(self, category: str, limit: int) -> List[Dict[str, Any]]:
231
  """Get demo podcasts as fallback"""