Ralitza Mondal committed on
Commit
c066961
·
1 Parent(s): 67ac29c

✨ Switch to YouTube Data API for reliable video search

Browse files

- Replace web scraper with official YouTube Data API
- Add YouTubeAPIClient with proper error handling
- Update imports to try API first, fall back to scraper
- Fixes video search returning empty results
- More reliable and won't break from YouTube changes

Files changed (2) hide show
  1. multi_agent_coach.py +8 -1
  2. youtube_api.py +123 -0
multi_agent_coach.py CHANGED
@@ -14,7 +14,14 @@ from tavily import TavilyClient
14
 
15
  # Import API clients
16
  from riot_api import RiotAPI
17
- from youtube_scraper import YouTubeScraper
 
 
 
 
 
 
 
18
 
19
  # Import multi-agent components
20
  from multi_agent_router import create_router, QueryRouter
 
14
 
15
  # Import API clients
16
  from riot_api import RiotAPI
17
+ try:
18
+ # Try YouTube Data API first (more reliable)
19
+ from youtube_api import YouTubeAPIClient as YouTubeScraper
20
+ print("✅ Using YouTube Data API")
21
+ except Exception as e:
22
+ # Fallback to web scraper
23
+ from youtube_scraper import YouTubeScraper
24
+ print(f"⚠️ Using web scraper (API import failed: {e})")
25
 
26
  # Import multi-agent components
27
  from multi_agent_router import create_router, QueryRouter
youtube_api.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ YouTube Data API Client
3
+ More reliable alternative to web scraping
4
+ """
5
+
6
+ import os
7
+ from googleapiclient.discovery import build
8
+ from typing import List, Dict
9
+
10
+
11
class YouTubeAPIClient:
    """YouTube Data API v3 client.

    Wraps the ``googleapiclient`` service object built in ``__init__`` and
    exposes a video search that returns plain dictionaries, plus a helper
    that renders those dictionaries as a text list.
    """

    # Largest value the search endpoint accepts for ``maxResults``.
    MAX_RESULTS_LIMIT = 50

    # Thumbnail sizes to try, best first; not every size is always present.
    _THUMBNAIL_PREFERENCE = ('high', 'medium', 'default')

    def __init__(self, api_key: str = None):
        """Initialize with API key from parameter or environment.

        Args:
            api_key: API key to use. When omitted or empty, the
                ``YOUTUBE_API_KEY`` environment variable is used instead.

        Raises:
            ValueError: If no API key is available from either source.
        """
        self.api_key = api_key or os.getenv("YOUTUBE_API_KEY")
        if not self.api_key:
            raise ValueError("YouTube API key required")

        self.youtube = build('youtube', 'v3', developerKey=self.api_key)

    def search_videos(self, query: str, max_results: int = 5) -> List[Dict]:
        """Search YouTube videos using the official API.

        Args:
            query: Search query.
            max_results: Maximum number of results. Values above
                ``MAX_RESULTS_LIMIT`` are capped; values <= 0 yield ``[]``
                without calling the API.

        Returns:
            List of video dictionaries with the keys ``video_id``, ``title``,
            ``channel``, ``description``, ``thumbnail``, ``published_at``,
            ``views``, ``views_text``, ``likes``, ``duration`` and ``url``.
            Every key is always present; statistics fall back to zero and
            duration to ``"N/A"`` when the details call does not report a
            video. Returns ``[]`` on any error (best-effort: failures are
            printed, never raised).
        """
        # Deliberate best-effort boundary: callers expect a list, never an
        # exception, so everything (including argument checks) lives inside.
        try:
            if max_results <= 0:
                return []
            max_results = min(max_results, self.MAX_RESULTS_LIMIT)

            search_response = self.youtube.search().list(
                q=query,
                part='id,snippet',
                maxResults=max_results,
                type='video',
                order='relevance',
                videoDefinition='any'
            ).execute()

            videos = []
            for item in search_response.get('items', []):
                video = self._video_from_search_item(item)
                if video is not None:
                    videos.append(video)

            # Get additional statistics (views, duration)
            if videos:
                self._attach_details(videos)

            print(f"✅ Found {len(videos)} videos via YouTube API")
            return videos

        except Exception as e:
            print(f"❌ YouTube API error: {e}")
            return []

    def _video_from_search_item(self, item: Dict):
        """Build a video dictionary from one search result, or None if it has no video ID."""
        video_id = item.get('id', {}).get('videoId')
        if not video_id:
            return None

        snippet = item.get('snippet', {})
        thumbnails = snippet.get('thumbnails', {})
        thumbnail = ''
        for size in self._THUMBNAIL_PREFERENCE:
            candidate = thumbnails.get(size, {}).get('url')
            if candidate:
                thumbnail = candidate
                break

        return {
            'video_id': video_id,
            'title': snippet.get('title', ''),
            'channel': snippet.get('channelTitle', ''),
            'description': snippet.get('description', ''),
            'thumbnail': thumbnail,
            'published_at': snippet.get('publishedAt', ''),
            # Defaults so every key exists even if the details call
            # omits this video.
            'views': 0,
            'views_text': "0 views",
            'likes': 0,
            'duration': "N/A",
            'url': f"https://www.youtube.com/watch?v={video_id}",
        }

    def _attach_details(self, videos: List[Dict]) -> None:
        """Fetch statistics/duration for *videos* and merge them in place, matched by video ID."""
        video_response = self.youtube.videos().list(
            part='statistics,contentDetails',
            id=','.join(video['video_id'] for video in videos)
        ).execute()

        # Match on ID rather than position: the details response may omit
        # videos, so positional pairing would attach stats to the wrong entry.
        by_id = {video['video_id']: video for video in videos}
        for video_item in video_response.get('items', []):
            video = by_id.get(video_item.get('id'))
            if video is None:
                continue

            stats = video_item.get('statistics', {})
            views = int(stats.get('viewCount', 0))
            video['views'] = views
            video['views_text'] = f"{views:,} views"
            video['likes'] = int(stats.get('likeCount', 0))
            video['duration'] = self._parse_duration(
                video_item.get('contentDetails', {}).get('duration')
            )

    def _parse_duration(self, duration: str) -> str:
        """Convert ISO 8601 duration to readable format (PT1H2M10S -> 1:02:10).

        Day components (``P1DT2H3M4S``) are folded into the hour count.
        Returns ``"N/A"`` for non-string or unrecognized input.
        """
        import re

        if not isinstance(duration, str):
            return "N/A"

        match = re.fullmatch(
            r'P(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?',
            duration,
        )
        # A bare "P"/"PT" matches the pattern but carries no duration at all.
        if not match or all(group is None for group in match.groups()):
            return "N/A"

        days, hours, minutes, seconds = (
            int(group) if group else 0 for group in match.groups()
        )
        hours += days * 24

        if hours > 0:
            return f"{hours}:{minutes:02d}:{seconds:02d}"
        return f"{minutes}:{seconds:02d}"

    def format_video_list(self, videos: List[Dict]) -> str:
        """Format video list for display.

        Args:
            videos: Video dictionaries as returned by ``search_videos``.

        Returns:
            A numbered, markdown-style listing, or ``"No videos found."``
            when *videos* is empty.
        """
        if not videos:
            return "No videos found."

        entries = []
        for i, video in enumerate(videos, 1):
            views_k = video.get('views', 0) / 1000
            # Rebuild the link from the ID if a caller-supplied dict lacks it.
            url = video.get('url') or (
                f"https://www.youtube.com/watch?v={video.get('video_id', '')}"
            )
            entries.append(
                f"{i}. **{video['title']}**\n"
                f" • Channel: {video['channel']}\n"
                f" • Duration: {video.get('duration', 'N/A')} | Views: {views_k:.1f}K\n"
                f" • Link: {url}\n\n"
            )

        return "".join(entries)
118
+
119
+
120
+ # For backwards compatibility
121
class YouTubeScraper(YouTubeAPIClient):
    """Backwards-compatible name for ``YouTubeAPIClient``.

    Adds nothing of its own; it exists so code that refers to
    ``YouTubeScraper`` keeps working with the API-backed client.
    """