ak0601 committed on
Commit
bbd3032
·
verified ·
1 Parent(s): 50b7f7b

Update app/fetcher/youtube_client.py

Browse files
Files changed (1) hide show
  1. app/fetcher/youtube_client.py +62 -62
app/fetcher/youtube_client.py CHANGED
@@ -1,62 +1,62 @@
1
- """
2
- ResearchRadar — YouTube Video Fetcher (Robust).
3
- """
4
-
5
- from __future__ import annotations
6
-
7
- import logging
8
- import xml.etree.ElementTree as ET
9
- from typing import Dict, List
10
-
11
- import requests
12
- from app.fetcher.http_session import RetrySession
13
-
14
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# YouTube channel IDs whose feeds fetch_latest_videos() requests, in this order.
_AI_CHANNEL_IDS = [
    'UCghJTNTO9kcDeUFXMuSDGLQ',  # Two Minute Papers
    'UCLKPca3kwwd-B59HNr-_lvA',  # AI Explained
    'UCZHmQk67mSJgfCCTn7xBfew',  # Yannic Kilcher
    'UCYUq87t77YNTG5m256fOXeQ',
    'UCgBncpylJ1kiVaPyP-PZauQ',
    'UC6MhHkSosYXAD-LTXBWyLMg',
    'UCJgIbYl6C5no72a0NUAPcTA',
    'UCNIkB2IeJ-6AmZv7bQ1oBYg',
    # NOTE(review): the list originally ended with a bare "Sentdex" comment
    # that is attached to no ID, and the five IDs above carry no label.
    # Confirm which channels they are and whether a Sentdex ID is missing.
]
27
-
28
# Prefix -> URI map for the Atom elements looked up in each feed.
_ATOM_NS = {'a': 'http://www.w3.org/2005/Atom'}


def _videos_from_entries(entries, limit: int) -> List[Dict[str, str]]:
    """Convert the first *limit* Atom ``<entry>`` elements into video dicts.

    The limit is applied before filtering, so an entry without a usable
    link still counts against it. Entries lacking a ``rel="alternate"``
    link with a non-empty ``href`` are skipped.
    """
    videos = []
    for entry in entries[:limit]:
        # The watch URL is carried by the link marked rel="alternate".
        link = next(
            (
                link_elem.get('href', '')
                for link_elem in entry.findall('a:link', _ATOM_NS)
                if link_elem.get('rel') == 'alternate'
            ),
            '',
        )
        if not link:
            continue

        # A missing <title> and an empty one are treated alike: both fall
        # back to the placeholder instead of dropping the video.
        raw_title = entry.findtext('a:title', default='', namespaces=_ATOM_NS)
        title = (raw_title or '').strip() or "Unknown Title"
        videos.append({'title': title, 'url': link})
    return videos


def fetch_latest_videos(limit_per_channel: int = 1) -> List[Dict[str, str]]:
    """Pull the most recent videos from the channels in ``_AI_CHANNEL_IDS``.

    Each channel's ``feeds/videos.xml`` feed is requested through a
    ``RetrySession`` and parsed as Atom XML. A channel that answers with a
    non-200 status, or whose request or parsing raises, is logged and
    skipped so the remaining channels are still processed.

    Args:
        limit_per_channel: Maximum number of feed entries considered per
            channel. Zero or a negative value returns an empty list
            without making any request.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts, grouped by channel
        in ``_AI_CHANNEL_IDS`` order and in feed order within a channel.
    """
    # A negative limit would otherwise slice as entries[:-k] and return
    # nearly the whole feed; zero would hit the network for nothing.
    if limit_per_channel <= 0:
        return []

    session = RetrySession()
    videos: List[Dict[str, str]] = []

    for cid in _AI_CHANNEL_IDS:
        url = f"https://www.youtube.com/feeds/videos.xml?channel_id={cid}"
        try:
            resp = session.get(url)
            if resp.status_code != 200:
                logger.warning("YouTube RSS status %s for %s", resp.status_code, cid)
                continue

            entries = ET.fromstring(resp.text).findall('a:entry', _ATOM_NS)
            logger.info("YouTube: Found %d entries for channel %s", len(entries), cid)
            videos.extend(_videos_from_entries(entries, limit_per_channel))
        except Exception as e:
            # Deliberately broad: this is a best-effort fetch, and the
            # exception types RetrySession may raise are not visible here.
            logger.error("Error fetching YouTube feed for channel %s: %s", cid, e)

    return videos
 
1
+ """
2
+ ResearchRadar — YouTube Video Fetcher (Robust).
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import logging
8
+ import xml.etree.ElementTree as ET
9
+ from typing import Dict, List
10
+
11
+ import requests
12
+ from app.fetcher.http_session import RetrySession
13
+
14
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# YouTube channel IDs whose feeds fetch_latest_videos() requests, in this order.
_AI_CHANNEL_IDS = [
    'UCghJTNTO9kcDeUFXMuSDGLQ',  # Two Minute Papers
    'UCLKPca3kwwd-B59HNr-_lvA',  # AI Explained
    'UCZHmQk67mSJgfCCTn7xBfew',  # Yannic Kilcher
    'UCYUq87t77YNTG5m256fOXeQ',
    'UCgBncpylJ1kiVaPyP-PZauQ',
    'UC6MhHkSosYXAD-LTXBWyLMg',
    'UCJgIbYl6C5no72a0NUAPcTA',
    'UCNIkB2IeJ-6AmZv7bQ1oBYg',
    # NOTE(review): the list originally ended with a bare "Sentdex" comment
    # that is attached to no ID, and the five IDs above carry no label.
    # Confirm which channels they are and whether a Sentdex ID is missing.
]
27
+
28
# Prefix -> URI map for the Atom elements looked up in each feed.
_ATOM_NS = {'a': 'http://www.w3.org/2005/Atom'}


def _videos_from_entries(entries, limit: int) -> List[Dict[str, str]]:
    """Convert the first *limit* Atom ``<entry>`` elements into video dicts.

    The limit is applied before filtering, so an entry without a usable
    link still counts against it. Entries lacking a ``rel="alternate"``
    link with a non-empty ``href`` are skipped.
    """
    videos = []
    for entry in entries[:limit]:
        # The watch URL is carried by the link marked rel="alternate".
        link = next(
            (
                link_elem.get('href', '')
                for link_elem in entry.findall('a:link', _ATOM_NS)
                if link_elem.get('rel') == 'alternate'
            ),
            '',
        )
        if not link:
            continue

        # A missing <title> and an empty one are treated alike: both fall
        # back to the placeholder instead of dropping the video.
        raw_title = entry.findtext('a:title', default='', namespaces=_ATOM_NS)
        title = (raw_title or '').strip() or "Unknown Title"
        videos.append({'title': title, 'url': link})
    return videos


def fetch_latest_videos(limit_per_channel: int = 1) -> List[Dict[str, str]]:
    """Pull the most recent videos from the channels in ``_AI_CHANNEL_IDS``.

    Each channel's ``feeds/videos.xml`` feed is requested through a
    ``RetrySession`` and parsed as Atom XML. A channel that answers with a
    non-200 status, or whose request or parsing raises, is logged and
    skipped so the remaining channels are still processed.

    Args:
        limit_per_channel: Maximum number of feed entries considered per
            channel. Zero or a negative value returns an empty list
            without making any request.

    Returns:
        A list of ``{'title': ..., 'url': ...}`` dicts, grouped by channel
        in ``_AI_CHANNEL_IDS`` order and in feed order within a channel.
    """
    # A negative limit would otherwise slice as entries[:-k] and return
    # nearly the whole feed; zero would hit the network for nothing.
    if limit_per_channel <= 0:
        return []

    session = RetrySession()
    videos: List[Dict[str, str]] = []

    for cid in _AI_CHANNEL_IDS:
        url = f"https://www.youtube.com/feeds/videos.xml?channel_id={cid}"
        try:
            resp = session.get(url)
            if resp.status_code != 200:
                logger.warning("YouTube RSS status %s for %s", resp.status_code, cid)
                continue

            entries = ET.fromstring(resp.text).findall('a:entry', _ATOM_NS)
            logger.info("YouTube: Found %d entries for channel %s", len(entries), cid)
            videos.extend(_videos_from_entries(entries, limit_per_channel))
        except Exception as e:
            # Deliberately broad: this is a best-effort fetch, and the
            # exception types RetrySession may raise are not visible here.
            logger.error("Error fetching YouTube feed for channel %s: %s", cid, e)

    return videos