noranisa committed on
Commit
fa302f6
·
verified ·
1 Parent(s): 17d11b3

Create services/aggregator.py

Browse files
Files changed (1) hide show
  1. services/aggregator.py +19 -0
services/aggregator.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from services.youtube import search_videos, get_comments
2
+ from services.reddit import get_reddit_comments
3
+ from services.preprocessing import clean_text
4
+
5
def collect_data(keyword):
    """Collect comments about ``keyword`` and return them cleaned.

    Comments are gathered from YouTube first (for every video id that
    ``search_videos`` yields, in order) and then from Reddit. Entries
    whose length is 5 or less are dropped; every remaining entry is
    passed through ``clean_text``.

    The length check is applied to the raw comment, not to the cleaned
    text.

    Args:
        keyword: Search term forwarded to ``search_videos`` and
            ``get_reddit_comments``.

    Returns:
        A list with the ``clean_text`` result of each retained comment,
        YouTube comments first, Reddit comments after.
    """
    # YouTube: flatten the comments of every matching video.
    gathered = [
        comment
        for video_id in search_videos(keyword)
        for comment in get_comments(video_id)
    ]

    # Reddit: appended after the YouTube comments.
    gathered += get_reddit_comments(keyword)

    # Clean: skip very short entries, normalise the rest.
    result = []
    for raw in gathered:
        if len(raw) <= 5:
            continue
        result.append(clean_text(raw))
    return result