Spaces:
Sleeping
Sleeping
Update services/aggregator.py
Browse files- services/aggregator.py +14 -18
services/aggregator.py
CHANGED
|
@@ -1,32 +1,28 @@
|
|
| 1 |
from services.youtube import search_videos, get_comments
|
| 2 |
from services.reddit import get_reddit_comments
|
| 3 |
-
from services.preprocessing import clean_text
|
| 4 |
|
| 5 |
def collect_data(keyword, source="all"):
|
| 6 |
all_comments = []
|
| 7 |
|
| 8 |
-
#
|
| 9 |
if source in ["youtube", "all"]:
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
all_comments.extend(get_comments(vid))
|
| 14 |
-
except Exception as e:
|
| 15 |
-
print("YouTube error:", e)
|
| 16 |
|
| 17 |
-
#
|
| 18 |
if source in ["reddit", "all"]:
|
| 19 |
-
|
| 20 |
-
reddit_comments = get_reddit_comments(keyword)
|
| 21 |
-
if reddit_comments:
|
| 22 |
-
all_comments.extend(reddit_comments)
|
| 23 |
-
except Exception as e:
|
| 24 |
-
print("Reddit error (skip):", e)
|
| 25 |
|
| 26 |
-
#
|
| 27 |
-
if
|
| 28 |
all_comments = ["data tidak ditemukan"]
|
| 29 |
|
| 30 |
-
cleaned = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
return cleaned
|
|
|
|
| 1 |
from services.youtube import search_videos, get_comments
|
| 2 |
from services.reddit import get_reddit_comments
|
| 3 |
+
from services.preprocessing import clean_text, is_valid
|
| 4 |
|
| 5 |
def collect_data(keyword, source="all"):
    """Collect comments about *keyword* and return the cleaned, valid ones.

    Args:
        keyword: Search term passed to ``search_videos`` and
            ``get_reddit_comments``.
        source: ``"youtube"``, ``"reddit"`` or ``"all"`` (default). Any other
            value queries no source, so only the placeholder entry is
            processed.

    Returns:
        A list containing ``clean_text(c)`` for every collected comment ``c``
        for which ``is_valid(c)`` is truthy. The list may be empty when no
        entry passes ``is_valid``.
    """
    all_comments = []

    # Each source is best-effort: a failure is reported and skipped so the
    # other source (and anything already collected) still contributes.

    # YouTube
    if source in ("youtube", "all"):
        try:
            for video in search_videos(keyword) or []:
                # ``or []`` keeps ``extend`` from raising on a None result.
                all_comments.extend(get_comments(video) or [])
        except Exception as e:
            print("YouTube error:", e)

    # Reddit
    if source in ("reddit", "all"):
        try:
            # ``or []`` keeps ``extend`` from raising on a None result.
            all_comments.extend(get_reddit_comments(keyword) or [])
        except Exception as e:
            print("Reddit error (skip):", e)

    # Fallback: make sure there is at least one entry to process.
    # NOTE(review): the placeholder is filtered by ``is_valid`` like any other
    # comment, so the result can still be empty - confirm that is intended.
    if not all_comments:
        all_comments = ["data tidak ditemukan"]

    return [clean_text(c) for c in all_comments if is_valid(c)]
|