noranisa committed on
Commit
46cf222
·
verified ·
1 Parent(s): 1850309

Create services/google_news.py

Browse files
Files changed (1) hide show
  1. services/google_news.py +106 -0
services/google_news.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ services/google_news.py
3
+ Ambil berita Google News berdasarkan keyword menggunakan SerpApi.
4
+
5
+ Data yang diambil:
6
+ - title → judul berita
7
+ - snippet → ringkasan/deskripsi berita
8
+ - source → nama media
9
+ - date → tanggal terbit
10
+ """
11
+
12
+ import os
13
+
14
+ # ── CONFIG ──
15
+ SERPAPI_KEY = os.getenv("SERPAPI_KEY") # SerpApi key read from the environment; set it in HF Spaces Secrets
16
+ MAX_RESULTS = 30 # number of news items requested per query
17
+
18
+
19
+ # ────────────────────────────────────────────
20
+ # INTERNAL HELPERS
21
+ # ────────────────────────────────────────────
22
+
23
+ def _extract_texts(news_results: list) -> list[str]:
24
+ """
25
+ Ekstrak teks dari hasil Google News SerpApi.
26
+
27
+ Struktur item:
28
+ item["title"] β†’ judul berita
29
+ item["snippet"] β†’ ringkasan berita
30
+ item["source"]["name"] β†’ nama media (opsional)
31
+ item["stories"] β†’ sub-artikel terkait (opsional)
32
+ .title / .snippet
33
+ """
34
+ texts = []
35
+
36
+ for item in news_results:
37
+ # Judul
38
+ title = (item.get("title") or "").strip()
39
+ if title and len(title) > 5:
40
+ texts.append(title)
41
+
42
+ # Snippet / ringkasan
43
+ snippet = (item.get("snippet") or item.get("description") or "").strip()
44
+ if snippet and len(snippet) > 10:
45
+ texts.append(snippet)
46
+
47
+ # Sub-stories (Google News kadang mengelompokkan berita terkait)
48
+ for story in item.get("stories", []):
49
+ if isinstance(story, dict):
50
+ st = (story.get("title") or "").strip()
51
+ ss = (story.get("snippet") or "").strip()
52
+ if st and len(st) > 5:
53
+ texts.append(st)
54
+ if ss and len(ss) > 10:
55
+ texts.append(ss)
56
+
57
+ return texts
58
+
59
+
60
+ # ────────────────────────────────────────────
61
+ # PUBLIC INTERFACE
62
+ # ────────────────────────────────────────────
63
+
64
def get_google_news(keyword: str) -> list[str]:
    """
    Fetch Google News text for a keyword through SerpApi.

    Queries the ``google_news`` engine with Indonesian language and region
    settings and flattens the response with ``_extract_texts``. Every failure
    path prints a diagnostic and returns an empty list instead of raising.

    The original note states this is called from services/aggregator.py.

    Args:
        keyword: Search query, sent to SerpApi as ``q``.

    Returns:
        News titles and snippets as a list of strings, or ``[]`` when the API
        key is not set, the ``serpapi`` package is missing, SerpApi reports an
        error, no news is found, or the request raises.
    """
    if not SERPAPI_KEY:
        print("⚠️ SERPAPI_KEY tidak diset β€” skip Google News")
        return []

    # Imported lazily so this module can still be imported when the optional
    # serpapi dependency is not installed.
    try:
        from serpapi import GoogleSearch
    except ImportError:
        print("❌ serpapi package tidak terinstall β€” tambahkan ke requirements.txt")
        return []

    params = {
        "engine": "google_news",
        "q": keyword,
        "hl": "id",  # Indonesian language
        "gl": "id",  # Indonesia region
        "num": MAX_RESULTS,
        "api_key": SERPAPI_KEY,
    }

    try:
        results = GoogleSearch(params).get_dict()
        news_results = results.get("news_results") or []

        if not news_results:
            # SerpApi reports failures (invalid key, exhausted quota, ...) in
            # an "error" field rather than by raising; surface it instead of
            # reporting the failure as an empty search.
            api_error = results.get("error")
            if api_error:
                print(f"❌ Google News error: {api_error}")
            else:
                print(f"⚠️ Google News: tidak ada hasil untuk '{keyword}'")
            return []

        texts = _extract_texts(news_results)
        print(f"βœ… Google News: {len(texts)} teks dari {len(news_results)} berita")
        return texts

    except Exception as e:
        # Deliberate best-effort boundary: a failing news source must not
        # break the caller.
        print(f"❌ Google News error: {e}")
        return []