# Sentimen-Analysis / services/google_news.py
# noranisa: Create services/google_news.py
# commit 46cf222 (verified)
"""
services/google_news.py
Fetch Google News articles for a keyword using SerpApi.
Fields retrieved:
- title → article headline
- snippet → article summary/description
- source → name of the publisher
- date → publication date
"""
import os
# ── CONFIG ──
SERPAPI_KEY = os.getenv("SERPAPI_KEY")  # read once at import time; set it in HF Spaces Secrets
MAX_RESULTS = 30  # number of news items per query
# ────────────────────────────────────────────
# INTERNAL HELPERS
# ────────────────────────────────────────────
def _extract_texts(news_results: list) -> list[str]:
"""
Ekstrak teks dari hasil Google News SerpApi.
Struktur item:
item["title"] β†’ judul berita
item["snippet"] β†’ ringkasan berita
item["source"]["name"] β†’ nama media (opsional)
item["stories"] β†’ sub-artikel terkait (opsional)
.title / .snippet
"""
texts = []
for item in news_results:
# Judul
title = (item.get("title") or "").strip()
if title and len(title) > 5:
texts.append(title)
# Snippet / ringkasan
snippet = (item.get("snippet") or item.get("description") or "").strip()
if snippet and len(snippet) > 10:
texts.append(snippet)
# Sub-stories (Google News kadang mengelompokkan berita terkait)
for story in item.get("stories", []):
if isinstance(story, dict):
st = (story.get("title") or "").strip()
ss = (story.get("snippet") or "").strip()
if st and len(st) > 5:
texts.append(st)
if ss and len(ss) > 10:
texts.append(ss)
return texts
# ────────────────────────────────────────────
# PUBLIC INTERFACE
# ────────────────────────────────────────────
def get_google_news(keyword: str) -> list[str]:
    """Fetch Google News texts for *keyword* through SerpApi.

    This is a best-effort call: it never raises. Every failure mode (blank
    keyword, missing API key, missing ``serpapi`` package, API error,
    network error) is reported on stdout and yields an empty list.
    According to the original notes it is called from
    services/aggregator.py.

    Args:
        keyword: Search query. Surrounding whitespace is ignored; a blank
            or non-string value returns ``[]`` without calling the API.

    Returns:
        Headline and snippet strings extracted by ``_extract_texts`` from
        at most ``MAX_RESULTS`` news items.
    """
    # Checked first so a blank query never costs an API call.
    if not isinstance(keyword, str) or not keyword.strip():
        print("⚠️ Google News: keyword kosong — skip")
        return []
    query = keyword.strip()

    if not SERPAPI_KEY:
        print("⚠️ SERPAPI_KEY tidak diset — skip Google News")
        return []

    # Imported lazily so the module still loads when the optional
    # dependency is not installed.
    try:
        from serpapi import GoogleSearch
    except ImportError:
        print("❌ serpapi package tidak terinstall — tambahkan ke requirements.txt")
        return []

    try:
        params = {
            "engine": "google_news",
            "q": query,
            "hl": "id",  # Indonesian language
            "gl": "id",  # Indonesia region
            "num": MAX_RESULTS,
            "api_key": SERPAPI_KEY,
        }
        results = GoogleSearch(params).get_dict()

        # NOTE(review): the google_news engine does not appear to document
        # `num`, so the limit is also enforced client-side — confirm.
        news_results = (results.get("news_results") or [])[:MAX_RESULTS]
        if not news_results:
            # SerpApi reports failures (bad key, quota, ...) in an "error"
            # field; surface it instead of a generic "no results" message.
            api_error = results.get("error")
            if api_error:
                print(f"❌ Google News error: {api_error}")
            else:
                print(f"⚠️ Google News: tidak ada hasil untuk '{query}'")
            return []

        texts = _extract_texts(news_results)
        print(f"✅ Google News: {len(texts)} teks dari {len(news_results)} berita")
        return texts
    except Exception as e:
        # Deliberate catch-all at the service boundary: a failing news
        # source must not take down the caller.
        print(f"❌ Google News error: {e}")
        return []