Spaces:
Running
Running
"""
services/google_news.py
Ambil berita Google News berdasarkan keyword menggunakan SerpApi.
Data yang diambil:
- title → judul berita
- snippet → ringkasan/deskripsi berita
- source → nama media
- date → tanggal terbit
"""
| import os | |
# ── CONFIG ──
# SerpApi API key, read once from the environment when the module is imported
# (configure it under HF Spaces Secrets). It is None when the variable is unset.
SERPAPI_KEY = os.getenv("SERPAPI_KEY")
# Number of news items requested per query.
MAX_RESULTS = 30
# ────────────────────────────────────────────
# INTERNAL HELPERS
# ────────────────────────────────────────────
def _usable_text(value, min_len: int) -> str:
    """Return ``value`` stripped when it is a string longer than ``min_len``.

    Anything else (a non-string, or a string that is too short once stripped)
    yields ``""`` so callers can filter with a plain truthiness check.
    """
    if not isinstance(value, str):
        return ""
    stripped = value.strip()
    return stripped if len(stripped) > min_len else ""


def _extract_texts(news_results: list) -> list[str]:
    """Flatten SerpApi Google News results into a flat list of text strings.

    Fields read from each item (all optional):
        item["title"]    headline, kept when longer than 5 characters
        item["snippet"]  summary, kept when longer than 10 characters;
                         item["description"] is used when "snippet" is
                         missing or empty
        item["stories"]  related sub-articles; every dict entry contributes
                         its own "title" / "snippet" under the same length
                         rules

    No other field (e.g. source or date) is extracted here.

    Malformed data is skipped instead of raising: non-dict items or stories,
    a "stories" value that is not a list/tuple (such as ``None``), and
    non-string titles or snippets are ignored, so one bad entry cannot
    discard the rest of the batch.

    Args:
        news_results: The ``news_results`` list of a SerpApi response.
            ``None`` or an empty list yields an empty result.

    Returns:
        Stripped texts in input order: for each item its title, then its
        snippet, then the title and snippet of each of its stories.
    """
    min_title_len = 5
    min_snippet_len = 10

    texts: list[str] = []
    for item in news_results or []:
        if not isinstance(item, dict):
            continue

        # (raw title, raw snippet) pairs: the item itself first, then its stories.
        candidates = [
            (item.get("title"), item.get("snippet") or item.get("description")),
        ]
        stories = item.get("stories")
        if isinstance(stories, (list, tuple)):
            candidates.extend(
                (story.get("title"), story.get("snippet"))
                for story in stories
                if isinstance(story, dict)
            )

        for raw_title, raw_snippet in candidates:
            title = _usable_text(raw_title, min_title_len)
            if title:
                texts.append(title)
            snippet = _usable_text(raw_snippet, min_snippet_len)
            if snippet:
                texts.append(snippet)
    return texts
# ────────────────────────────────────────────
# PUBLIC INTERFACE
# ────────────────────────────────────────────
def get_google_news(keyword: str) -> list[str]:
    """Fetch Google News texts for ``keyword`` through SerpApi.

    Called from services/aggregator.py.

    The function is best-effort: every failure path prints a diagnostic and
    returns an empty list instead of raising. That covers a blank keyword,
    an unset ``SERPAPI_KEY``, a missing ``serpapi`` package, an error
    reported in the response, an empty result set, and any exception raised
    while searching.

    Args:
        keyword: Search query sent as the ``q`` parameter.

    Returns:
        The strings produced by ``_extract_texts`` from the response's
        ``news_results`` (titles and snippets), or ``[]`` on any failure.
    """
    # A blank query cannot return anything useful; skip it before spending
    # an API call.
    if not isinstance(keyword, str) or not keyword.strip():
        print("⚠️ Google News: keyword kosong — skip")
        return []

    if not SERPAPI_KEY:
        print("⚠️ SERPAPI_KEY tidak diset — skip Google News")
        return []

    # Imported lazily so the module still loads when the optional
    # dependency is absent.
    try:
        from serpapi import GoogleSearch
    except ImportError:
        print("❌ serpapi package tidak terinstall — tambahkan ke requirements.txt")
        return []

    try:
        params = {
            "engine": "google_news",
            "q": keyword,
            "hl": "id",  # Indonesian language
            "gl": "id",  # Indonesia region
            # NOTE(review): the google_news engine may not honour "num" —
            # confirm against the SerpApi parameter list.
            "num": MAX_RESULTS,
            "api_key": SERPAPI_KEY,
        }
        results = GoogleSearch(params).get_dict()

        # "or []" also covers an explicit null value for the key.
        news_results = results.get("news_results") or []
        if not news_results:
            # SerpApi reports failures (bad key, quota, no results) in the
            # "error" field rather than by raising; surface that message
            # instead of always claiming there were no results.
            error = results.get("error")
            if error:
                print(f"❌ Google News error: {error}")
            else:
                print(f"⚠️ Google News: tidak ada hasil untuk '{keyword}'")
            return []

        texts = _extract_texts(news_results)
        print(f"✅ Google News: {len(texts)} teks dari {len(news_results)} berita")
        return texts
    except Exception as e:
        # Deliberate catch-all boundary: a news-source failure must not
        # propagate to the caller.
        print(f"❌ Google News error: {e}")
        return []