Spaces:

noranisa
/

Sentimen-Analysis

Sleeping

App Files Files Community

noranisa committed 22 days ago

Commit

46cf222

verified ·

1 Parent(s): 1850309

Create services/google_news.py

Browse files

Files changed (1) hide show

services/google_news.py +106 -0

services/google_news.py ADDED Viewed

	@@ -0,0 +1,106 @@

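+"""
+services/google_news.py
+Fetch Google News results for a keyword through SerpApi.
+Fields documented for each result:
+  - title    → headline of the article
+  - snippet  → summary/description of the article
+  - source   → name of the publishing outlet
+  - date     → publication date
+Only the title and snippet text (including those of nested "stories")
+is returned to callers; source and date are not extracted.
+"""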
+import os
+# ── CONFIG ──
+SERPAPI_KEY = os.getenv("SERPAPI_KEY")   # set di HF Spaces Secrets
+MAX_RESULTS = 30                          # jumlah berita per query
+# ────────────────────────────────────────────
+#  INTERNAL HELPERS
+# ────────────────────────────────────────────
+def _extract_texts(news_results: list) -> list[str]:
+    """
+    Collect usable headline and summary strings from SerpApi Google News results.
+
+    Fields read from each result dict:
+      item["title"]                       → headline
+      item["snippet"] / item["description"] → summary ("description" is the fallback)
+      item["stories"]                     → optional list of related sub-articles,
+                                            each read for "title" and "snippet"
+
+    Args:
+        news_results: the "news_results" list of a response; None or an
+            empty list yields an empty result.
+
+    Returns:
+        Stripped strings in encounter order: each item's title, then its
+        summary, then the title and snippet of each of its stories. Titles
+        must be longer than 5 characters and summaries longer than 10.
+        Entries that are not dicts and field values that are not strings
+        are skipped instead of raising.
+    """
+    texts = []
+    for item in news_results or []:
+        if not isinstance(item, dict):
+            continue
+        # (raw value, minimum length) pairs, kept in output order.
+        candidates = [
+            (item.get("title"), 5),
+            (item.get("snippet") or item.get("description"), 10),
+        ]
+        # A "stories" key that is present but null must not abort the whole
+        # extraction, so anything other than a list/tuple counts as no stories.
+        stories = item.get("stories")
+        if not isinstance(stories, (list, tuple)):
+            stories = ()
+        for story in stories:
+            if isinstance(story, dict):
+                candidates.append((story.get("title"), 5))
+                candidates.append((story.get("snippet"), 10))
+        for value, min_len in candidates:
+            text = _usable_text(value, min_len)
+            if text is not None:
+                texts.append(text)
+    return texts
+
+
+def _usable_text(value, min_len: int):
+    """Return *value* stripped if it is a string longer than *min_len*, else None."""
+    if not isinstance(value, str):
+        return None
+    text = value.strip()
+    return text if len(text) > min_len else None
+# ────────────────────────────────────────────
+#  PUBLIC INTERFACE
+# ────────────────────────────────────────────
+def get_google_news(keyword: str) -> list[str]:
+    """
+    Fetch Google News results for *keyword* through SerpApi.
+
+    Per the original module notes this is called from services/aggregator.py.
+    The function is best-effort: every failure is reported with print() and
+    turned into an empty list, so no exception reaches the caller.
+
+    Args:
+        keyword: search phrase; surrounding whitespace is ignored.
+
+    Returns:
+        Title and snippet strings produced by _extract_texts(), or [] when
+        SERPAPI_KEY is unset, the keyword is blank, the serpapi package is
+        missing, the API reports an error, nothing is found, or the request
+        raises.
+    """
+    if not SERPAPI_KEY:
+        print("⚠️  SERPAPI_KEY tidak diset — skip Google News")
+        return []
+    # A blank keyword cannot match anything; skip it rather than spend a request.
+    query = keyword.strip() if isinstance(keyword, str) else ""
+    if not query:
+        print("⚠️  Google News: keyword kosong — skip")
+        return []
+    try:
+        # Imported here so a missing package only disables this news source.
+        from serpapi import GoogleSearch
+    except ImportError:
+        print("❌ serpapi package tidak terinstall — tambahkan ke requirements.txt")
+        return []
+    params = {
+        "engine":   "google_news",
+        "q":        query,
+        "hl":       "id",          # Indonesian language
+        "gl":       "id",          # Indonesia region
+        # NOTE(review): confirm the google_news engine honours "num".
+        "num":      MAX_RESULTS,
+        "api_key":  SERPAPI_KEY,
+    }
+    try:
+        results = GoogleSearch(params).get_dict()
+        # "news_results" may be missing or explicitly null.
+        news_results = results.get("news_results") or []
+        # SerpApi is documented to report failures (bad key, quota, ...) in an
+        # "error" field; log it instead of a misleading "no results" message.
+        api_error = results.get("error")
+        if api_error and not news_results:
+            print(f"❌ Google News error: {api_error}")
+            return []
+        if not news_results:
+            print(f"⚠️  Google News: tidak ada hasil untuk '{keyword}'")
+            return []
+        texts = _extract_texts(news_results)
+        print(f"✅ Google News: {len(texts)} teks dari {len(news_results)} berita")
+        return texts
+    except Exception as e:
+        # Deliberate catch-all: network and parsing failures must not break
+        # the caller, which only expects a (possibly empty) list.
+        print(f"❌ Google News error: {e}")
+        return []