Spaces:

noranisa
/

Sentimen-Analysis

Sleeping

App Files Files Community

noranisa committed 17 days ago

Commit

06f79f7

verified ·

1 Parent(s): f7370ef

Update main.py

Browse files

Files changed (1) hide show

main.py +487 -172

main.py CHANGED Viewed

@@ -1,32 +1,28 @@
 from flask import Flask, render_template, request, jsonify, send_file
 from services.aggregator import collect_data
-from services.sentiment import predict
-# =========================
-# IMPORT TAMBAHAN
-# =========================
 from collections import Counter
 import pandas as pd
 import os
 import re
 import numpy as np
-# VISUAL
-from wordcloud import WordCloud
 import matplotlib
-matplotlib.use('Agg')  # ← WAJIB: non-interactive backend untuk server
 import matplotlib.pyplot as plt
-# ML
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from sklearn.decomposition import LatentDirichletAllocation
 from sklearn.cluster import KMeans
 from sklearn.metrics.pairwise import cosine_similarity
-from sklearn.linear_model import LinearRegression
-# GRAPH
 import networkx as nx
 from itertools import combinations
 # OPTIONAL ADVANCED
 try:
@@ -46,30 +42,83 @@ except Exception:
 app = Flask(__name__)
-# =========================
-# UTIL
-# =========================
-def clean_text(t):
-    t = t.lower()
-    t = re.sub(r'http\S+', '', t)
-    t = re.sub(r'[^a-zA-Z0-9\s]', ' ', t)
-    t = re.sub(r'\s+', ' ', t).strip()
-    return t
-# =========================
-# TOP WORDS
-# =========================
 STOPWORDS_ID = {
     'yang','dan','di','ke','dari','ini','itu','dengan','untuk','adalah','ada',
     'pada','juga','tidak','bisa','sudah','saya','kamu','kami','mereka','kita',
     'nya','pun','aja','gak','ga','ya','yg','dgn','yah','dah','udah','mau',
-    'jadi','buat','kalau','tp','tapi','tapi','banget','sangat','lebih','nih',
-    'sih','dong','lah','lagi','terus','sama','atau','karena','tapi','juga',
-    'so','the','is','in','of','to','a','an','and','it','for','that','this',
 }
-def get_top_words(texts):
     words = []
     for t in texts:
         for w in clean_text(t).split():
@@ -78,31 +127,386 @@ def get_top_words(texts):
     return [{"word": w, "count": c} for w, c in Counter(words).most_common(15)]
-# =========================
-# WORDCLOUD
-# =========================
 def generate_wordcloud(texts):
     try:
         os.makedirs("static", exist_ok=True)
         texts = [t for t in texts if len(t.strip()) > 3]
         if not texts:
             return
-        combined = " ".join(texts)
         wc = WordCloud(
-            width=900, height=400,
-            background_color='white',
             max_words=80,
             stopwords=STOPWORDS_ID,
-            colormap='Blues'
-        ).generate(combined)
         wc.to_file("static/wordcloud.png")
     except Exception as e:
         print("wordcloud error:", e)
-# =========================
-# HEATMAP
-# =========================
 def generate_heatmap(data):
     try:
         if not data:
@@ -110,93 +514,49 @@ def generate_heatmap(data):
         labels  = ["Positive", "Neutral", "Negative"]
         sources = sorted(set(d["source"] for d in data))
         matrix  = np.zeros((len(sources), len(labels)))
         for d in data:
             i = sources.index(d["source"])
             j = labels.index(d["sentiment"])
             matrix[i][j] += 1
         if matrix.sum() == 0:
             return
         fig, ax = plt.subplots(figsize=(6, max(2, len(sources))))
         im = ax.imshow(matrix, cmap='Blues', aspect='auto')
         ax.set_xticks(range(len(labels)))
-        ax.set_xticklabels(labels)
         ax.set_yticks(range(len(sources)))
-        ax.set_yticklabels(sources)
         plt.colorbar(im, ax=ax)
         plt.tight_layout()
         os.makedirs("static", exist_ok=True)
-        plt.savefig("static/heatmap.png", dpi=100)
         plt.close(fig)
     except Exception as e:
         print("heatmap error:", e)
-# =========================
-# TIMELINE
-# =========================
-def generate_timeline(data):
-    try:
-        if not data:
-            return
-        os.makedirs("static", exist_ok=True)
-        pos = [1 if d["sentiment"] == "Positive" else 0 for d in data]
-        neg = [1 if d["sentiment"] == "Negative" else 0 for d in data]
-        neu = [1 if d["sentiment"] == "Neutral"  else 0 for d in data]
-        # rolling average
-        def roll(arr, n=5):
-            return [sum(arr[max(0,i-n):i+1]) / len(arr[max(0,i-n):i+1]) for i in range(len(arr))]
-        fig, ax = plt.subplots(figsize=(10, 3))
-        ax.plot(roll(pos), label="Positive", color="#22c55e", linewidth=1.5)
-        ax.plot(roll(neg), label="Negative", color="#ef4444", linewidth=1.5)
-        ax.plot(roll(neu), label="Neutral",  color="#94a3b8", linewidth=1.0)
-        ax.legend()
-        ax.set_facecolor('#f8fafc')
-        fig.patch.set_facecolor('#f8fafc')
-        plt.tight_layout()
-        plt.savefig("static/timeline.png", dpi=100)
-        plt.close(fig)
-    except Exception as e:
-        print("timeline error:", e)
-# =========================
-# TOPIC MODELING
-# =========================
 def get_topics(texts):
     try:
         texts = [t for t in texts if len(t) > 3]
         if len(texts) < 5:
             return [["data kurang"]]
         vec = CountVectorizer(min_df=2, stop_words=list(STOPWORDS_ID))
         X   = vec.fit_transform(texts)
         if X.shape[1] == 0:
             return [["kosong"]]
-        n_topics = min(3, X.shape[1])
-        lda      = LatentDirichletAllocation(n_components=n_topics, random_state=42)
         lda.fit(X)
         words  = vec.get_feature_names_out()
-        topics = []
-        for t in lda.components_:
-            topics.append([words[i] for i in t.argsort()[-5:]])
-        return topics
     except Exception as e:
         print("topic error:", e)
         return [["error"]]
-# =========================
-# INSIGHT
-# =========================
 def generate_insight(data):
     s = [d["sentiment"] for d in data]
     return (f"Positive:{s.count('Positive')} "
@@ -204,9 +564,6 @@ def generate_insight(data):
             f"Neutral:{s.count('Neutral')}")
-# =========================
-# CLUSTER
-# =========================
 def cluster_opinions(texts):
     try:
         if len(texts) < 6:
@@ -215,38 +572,19 @@ def cluster_opinions(texts):
         n = min(3, len(texts))
         k = KMeans(n_clusters=n, n_init=10, random_state=42).fit(X)
         clusters = {}
-        for i, label in enumerate(k.labels_):
-            clusters.setdefault(int(label), []).append(texts[i])
-        return [{"cluster": lbl, "samples": samples[:3]} for lbl, samples in clusters.items()]
     except Exception as e:
         print("cluster error:", e)
         return []
-# =========================
-# HOAX (keyword-based)
-# =========================
-HOAX_KW = [
-    "hoax","bohong","fitnah","propaganda","palsu","fake","disinformasi",
-    "menyesatkan","kebohongan","manipulasi","adu domba","provokasi"
-]
-def detect_hoax(texts):
-    results = []
-    for t in texts[:15]:
-        lower = t.lower()
-        label = "Hoax" if any(k in lower for k in HOAX_KW) else "Normal"
-        results.append({"text": t, "label": label})
-    return results
-# =========================
-# NETWORK
-# =========================
 def build_network(texts):
     edges = {}
     for t in texts:
-        words = [w for w in set(clean_text(t).split()) if len(w) > 3 and w not in STOPWORDS_ID][:6]
         for a, b in combinations(words, 2):
             key = tuple(sorted([a, b]))
             edges[key] = edges.get(key, 0) + 1
@@ -254,30 +592,22 @@ def build_network(texts):
             for k, v in edges.items() if v > 1]
-# =========================
-# BOT NETWORK
-# =========================
 def detect_bot_network(texts):
     try:
         if len(texts) < 5:
             return {"nodes": [], "edges": [], "bots": []}
         X   = TfidfVectorizer(max_features=300).fit_transform(texts)
         sim = cosine_similarity(X)
-        G = nx.Graph()
         for i in range(len(texts)):
             G.add_node(i, text=texts[i])
         for i in range(len(texts)):
             for j in range(i + 1, len(texts)):
                 if sim[i][j] > 0.75:
                     G.add_edge(i, j)
         central = nx.degree_centrality(G)
         bots    = [{"node": i, "score": round(s, 2), "text": texts[i]}
                    for i, s in central.items() if s > 0.3]
         return {
             "nodes": [{"id": i} for i in G.nodes()],
             "edges": [{"source": u, "target": v} for u, v in G.edges()],
@@ -288,32 +618,9 @@ def detect_bot_network(texts):
         return {"nodes": [], "edges": [], "bots": []}
-# =========================
-# TREND
-# =========================
-def predict_trend(data):
-    try:
-        y = [1 if d["sentiment"] == "Positive" else
-             -1 if d["sentiment"] == "Negative" else 0
-             for d in data]
-        if len(y) < 5:
-            return "Kurang Data"
-        X     = np.arange(len(y)).reshape(-1, 1)
-        coef  = LinearRegression().fit(X, y).coef_[0]
-        if coef > 0.05:
-            return "Naik Positif"
-        elif coef < -0.05:
-            return "Naik Negatif"
-        else:
-            return "Stabil"
-    except Exception as e:
-        print("trend error:", e)
-        return "Error"
-# =========================
 # ROUTES
-# =========================
 @app.route("/")
 def home():
     return render_template("index.html")
@@ -337,39 +644,50 @@ def analyze():
         texts   = [t for _, t in raw][:100]
         sources = [s for s, _ in raw][:100]
-        sentiments = predict(texts)
-        result = [
-            {"text": t, "sentiment": s, "source": src}
-            for t, s, src in zip(texts, sentiments, sources)
         ]
-        # VISUAL — non-blocking
         generate_wordcloud(texts)
-        generate_heatmap(result)
-        generate_timeline(result)
         # ANALYSIS
         top_words   = get_top_words(texts)
         topics      = get_topics(texts)
-        insight     = generate_insight(result)
         clusters    = cluster_opinions(texts)
-        hoax        = detect_hoax(texts)
         network     = build_network(texts)
         bot_network = detect_bot_network(texts)
-        trend       = predict_trend(result)
-        # ADVANCED (optional)
         bot_bert  = detect_bot_bert(texts)
         fake_news = detect_fake_news(texts)
-        gnn       = run_gnn(bot_network["nodes"], bot_network["edges"])
-        # SAVE CSV
         os.makedirs("static", exist_ok=True)
-        pd.DataFrame(result).to_csv("static/result.csv", index=False)
         return jsonify({
-            "data":        result,
             "top_words":   top_words,
             "topics":      topics,
             "insight":     insight,
@@ -380,7 +698,7 @@ def analyze():
             "trend":       trend,
             "bot_bert":    bot_bert,
             "fake_news":   fake_news,
-            "gnn":         gnn
         })
     except Exception as e:
@@ -401,8 +719,5 @@ def static_files(filename):
     return send_file(f"static/{filename}")
-# =========================
-# RUN
-# =========================
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860, debug=False)

 from flask import Flask, render_template, request, jsonify, send_file
 from services.aggregator import collect_data
+from services.sentiment import predict, predict_with_score
 from collections import Counter
 import pandas as pd
 import os
 import re
 import numpy as np
+from datetime import datetime
 import matplotlib
+matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from sklearn.decomposition import LatentDirichletAllocation
 from sklearn.cluster import KMeans
 from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import Pipeline
 import networkx as nx
 from itertools import combinations
+from wordcloud import WordCloud
 # OPTIONAL ADVANCED
 try:
 app = Flask(__name__)
+# =============================================================
+# STOPWORDS & SLANG NORMALIZATION
+# =============================================================
 STOPWORDS_ID = {
     'yang','dan','di','ke','dari','ini','itu','dengan','untuk','adalah','ada',
     'pada','juga','tidak','bisa','sudah','saya','kamu','kami','mereka','kita',
     'nya','pun','aja','gak','ga','ya','yg','dgn','yah','dah','udah','mau',
+    'jadi','buat','kalau','tp','tapi','banget','sangat','lebih','nih','sih',
+    'dong','lah','lagi','terus','sama','atau','karena','so','the','is','in',
+    'of','to','a','an','and','it','for','that','this','was','are','be',
+    'has','have','had','do','does','did','will','would','could','should',
 }
# Informal / abbreviated Indonesian tokens mapped to the form used downstream.
# Each key appears exactly once: a repeated key in a dict literal is silently
# overwritten by the later entry, which hides typos. Identity mappings
# (e.g. 'ke' -> 'ke') are omitted because the lookup already falls back to the
# token itself.
SLANG_MAP = {
    'gak': 'tidak', 'ga': 'tidak', 'nggak': 'tidak', 'ngga': 'tidak', 'enggak': 'tidak',
    'yg': 'yang', 'dgn': 'dengan', 'utk': 'untuk', 'krn': 'karena', 'karna': 'karena',
    'udah': 'sudah', 'udh': 'sudah', 'dah': 'sudah', 'sdh': 'sudah',
    'gue': 'saya', 'gw': 'saya', 'aku': 'saya', 'w': 'saya',
    'lo': 'kamu', 'lu': 'kamu', 'elo': 'kamu',
    'tp': 'tapi', 'tpi': 'tapi',
    'jg': 'juga', 'jga': 'juga',
    'bs': 'bisa', 'bsa': 'bisa',
    'lg': 'lagi', 'lgi': 'lagi',
    'sm': 'sama', 'bgt': 'banget', 'bngt': 'banget',
    'emg': 'memang', 'emang': 'memang', 'mmg': 'memang',
    'kyk': 'kayak', 'kek': 'kayak',
    'dr': 'dari', 'pd': 'pada',
    'spy': 'supaya', 'biar': 'supaya',
    'msh': 'masih', 'masi': 'masih',
    'blm': 'belum', 'blum': 'belum',
    'jd': 'jadi', 'jdi': 'jadi',
    'sy': 'saya', 'mrk': 'mereka',
    'mk': 'maka', 'sdgkan': 'sedangkan',
    'hrs': 'harus', 'wajib': 'harus', 'perlu': 'harus',
    'krg': 'kurang', 'krang': 'kurang',
    'skrg': 'sekarang', 'skg': 'sekarang',
    'tdk': 'tidak', 'bkn': 'bukan',
    'pdhl': 'padahal', 'pdhal': 'padahal',
    'bnr': 'benar', 'bner': 'benar',
    'slh': 'salah', 'slah': 'salah',
    'org': 'orang', 'orng': 'orang',
    'trs': 'terus', 'trus': 'terus',
    'knp': 'kenapa', 'ngp': 'kenapa',
    # All spellings of "how" share one target so they are counted as one word.
    'gmn': 'bagaimana', 'gmana': 'bagaimana', 'bgmn': 'bagaimana',
    'aja': 'saja', 'aj': 'saja',
    'ok': 'oke', 'okay': 'oke',
    'wkwk': 'haha', 'wkwkwk': 'haha', 'hehe': 'haha', 'hihi': 'haha',
    'brp': 'berapa', 'brapa': 'berapa',
    'stlh': 'setelah', 'sblm': 'sebelum',
    'ttg': 'tentang', 'mnrt': 'menurut',
}

# =============================================================
# TEXT CLEANING WITH SLANG NORMALIZATION
# =============================================================
_URL_RE = re.compile(r'http\S+|www\.\S+')
_MENTION_RE = re.compile(r'@\w+')
_HASHTAG_RE = re.compile(r'#(\w+)')
# Letters only: applying this to digits would turn "1000000" into "100".
_ELONGATION_RE = re.compile(r'([a-z])\1{2,}')
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9\s]')


def clean_text(t: str) -> str:
    """Normalize a raw comment for word counting and network building.

    Steps, in order: lowercase and trim; drop URLs and @mentions; keep the
    word of a #hashtag; shorten runs of three or more identical letters to
    two ("baguuus" -> "baguus"); replace every remaining non-alphanumeric
    character with a space; map each token through ``SLANG_MAP``.

    Args:
        t: Raw text. ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned text with tokens separated by single spaces.
    """
    if not t:
        return ""
    t = t.lower().strip()
    t = _URL_RE.sub('', t)
    t = _MENTION_RE.sub('', t)
    t = _HASHTAG_RE.sub(r'\1', t)
    t = _ELONGATION_RE.sub(r'\1\1', t)
    t = _NON_ALNUM_RE.sub(' ', t)
    # split() + join() also collapses whitespace runs and trims both ends.
    return ' '.join(SLANG_MAP.get(w, w) for w in t.split())
+# =============================================================
+# TOP WORDS
+# =============================================================
+def get_top_words(texts: list) -> list:
     words = []
     for t in texts:
         for w in clean_text(t).split():
     return [{"word": w, "count": c} for w, c in Counter(words).most_common(15)]
+# =============================================================
+# 🔴 FIX 1: DETEKSI HOAKS — ML-based (TF-IDF + Logistic Regression)
+# =============================================================
# Minimal seed corpus used to bootstrap the hoax classifier.
# Label convention: 1 = potentially hoax/disinformation, 0 = normal.
# The two classes live in separate lists and the label vector is derived from
# their lengths, so adding or removing a sentence cannot desynchronise texts
# and labels.
_HOAX_EXAMPLES = [
    "berita ini bohong dan tidak benar sama sekali",
    "ini adalah propaganda yang menyesatkan masyarakat",
    "jangan percaya hoax yang beredar di media sosial",
    "informasi palsu yang disebarkan untuk memfitnah",
    "ini adalah disinformasi yang sengaja dibuat untuk menipu",
    "berita palsu yang beredar sangat meresahkan warga",
    "mereka menyebarkan kebohongan dan fitnah kepada publik",
    "isu ini adalah manipulasi politik yang berbahaya",
    "provokasi yang dilakukan untuk memecah belah bangsa",
    "konten ini mengandung ujaran kebencian dan fitnah",
    "waspada berita bohong yang sengaja disebarkan",
    "ini hoaks yang sudah dibantah oleh pihak berwenang",
    "informasi yang menyesatkan dan tidak ada buktinya",
    "narasi sesat yang dibuat untuk mengadu domba",
    "berita manipulatif yang perlu diklarifikasi segera",
]
_NORMAL_EXAMPLES = [
    "produk ini sangat bagus dan berkualitas tinggi",
    "saya sangat senang dengan pelayanannya yang ramah",
    "hasil kerja tim ini luar biasa dan membanggakan",
    "kebijakan ini berdampak positif bagi masyarakat luas",
    "acara kemarin berjalan lancar dan sangat meriah",
    "terima kasih atas bantuan yang diberikan selama ini",
    "pemerintah berupaya meningkatkan kesejahteraan rakyat",
    "inovasi terbaru ini sangat membantu kehidupan sehari-hari",
    "prestasi luar biasa yang patut kita banggakan bersama",
    "kondisi ekonomi mulai membaik berdasarkan data terbaru",
    "program ini memberikan manfaat nyata bagi warga",
    "kolaborasi yang baik menghasilkan output yang optimal",
    "penelitian ini memberikan temuan yang sangat menarik",
    "masyarakat antusias menyambut kebijakan baru tersebut",
    "kualitas pendidikan terus meningkat dari tahun ke tahun",
]
_HOAX_TRAIN_TEXTS = _HOAX_EXAMPLES + _NORMAL_EXAMPLES
_HOAX_TRAIN_LABELS = [1] * len(_HOAX_EXAMPLES) + [0] * len(_NORMAL_EXAMPLES)
def _train_hoax_pipeline():
    """Build and fit the TF-IDF + logistic-regression hoax classifier.

    Returns the fitted pipeline, or ``None`` when fitting raises, so that
    callers can switch to their fallback path.
    """
    vectorizer = TfidfVectorizer(
        sublinear_tf=True,
        max_features=500,
        ngram_range=(1, 2),
    )
    classifier = LogisticRegression(
        class_weight='balanced',
        random_state=42,
        max_iter=200,
        C=1.0,
    )
    model = Pipeline([('tfidf', vectorizer), ('clf', classifier)])
    try:
        model.fit(_HOAX_TRAIN_TEXTS, _HOAX_TRAIN_LABELS)
        print("✅ Hoax classifier trained")
    except Exception as e:
        print(f"⚠️  Hoax classifier training failed: {e}")
        return None
    return model


# Trained once at import time and reused for every request.
_hoax_pipeline = _train_hoax_pipeline()
# Phrases used by the rule-based fallback; matched as lowercase substrings.
_HOAX_KEYWORDS = (
    "hoax", "bohong", "fitnah", "propaganda", "palsu", "fake", "disinformasi",
    "menyesatkan", "kebohongan", "manipulasi", "adu domba", "provokasi",
    "berita palsu", "ujaran kebencian", "tidak benar", "perlu diklarifikasi",
    "waspada", "jangan percaya", "disebarkan untuk", "narasi sesat",
)


def _detect_hoax_by_keyword(sample: list) -> list:
    """Label each text by counting how many fallback phrases it contains."""
    results = []
    for t in sample:
        lower = t.lower()
        score = sum(1 for k in _HOAX_KEYWORDS if k in lower)
        if score >= 1:
            # Confidence grows by 0.1 per matched phrase and is capped at 0.95.
            label, conf = "Hoax", min(0.5 + score * 0.1, 0.95)
        else:
            label, conf = "Normal", 0.6
        results.append({
            "text":       t,
            "label":      label,
            "confidence": round(conf, 3),
            "method":     "keyword"
        })
    return results


def detect_hoax(texts: list) -> list:
    """Classify the first 20 texts as "Hoax" or "Normal".

    The fitted ``_hoax_pipeline`` is used when it is available; if it is
    ``None`` or prediction raises, every text is labelled by the keyword
    fallback instead.

    Args:
        texts: Raw text strings; only ``texts[:20]`` is examined.

    Returns:
        One dict per examined text with keys ``text``, ``label``,
        ``confidence`` (rounded to 3 decimals) and ``method`` (``"ml"`` or
        ``"keyword"``). An empty input returns ``[]``.
    """
    sample = texts[:20]
    if not sample:
        # Nothing to classify; also avoids asking the model to predict on an
        # empty batch, which would only produce a spurious warning.
        return []

    try:
        if _hoax_pipeline is not None:
            preds = _hoax_pipeline.predict(sample)
            probas = _hoax_pipeline.predict_proba(sample)
            # Built as a single expression: a failure leaves no partial ML
            # rows behind for the fallback to be mixed with.
            return [
                {
                    "text":       t,
                    "label":      "Hoax" if pred == 1 else "Normal",
                    "confidence": round(float(max(proba)), 3),
                    "method":     "ml"
                }
                for t, pred, proba in zip(sample, preds, probas)
            ]
    except Exception as e:
        print(f"⚠️  Hoax ML predict error: {e} — fallback ke keyword")

    return _detect_hoax_by_keyword(sample)
+# =============================================================
+# 🔴 FIX 2: TREND — distribusi per-sumber, bukan regresi naif
+# =============================================================
def predict_trend(data: list) -> dict:
    """Summarize how sentiment is distributed over the analysed items.

    Args:
        data: Dicts carrying a ``"sentiment"`` value ("Positive", "Negative"
            or "Neutral") and optionally a ``"source"``. Items with any other
            or missing sentiment still count towards the totals but towards
            none of the three classes.

    Returns:
        A dict with the same keys for empty and non-empty input:

        - ``label``: "Dominan Positif", "Dominan Negatif", "Mayoritas Netral",
          "Terpolarisasi", or "Kurang Data" for empty input.
        - ``dominant``: "Positive", "Negative", "Neutral" or "Mixed".
        - ``polarity``: ``abs(positive share - negative share)``, 3 decimals.
        - ``confidence``: share of the dominant class; for "Terpolarisasi"
          it equals ``polarity``.
        - ``by_source``: per-source counts, total and percentages.
        - ``pos_pct`` / ``neg_pct`` / ``neu_pct``: overall percentages.
        - ``summary``: one-line human-readable description.
    """
    if not data:
        return {
            "label":        "Kurang Data",
            "dominant":     "Neutral",
            "polarity":     0.0,
            "confidence":   0.0,
            "by_source":    {},
            # Present so consumers can read the same keys as in the normal case.
            "pos_pct":      0.0,
            "neg_pct":      0.0,
            "neu_pct":      0.0,
            "summary":      "Tidak ada data yang cukup untuk analisis tren."
        }

    known_labels = ("Positive", "Negative", "Neutral")
    total = len(data)
    overall = Counter(d.get("sentiment") for d in data)
    pos_r = overall["Positive"] / total
    neg_r = overall["Negative"] / total
    neu_r = overall["Neutral"] / total

    # NOTE(review): this measures the net positive/negative imbalance; an even
    # positive/negative split yields 0. Confirm that is the intended meaning
    # of "polarisation" for the UI.
    polarity = round(abs(pos_r - neg_r), 3)

    # Per-source tallies.
    by_source = {}
    for d in data:
        bucket = by_source.setdefault(
            d.get("source", "unknown"),
            {"Positive": 0, "Negative": 0, "Neutral": 0, "total": 0},
        )
        sentiment = d.get("sentiment")
        if sentiment in known_labels:
            bucket[sentiment] += 1
        bucket["total"] += 1

    # Per-source percentages; every bucket has total >= 1 by construction.
    for bucket in by_source.values():
        t = bucket["total"]
        bucket["pos_pct"] = round(bucket["Positive"] / t * 100, 1)
        bucket["neg_pct"] = round(bucket["Negative"] / t * 100, 1)
        bucket["neu_pct"] = round(bucket["Neutral"] / t * 100, 1)

    # A class "dominates" only when strictly larger than both others; ties
    # fall through to the neutral-majority or mixed case.
    if pos_r > neg_r and pos_r > neu_r:
        label      = "Dominan Positif"
        dominant   = "Positive"
        confidence = round(pos_r, 3)
    elif neg_r > pos_r and neg_r > neu_r:
        label      = "Dominan Negatif"
        dominant   = "Negative"
        confidence = round(neg_r, 3)
    elif neu_r >= 0.5:
        label      = "Mayoritas Netral"
        dominant   = "Neutral"
        confidence = round(neu_r, 3)
    else:
        label      = "Terpolarisasi"
        dominant   = "Mixed"
        confidence = round(polarity, 3)

    # Source that contributed the most items (first one wins on ties).
    dominant_src = max(by_source, key=lambda s: by_source[s]["total"]) if by_source else "-"
    summary = (
        f"{label} ({round(pos_r*100,1)}% positif, "
        f"{round(neg_r*100,1)}% negatif, "
        f"{round(neu_r*100,1)}% netral). "
        f"Indeks polarisasi: {polarity:.2f}. "
        f"Sumber terbanyak: {dominant_src}."
    )

    return {
        "label":      label,
        "dominant":   dominant,
        "polarity":   polarity,
        "confidence": confidence,
        "by_source":  by_source,
        "pos_pct":    round(pos_r * 100, 1),
        "neg_pct":    round(neg_r * 100, 1),
        "neu_pct":    round(neu_r * 100, 1),
        "summary":    summary,
    }
+# =============================================================
+# 🔴 FIX 3: TIMELINE — rolling-mean sentiment proportions (not cumulative)
+# =============================================================
def generate_timeline(data: list):
    """Plot rolling sentiment proportions and save them to static/timeline.png.

    The x-axis is the 1-based position of each item in ``data``; the y-axis is
    the trailing rolling mean of a 0/1 indicator per sentiment class, i.e. the
    share of that class within the last ``window`` items. Dotted horizontal
    lines mark the overall positive and negative shares.

    Nothing is drawn for fewer than three items. Errors are printed rather
    than raised, and the figure is always closed.

    Args:
        data: Dicts with a ``"sentiment"`` key.
    """
    fig = None
    try:
        if not data or len(data) < 3:
            return
        os.makedirs("static", exist_ok=True)
        window = max(5, len(data) // 10)   # adaptive window, never below 5

        def rolling_mean(arr, w):
            # Trailing mean; the first w-1 points average over what exists.
            result = []
            for i in range(len(arr)):
                sl = arr[max(0, i - w + 1): i + 1]
                result.append(sum(sl) / len(sl))
            return result

        pos_raw = [1 if d["sentiment"] == "Positive" else 0 for d in data]
        neg_raw = [1 if d["sentiment"] == "Negative" else 0 for d in data]
        neu_raw = [1 if d["sentiment"] == "Neutral"  else 0 for d in data]
        x   = list(range(1, len(data) + 1))
        pos = rolling_mean(pos_raw, window)
        neg = rolling_mean(neg_raw, window)
        neu = rolling_mean(neu_raw, window)

        fig, ax = plt.subplots(figsize=(11, 3.5))
        fig.patch.set_facecolor('#0e1117')
        ax.set_facecolor('#141820')
        ax.fill_between(x, pos, alpha=0.15, color='#22c55e')
        ax.fill_between(x, neg, alpha=0.15, color='#ef4444')
        ax.plot(x, pos, label='Positif',  color='#22c55e', linewidth=1.8, alpha=0.9)
        ax.plot(x, neg, label='Negatif',  color='#ef4444', linewidth=1.8, alpha=0.9)
        ax.plot(x, neu, label='Netral',   color='#94a3b8', linewidth=1.2, alpha=0.7, linestyle='--')
        ax.set_xlabel(
            f'Urutan komentar (rolling mean, window={window})',
            color='#5a6070', fontsize=8
        )
        ax.set_ylabel('Proporsi', color='#5a6070', fontsize=8)
        ax.tick_params(colors='#5a6070', labelsize=7)
        for spine in ax.spines.values():
            spine.set_edgecolor('#1a2030')
        ax.legend(
            fontsize=8, loc='upper right',
            facecolor='#141820', edgecolor='#1a2030',
            labelcolor='#8892a4'
        )
        ax.set_ylim(0, 1.05)
        ax.set_xlim(1, len(data))
        # Overall averages as faint reference lines.
        ax.axhline(np.mean(pos_raw), color='#22c55e', linewidth=0.6, linestyle=':', alpha=0.5)
        ax.axhline(np.mean(neg_raw), color='#ef4444', linewidth=0.6, linestyle=':', alpha=0.5)
        # Figure methods are used instead of pyplot's implicit "current
        # figure" so the call always acts on the figure created above.
        fig.tight_layout(pad=1.0)
        fig.savefig("static/timeline.png", dpi=110, facecolor=fig.get_facecolor())
    except Exception as e:
        print("timeline error:", e)
    finally:
        # Close even when plotting or saving failed; pyplot otherwise keeps
        # the figure registered and its memory is never released.
        if fig is not None:
            plt.close(fig)
+# =============================================================
+# 🔴 FIX 4: GNN — deterministic, fitur TF-IDF bukan random
+# =============================================================
def run_gnn_safe(nodes: list, edges: list, texts: list) -> list:
    """Score every node with a forward pass of a small two-layer GCN.

    Node features are TF-IDF vectors of the node texts, and the network
    weights are initialised under a fixed seed, so equal input gives equal
    output. The network is NOT trained: the score is the min-max-normalised
    L2 norm of each node's output embedding under those fixed random weights.
    NOTE(review): confirm this is an acceptable proxy for "anomaly".

    Args:
        nodes: Dicts with an ``"id"`` key; an id that is a valid index into
            ``texts`` selects that node's text, otherwise the text is empty.
        edges: Dicts with ``"source"`` and ``"target"`` node ids. Edges are
            treated as undirected; edges naming unknown ids are ignored.
        texts: Source texts.

    Returns:
        One ``{"node": id, "score": float}`` per node, in input order. All
        scores are 0.0 when there are fewer than three nodes, no usable
        edges, the torch / torch-geometric imports fail, or any other error
        occurs.
    """
    zero_scores = [{"node": n["id"], "score": 0.0} for n in (nodes or [])]
    if not nodes or not edges or len(nodes) < 3:
        return zero_scores
    try:
        import torch
        from torch_geometric.nn import GCNConv

        # Row index of every node id; edges are expressed in these positions
        # so ids need not be 0..n-1.
        position = {n["id"]: i for i, n in enumerate(nodes)}

        # Node features from TF-IDF. The lower bound keeps a negative id from
        # wrapping around to the end of `texts`.
        node_texts = [
            texts[n["id"]] if 0 <= n["id"] < len(texts) else ""
            for n in nodes
        ]
        vec = TfidfVectorizer(max_features=32, min_df=1)
        try:
            X = vec.fit_transform(node_texts).toarray()
        except Exception:
            # Fallback when TF-IDF cannot be built (e.g. every text is empty).
            X = np.eye(len(nodes), 32)
        x = torch.tensor(X, dtype=torch.float)

        # GCNConv passes messages along edge_index as given, so each
        # undirected edge is listed in both directions.
        edge_list = []
        for e in edges:
            u = position.get(e["source"])
            v = position.get(e["target"])
            if u is None or v is None:
                continue
            edge_list.append([u, v])
            edge_list.append([v, u])
        if not edge_list:
            return zero_scores
        edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()

        class GCN(torch.nn.Module):
            def __init__(self, in_ch):
                super().__init__()
                self.conv1 = GCNConv(in_ch, 16)
                self.conv2 = GCNConv(16, 4)

            def forward(self, x, ei):
                x = torch.relu(self.conv1(x, ei))
                return self.conv2(x, ei)

        # Fixed seed so the randomly initialised weights are reproducible.
        torch.manual_seed(42)
        model = GCN(x.shape[1])
        model.eval()
        with torch.no_grad():
            out = model(x, edge_index)

        # Score = L2 norm of the output embedding, rescaled to [0, 1].
        scores = torch.norm(out, dim=1).numpy()
        if scores.max() > scores.min():
            scores = (scores - scores.min()) / (scores.max() - scores.min())
        else:
            scores = np.zeros(len(scores))
        return [
            {"node": nodes[i]["id"], "score": round(float(scores[i]), 3)}
            for i in range(len(nodes))
        ]
    except ImportError:
        print("⚠️  torch-geometric tidak tersedia — skip GNN")
        return zero_scores
    except Exception as e:
        print(f"⚠️  GNN error: {e}")
        return zero_scores
+# =============================================================
+# FUNGSI LAIN (tidak berubah, tapi disempurnakan)
+# =============================================================
 def generate_wordcloud(texts):
     """Render a word cloud of ``texts`` to static/wordcloud.png.

     Texts whose stripped length is 3 or less are ignored; nothing is written
     when no text remains. Errors are printed, not raised.
     """
     def _flat_colour(*_args, **_kwargs):
         # Every word is drawn in the same accent colour.
         return '#4f9cf9'

     try:
         os.makedirs("static", exist_ok=True)
         usable = []
         for candidate in texts:
             if len(candidate.strip()) > 3:
                 usable.append(candidate)
         if len(usable) == 0:
             return
         corpus = " ".join(usable)
         cloud = WordCloud(
             stopwords=STOPWORDS_ID,
             max_words=80,
             color_func=_flat_colour,
             background_color='#0e1117',
             height=380,
             width=900,
         ).generate(corpus)
         cloud.to_file("static/wordcloud.png")
     except Exception as err:
         print("wordcloud error:", err)
 def generate_heatmap(data):
     try:
         if not data:
         labels  = ["Positive", "Neutral", "Negative"]
         sources = sorted(set(d["source"] for d in data))
         matrix  = np.zeros((len(sources), len(labels)))
         for d in data:
             i = sources.index(d["source"])
             j = labels.index(d["sentiment"])
             matrix[i][j] += 1
         if matrix.sum() == 0:
             return
         fig, ax = plt.subplots(figsize=(6, max(2, len(sources))))
+        fig.patch.set_facecolor('#0e1117')
+        ax.set_facecolor('#141820')
         im = ax.imshow(matrix, cmap='Blues', aspect='auto')
         ax.set_xticks(range(len(labels)))
+        ax.set_xticklabels(labels, color='#8892a4', fontsize=9)
         ax.set_yticks(range(len(sources)))
+        ax.set_yticklabels(sources, color='#8892a4', fontsize=9)
+        ax.tick_params(colors='#5a6070')
         plt.colorbar(im, ax=ax)
         plt.tight_layout()
         os.makedirs("static", exist_ok=True)
+        plt.savefig("static/heatmap.png", dpi=100, facecolor=fig.get_facecolor())
         plt.close(fig)
     except Exception as e:
         print("heatmap error:", e)
 def get_topics(texts):
     """Extract up to three LDA topics, each given as a list of up to five words.

     Args:
         texts: Raw text strings; entries of length 3 or less are ignored.

     Returns:
         A list of word lists, one per topic, each ordered from lowest to
         highest weight among that topic's top words. Sentinel results:
         ``[["data kurang"]]`` for fewer than five usable texts,
         ``[["kosong"]]`` when no vocabulary remains after stop-word and
         ``min_df`` filtering, ``[["error"]]`` on any other failure.
     """
     try:
         texts = [t for t in texts if len(t) > 3]
         if len(texts) < 5:
             return [["data kurang"]]
         vec = CountVectorizer(min_df=2, stop_words=list(STOPWORDS_ID))
         try:
             X = vec.fit_transform(texts)
         except ValueError:
             # CountVectorizer raises instead of returning an empty matrix
             # when nothing survives filtering; report that as "empty".
             return [["kosong"]]
         if X.shape[1] == 0:
             return [["kosong"]]
         # Never ask for more topics than there are vocabulary terms.
         n   = min(3, X.shape[1])
         lda = LatentDirichletAllocation(n_components=n, random_state=42)
         lda.fit(X)
         words  = vec.get_feature_names_out()
         return [[words[i] for i in t.argsort()[-5:]] for t in lda.components_]
     except Exception as e:
         print("topic error:", e)
         return [["error"]]
 def generate_insight(data):
     s = [d["sentiment"] for d in data]
     return (f"Positive:{s.count('Positive')} "
             f"Neutral:{s.count('Neutral')}")
 def cluster_opinions(texts):
     try:
         if len(texts) < 6:
         n = min(3, len(texts))
         k = KMeans(n_clusters=n, n_init=10, random_state=42).fit(X)
         clusters = {}
+        for i, lbl in enumerate(k.labels_):
+            clusters.setdefault(int(lbl), []).append(texts[i])
+        return [{"cluster": lbl, "samples": s[:3]} for lbl, s in clusters.items()]
     except Exception as e:
         print("cluster error:", e)
         return []
 def build_network(texts):
     edges = {}
     for t in texts:
+        words = [w for w in set(clean_text(t).split())
+                 if len(w) > 3 and w not in STOPWORDS_ID][:6]
         for a, b in combinations(words, 2):
             key = tuple(sorted([a, b]))
             edges[key] = edges.get(key, 0) + 1
             for k, v in edges.items() if v > 1]
 def detect_bot_network(texts):
     try:
         if len(texts) < 5:
             return {"nodes": [], "edges": [], "bots": []}
         X   = TfidfVectorizer(max_features=300).fit_transform(texts)
         sim = cosine_similarity(X)
+        G   = nx.Graph()
         for i in range(len(texts)):
             G.add_node(i, text=texts[i])
         for i in range(len(texts)):
             for j in range(i + 1, len(texts)):
                 if sim[i][j] > 0.75:
                     G.add_edge(i, j)
         central = nx.degree_centrality(G)
         bots    = [{"node": i, "score": round(s, 2), "text": texts[i]}
                    for i, s in central.items() if s > 0.3]
         return {
             "nodes": [{"id": i} for i in G.nodes()],
             "edges": [{"source": u, "target": v} for u, v in G.edges()],
         return {"nodes": [], "edges": [], "bots": []}
+# =============================================================
 # ROUTES
+# =============================================================
 @app.route("/")
 def home():
     """Serve the root URL by rendering the ``index.html`` template."""
     return render_template("index.html")
         texts   = [t for _, t in raw][:100]
         sources = [s for s, _ in raw][:100]
+        # Sentimen dengan confidence score
+        scored     = predict_with_score(texts)
+        sentiments = [s["label"] for s in scored]
+        scores     = [s["score"] for s in scored]
+        result_data = [
+            {
+                "text":       t,
+                "sentiment":  s,
+                "confidence": c,
+                "source":     src,
+                "scraped_at": datetime.now().strftime("%Y-%m-%d %H:%M")
+            }
+            for t, s, c, src in zip(texts, sentiments, scores, sources)
         ]
+        # VISUAL
         generate_wordcloud(texts)
+        generate_heatmap(result_data)
+        generate_timeline(result_data)
         # ANALYSIS
         top_words   = get_top_words(texts)
         topics      = get_topics(texts)
+        insight     = generate_insight(result_data)
         clusters    = cluster_opinions(texts)
+        trend       = predict_trend(result_data)     # dict sekarang
+        hoax        = detect_hoax(texts)             # ML-based
         network     = build_network(texts)
         bot_network = detect_bot_network(texts)
+        # GNN deterministik
+        gnn = run_gnn_safe(bot_network["nodes"], bot_network["edges"], texts)
+        # ADVANCED optional
         bot_bert  = detect_bot_bert(texts)
         fake_news = detect_fake_news(texts)
+        # SAVE CSV dengan kolom lebih lengkap
         os.makedirs("static", exist_ok=True)
+        pd.DataFrame(result_data).to_csv("static/result.csv", index=False)
         return jsonify({
+            "data":        result_data,
             "top_words":   top_words,
             "topics":      topics,
             "insight":     insight,
             "trend":       trend,
             "bot_bert":    bot_bert,
             "fake_news":   fake_news,
+            "gnn":         gnn,
         })
     except Exception as e:
     return send_file(f"static/{filename}")
 # When executed as a script, start Flask's built-in server on all interfaces
 # at port 7860 with debug mode off.
 if __name__ == "__main__":
     app.run(host="0.0.0.0", port=7860, debug=False)