# app.py — Flask dashboard for multi-platform sentiment analysis.
# NOTE(review): this file was recovered from a web scrape; non-source residue
# (Hugging Face Spaces page chrome, git blame hashes, gutter line numbers)
# has been removed from the top of the file.
from flask import Flask, render_template, request, jsonify, send_file

# Data collector service; keep the app bootable with an error stub if it fails.
try:
    from services.aggregator import collect_data
except Exception as e:
    # Repaired mojibake in the original print ("β" was a mis-decoded emoji).
    print(f"❌ FATAL: aggregator gagal load: {e}")
    def collect_data(kw, src="all"):
        """Stub collector: returns a single error row so the UI still renders."""
        return [("unknown", "aggregator error")]

# Transformer sentiment model; fall back to a tiny keyword rule when missing.
try:
    from services.sentiment import predict_with_score
except Exception as e:
    print(f"⚠️ sentiment gagal load: {e} → rule-based fallback")
    def predict_with_score(texts):
        """Rule-based fallback: count positive/negative keyword hits per text.

        Returns a list of {'label': str, 'score': float} dicts, mirroring the
        real model's output shape. Score is a fixed 0.5 (no real confidence).
        """
        def _rb(t):
            pos = sum(1 for k in ['bagus','baik','senang','suka','mantap','oke','good','great'] if k in t.lower())
            neg = sum(1 for k in ['buruk','jelek','benci','kecewa','gagal','bad','worst'] if k in t.lower())
            label = 'Positive' if pos > neg else 'Negative' if neg > pos else 'Neutral'
            return {'label': label, 'score': 0.5}
        return [_rb(t) for t in texts]
from collections import Counter
import pandas as pd
import os, re
import numpy as np
from datetime import datetime
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import networkx as nx
from itertools import combinations
from wordcloud import WordCloud
# Deep preprocessing (Indonesian); fall back to a light regex cleaner.
try:
    from services.preprocessing_id import clean_text_deep, batch_clean, STOPWORDS
    DEEP_PREP = True
    # Repaired: this print was split across two lines by a mojibake'd emoji.
    print("✅ Deep preprocessing loaded")
except ImportError:
    DEEP_PREP = False
    # Minimal Indonesian + English stopword set for the fallback cleaner.
    STOPWORDS = {'yang','dan','di','ke','dari','ini','itu','dengan','untuk','adalah','ada','pada','juga','tidak','bisa','sudah','the','is','in','of','a','an','and','it'}
    def clean_text_deep(t):
        """Lowercase, strip URLs and punctuation, collapse whitespace."""
        t = t.lower()
        t = re.sub(r'http\S+', '', t)          # drop URLs first
        t = re.sub(r'[^a-zA-Z0-9\s]', ' ', t)  # non-alphanumerics -> space
        return re.sub(r'\s+', ' ', t).strip()
    def batch_clean(texts):
        """Apply clean_text_deep to every text in *texts*."""
        return [clean_text_deep(t) for t in texts]
# Optional BERT bot detector — degrade to a no-op when the service is absent.
try:
    from services.bot_bert import detect_bot_bert
except Exception:
    def detect_bot_bert(x):
        """Stub: bot detection unavailable, report nothing."""
        return []

# Optional fake-news detector — same no-op degradation strategy.
try:
    from services.fake_news import detect_fake_news
except Exception:
    def detect_fake_news(x):
        """Stub: fake-news detection unavailable, report nothing."""
        return []
# ── New NLP Services ──
# Each optional service degrades to a stub returning an empty-but-shaped
# structure so downstream JSON consumers never see missing keys.
try:
    from services.absa import analyze_absa
except Exception as e:
    # Repaired mojibake "β οΈ" -> "⚠️" in the warning prints below.
    print(f"⚠️ ABSA not available: {e}")
    def analyze_absa(x):
        """Stub: aspect-based sentiment unavailable."""
        return {'top_aspects':[],'aggregate':{},'aspect_sentiment_map':{}}
try:
    from services.ner import analyze_ner
except Exception as e:
    print(f"⚠️ NER not available: {e}")
    def analyze_ner(x):
        """Stub: named-entity recognition unavailable."""
        return {'top_entities':[],'entities_by_type':{}}
try:
    from services.advanced_nlp import (
        analyze_stance, analyze_emotions,
        extract_keywords, summarize_by_platform
    )
except Exception as e:
    print(f"⚠️ Advanced NLP not available: {e}")
    def analyze_stance(x, t=None): return {'counts':{},'dominant':'Neutral','favor_pct':0,'against_pct':0,'neutral_pct':0}
    def analyze_emotions(x): return {'distribution':{},'dominant':'neutral','emotional_pct':0}
    def extract_keywords(x, n=20): return []
    def summarize_by_platform(x): return {}
app = Flask(__name__)
CONF_THRESHOLD = 0.60  # predictions below this confidence become 'Uncertain'

# ── HOAX CLASSIFIER ──
# Tiny in-memory training set: 15 hoax-flavoured + 15 benign Indonesian
# sentences. This is a toy seed model, not a production classifier.
_HX = ["berita ini bohong dan tidak benar","ini propaganda yang menyesatkan","jangan percaya hoax yang beredar","informasi palsu disebarkan untuk memfitnah","disinformasi sengaja dibuat untuk menipu","berita palsu sangat meresahkan warga","menyebarkan kebohongan dan fitnah","manipulasi politik yang berbahaya","provokasi untuk memecah belah bangsa","ujaran kebencian dan fitnah","waspada berita bohong sengaja disebarkan","hoaks sudah dibantah pihak berwenang","informasi menyesatkan tidak ada bukti","narasi sesat untuk mengadu domba","berita manipulatif perlu diklarifikasi","produk ini sangat bagus dan berkualitas","saya sangat senang dengan pelayanannya","hasil kerja tim ini luar biasa","kebijakan ini berdampak positif masyarakat","acara kemarin berjalan lancar dan meriah","terima kasih atas bantuan yang diberikan","pemerintah berupaya meningkatkan kesejahteraan","inovasi terbaru sangat membantu kehidupan","prestasi luar biasa yang membanggakan","kondisi ekonomi mulai membaik dari data","program ini memberikan manfaat nyata","kolaborasi baik menghasilkan output optimal","penelitian ini memberikan temuan menarik","masyarakat antusias menyambut kebijakan baru","kualitas pendidikan terus meningkat"]
_HY = [1]*15 + [0]*15  # 1 = hoax, 0 = normal (order matches _HX)
_hoax_clf = Pipeline([('tfidf', TfidfVectorizer(ngram_range=(1,2), max_features=500, sublinear_tf=True)), ('clf', LogisticRegression(C=1.0, max_iter=200, random_state=42, class_weight='balanced'))])
try:
    _hoax_clf.fit(_HX, _HY)
    # Repaired: this print was split across two lines by a mojibake'd emoji.
    print("✅ Hoax classifier ready")
except Exception as e:
    print(f"⚠️ Hoax error: {e}")
    _hoax_clf = None  # detect_hoax() falls back to keyword matching
# ── CONFIDENCE FILTER (Priority 2) ──
def apply_confidence_filter(scored, threshold=CONF_THRESHOLD):
    """Relabel predictions whose confidence is below *threshold* as 'Uncertain'.

    Each input dict (with 'label' and 'score') is copied and extended with
    'sentiment', 'confidence' (rounded to 4 dp) and 'is_certain'.
    """
    filtered = []
    for entry in scored:
        certainty = entry.get('score', 0)
        predicted = entry.get('label', 'Neutral')
        certain = certainty >= threshold
        filtered.append({
            **entry,
            'sentiment': predicted if certain else 'Uncertain',
            'confidence': round(certainty, 4),
            'is_certain': certain,
        })
    return filtered
def confidence_stats(result_data):
    """Summarise prediction confidence per sentiment class and globally.

    Returns per-class count/mean/std/min/max, coarse confidence buckets,
    the number of uncertain rows, and the overall average confidence.
    """
    grouped = {'Positive': [], 'Negative': [], 'Neutral': [], 'Uncertain': []}
    for row in result_data:
        label = row.get('sentiment', 'Neutral')
        # Unknown labels are folded into the 'Uncertain' bucket.
        key = label if label in grouped else 'Uncertain'
        grouped[key].append(row.get('confidence', 0))
    per_class = {}
    for name, vals in grouped.items():
        if not vals:
            per_class[name] = {'count': 0, 'mean': 0, 'std': 0, 'min': 0, 'max': 0}
            continue
        arr = np.asarray(vals, dtype=float)
        per_class[name] = {
            'count': len(vals),
            'mean': round(float(arr.mean()), 3),
            'std': round(float(arr.std()), 3),
            'min': round(float(arr.min()), 3),
            'max': round(float(arr.max()), 3),
        }
    confidences = [row.get('confidence', 0) for row in result_data]
    return {
        'by_class': per_class,
        'buckets': {
            'high (β₯0.8)': sum(1 for c in confidences if c >= 0.8),
            'med (0.6-0.8)': sum(1 for c in confidences if 0.6 <= c < 0.8),
            'low (<0.6)': sum(1 for c in confidences if c < 0.6),
        },
        'uncertain_count': sum(1 for row in result_data if not row.get('is_certain', True)),
        'avg_confidence': round(float(np.mean(confidences)), 3) if confidences else 0,
    }
# ── CROSS-PLATFORM ANALYSIS (Priority 3) ──
def cross_platform_analysis(result_data):
    """Compare sentiment distributions across source platforms.

    Returns per-platform counts/percentages/polarity, pairwise positive-share
    differences, human-readable insights (Indonesian), and the platforms with
    the highest positive, negative, and polarity scores.
    """
    # Pass 1: raw tallies per platform.
    platforms = {}
    for r in result_data:
        src = r.get('source','unknown'); sent = r.get('sentiment','Neutral'); conf = r.get('confidence',0)
        if src not in platforms:
            platforms[src] = {'Positive':0,'Negative':0,'Neutral':0,'Uncertain':0,'total':0,'conf_sum':0}
        # Unknown labels are folded into 'Uncertain'.
        if sent in platforms[src]: platforms[src][sent] += 1
        else: platforms[src]['Uncertain'] += 1
        platforms[src]['total'] += 1
        platforms[src]['conf_sum'] += conf
    # Pass 2: derived percentages and polarity per platform.
    platform_stats = {}
    for src, c in platforms.items():
        t = c['total'] or 1  # guard against division by zero
        pos_r = c['Positive']/t; neg_r = c['Negative']/t; neu_r = c['Neutral']/t
        # polarity = |pos share - neg share|: 0 = balanced, 1 = one-sided.
        platform_stats[src] = {'total':t,'pos_count':c['Positive'],'neg_count':c['Negative'],'neu_count':c['Neutral'],'unc_count':c['Uncertain'],'pos_pct':round(pos_r*100,1),'neg_pct':round(neg_r*100,1),'neu_pct':round(neu_r*100,1),'unc_pct':round(c['Uncertain']/t*100,1),'polarity':round(abs(pos_r-neg_r),3),'avg_conf':round(c['conf_sum']/t,3),'dominant':max(['Positive','Negative','Neutral','Uncertain'],key=lambda s:c[s])}
    if not platform_stats:
        # Empty input: return the full shape so JSON consumers see every key.
        return {'platforms':{},'pairwise':[],'insights':[],'most_positive':None,'most_negative':None,'most_polarized':None}
    srcs = list(platform_stats.keys())
    most_positive = max(srcs, key=lambda s: platform_stats[s]['pos_pct'])
    most_negative = max(srcs, key=lambda s: platform_stats[s]['neg_pct'])
    most_polarized = max(srcs, key=lambda s: platform_stats[s]['polarity'])
    # All unordered platform pairs, compared by positive-sentiment share.
    pairwise = []
    for i in range(len(srcs)):
        for j in range(i+1, len(srcs)):
            a, b = srcs[i], srcs[j]
            diff = round(abs(platform_stats[a]['pos_pct']-platform_stats[b]['pos_pct']),1)
            pairwise.append({'platform_a':a,'platform_b':b,'pos_diff':diff,'description':f"{a} vs {b}: selisih sentimen positif {diff}%"})
    # Insights only make sense when there is more than one platform.
    insights = []
    if len(srcs) > 1:
        insights.append(f"{most_positive.capitalize()} memiliki sentimen positif tertinggi ({platform_stats[most_positive]['pos_pct']}%).")
        insights.append(f"{most_negative.capitalize()} memiliki sentimen negatif tertinggi ({platform_stats[most_negative]['neg_pct']}%).")
        insights.append(f"{most_polarized.capitalize()} paling terpolarisasi (indeks {platform_stats[most_polarized]['polarity']}).")
    return {'platforms':platform_stats,'pairwise':pairwise,'insights':insights,'most_positive':most_positive,'most_negative':most_negative,'most_polarized':most_polarized}
def generate_comparative_chart(cross_data):
    """Render a two-panel platform comparison to static/comparative.png.

    Left panel: positive/negative/neutral percentages per platform.
    Right panel: polarity (scaled x100) and average confidence (%).
    Skips rendering when fewer than two platforms are present. Best-effort:
    any failure is logged and swallowed.
    """
    try:
        platforms = cross_data.get('platforms',{})
        if len(platforms) < 2: return  # comparison needs at least 2 platforms
        os.makedirs("static", exist_ok=True)
        srcs = list(platforms.keys())
        pos = [platforms[s]['pos_pct'] for s in srcs]
        neg = [platforms[s]['neg_pct'] for s in srcs]
        neu = [platforms[s]['neu_pct'] for s in srcs]
        pol = [platforms[s]['polarity']*100 for s in srcs]  # scale 0-1 -> 0-100
        cnf = [platforms[s]['avg_conf']*100 for s in srcs]
        x = np.arange(len(srcs)); w = 0.26  # bar group positions and width
        fig, axes = plt.subplots(1, 2, figsize=(13,4))
        fig.patch.set_facecolor('#0e1117')  # dark theme to match the UI
        for ax in axes: ax.set_facecolor('#141820')
        # Panel 1: grouped sentiment-share bars.
        axes[0].bar(x-w, pos, w, label='Positif', color='#22c55e', alpha=0.85)
        axes[0].bar(x, neg, w, label='Negatif', color='#ef4444', alpha=0.85)
        axes[0].bar(x+w, neu, w, label='Netral', color='#94a3b8', alpha=0.85)
        axes[0].set_xticks(x); axes[0].set_xticklabels([s.capitalize() for s in srcs], color='#8892a4', fontsize=9)
        axes[0].set_title('Distribusi Sentimen per Platform', color='#e8eaf0', fontsize=10)
        axes[0].legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        axes[0].set_ylim(0,105); axes[0].tick_params(colors='#5a6070')
        # Panel 2: polarity vs confidence bars.
        axes[1].bar(x-0.2, pol, 0.38, label='Polarisasi Γ100', color='#f59e0b', alpha=0.8)
        axes[1].bar(x+0.2, cnf, 0.38, label='Avg Confidence %', color='#4f9cf9', alpha=0.8)
        axes[1].set_xticks(x); axes[1].set_xticklabels([s.capitalize() for s in srcs], color='#8892a4', fontsize=9)
        axes[1].set_title('Polarisasi & Confidence per Platform', color='#e8eaf0', fontsize=10)
        axes[1].legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        axes[1].set_ylim(0,110); axes[1].tick_params(colors='#5a6070')
        for ax in axes:
            for sp in ax.spines.values(): sp.set_edgecolor('#1a2030')
        plt.tight_layout(pad=1.5)
        plt.savefig("static/comparative.png", dpi=110, facecolor=fig.get_facecolor())
        plt.close(fig)  # release figure memory (Agg backend, no display)
    except Exception as e:
        print(f"comparative chart error: {e}")
# ── RICH EXPORT (Priority 4) ──
def build_export_data(result_data, keyword, source, conf_stats, cross_data, trend):
    """Assemble CSV-ready export rows plus a one-row run summary.

    Returns {'main': [per-comment rows], 'summary': {aggregate run stats}}.
    *conf_stats*, *cross_data* and *trend* are the outputs of
    confidence_stats, cross_platform_analysis and predict_trend respectively.
    """
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # One row per analyzed comment; 'text_length' is the word count.
    main_rows = [{'index':i+1,'text':r.get('text',''),'text_length':len(r.get('text','').split()),'sentiment':r.get('sentiment',''),'confidence':r.get('confidence',0),'is_certain':r.get('is_certain',True),'source':r.get('source',''),'scraped_at':r.get('scraped_at',ts),'keyword':keyword} for i,r in enumerate(result_data)]
    total = len(result_data) or 1  # avoid div-by-zero on empty input
    pos = sum(1 for r in result_data if r.get('sentiment')=='Positive')
    neg = sum(1 for r in result_data if r.get('sentiment')=='Negative')
    neu = sum(1 for r in result_data if r.get('sentiment')=='Neutral')
    unc = sum(1 for r in result_data if r.get('sentiment')=='Uncertain')
    summary = {'keyword':keyword,'source':source,'analyzed_at':ts,'total_samples':total,'positive_count':pos,'negative_count':neg,'neutral_count':neu,'uncertain_count':unc,'positive_pct':round(pos/total*100,1),'negative_pct':round(neg/total*100,1),'neutral_pct':round(neu/total*100,1),'uncertain_pct':round(unc/total*100,1),'avg_confidence':conf_stats.get('avg_confidence',0),'trend_label':trend.get('label',''),'polarity_index':trend.get('polarity',0),'most_positive_platform':cross_data.get('most_positive','')}
    return {'main':main_rows,'summary':summary}
def save_export_csv(export):
    """Persist the export bundle as static/result.csv and static/summary.csv."""
    os.makedirs("static", exist_ok=True)
    # 'main' is already a list of row dicts; 'summary' is a single dict.
    targets = {
        "static/result.csv": export['main'],
        "static/summary.csv": [export['summary']],
    }
    for path, rows in targets.items():
        pd.DataFrame(rows).to_csv(path, index=False)
# ── CORE FUNCTIONS ──
def get_top_words(texts):
    """Return the 15 most frequent cleaned tokens (length > 2, non-stopword)
    as [{'word': w, 'count': c}, ...]."""
    counts = Counter(
        token
        for text in texts
        for token in clean_text_deep(text).split()
        if len(token) > 2 and token not in STOPWORDS
    )
    return [{"word": w, "count": c} for w, c in counts.most_common(15)]
def generate_wordcloud(texts):
    """Render a monochrome word cloud of the cleaned corpus to
    static/wordcloud.png. Best-effort: failures are logged and swallowed."""
    try:
        os.makedirs("static", exist_ok=True)
        combined = " ".join(batch_clean(texts))
        if not combined.strip(): return  # nothing to render
        # Single-colour cloud (#4f9cf9) on the app's dark background.
        WordCloud(width=900,height=380,background_color='#0e1117',color_func=lambda *a,**k:'#4f9cf9',max_words=80,stopwords=STOPWORDS).generate(combined).to_file("static/wordcloud.png")
    except Exception as e: print(f"wordcloud error: {e}")
def generate_heatmap(data):
    """Render a source × sentiment count heatmap to static/heatmap.png.

    *data* is a list of dicts with at least 'source' and 'sentiment'.
    Best-effort: any failure is logged and swallowed.
    """
    try:
        if not data: return
        labels = ["Positive","Neutral","Negative","Uncertain"]
        sources = sorted(set(d["source"] for d in data))
        matrix = np.zeros((len(sources),len(labels)))
        for d in data:
            i = sources.index(d["source"]); s = d["sentiment"]
            # Unknown sentiment labels are counted in the 'Uncertain' column.
            j = labels.index(s) if s in labels else 3
            matrix[i][j] += 1
        if matrix.sum()==0: return
        # Figure height grows with the number of sources (min 2 inches).
        fig, ax = plt.subplots(figsize=(7,max(2,len(sources))))
        fig.patch.set_facecolor('#0e1117'); ax.set_facecolor('#141820')
        im = ax.imshow(matrix, cmap='Blues', aspect='auto')
        ax.set_xticks(range(len(labels))); ax.set_xticklabels(labels, color='#8892a4', fontsize=9)
        ax.set_yticks(range(len(sources))); ax.set_yticklabels(sources, color='#8892a4', fontsize=9)
        ax.tick_params(colors='#5a6070'); plt.colorbar(im, ax=ax); plt.tight_layout()
        os.makedirs("static", exist_ok=True)
        plt.savefig("static/heatmap.png", dpi=100, facecolor=fig.get_facecolor()); plt.close(fig)
    except Exception as e: print(f"heatmap error: {e}")
def generate_timeline(data):
    """Render rolling-mean sentiment proportions over comment order to
    static/timeline.png. Needs at least 3 rows. Best-effort."""
    try:
        if not data or len(data)<3: return
        os.makedirs("static", exist_ok=True)
        # Rolling window: 10% of the data, never smaller than 5.
        window = max(5,len(data)//10)
        # Trailing rolling mean; shorter windows at the start of the series.
        def roll(arr,w): return [sum(arr[max(0,i-w+1):i+1])/len(arr[max(0,i-w+1):i+1]) for i in range(len(arr))]
        # Binary indicator series per sentiment class.
        pos_r=[1 if d["sentiment"]=="Positive" else 0 for d in data]
        neg_r=[1 if d["sentiment"]=="Negative" else 0 for d in data]
        neu_r=[1 if d["sentiment"]=="Neutral" else 0 for d in data]
        unc_r=[1 if d["sentiment"]=="Uncertain" else 0 for d in data]
        x=list(range(1,len(data)+1))  # 1-based comment order on the x-axis
        fig,ax=plt.subplots(figsize=(11,3.5)); fig.patch.set_facecolor('#0e1117'); ax.set_facecolor('#141820')
        ax.fill_between(x,roll(pos_r,window),alpha=0.12,color='#22c55e')
        ax.fill_between(x,roll(neg_r,window),alpha=0.12,color='#ef4444')
        ax.plot(x,roll(pos_r,window),color='#22c55e',lw=1.8,label='Positif')
        ax.plot(x,roll(neg_r,window),color='#ef4444',lw=1.8,label='Negatif')
        ax.plot(x,roll(neu_r,window),color='#94a3b8',lw=1.2,ls='--',label='Netral')
        ax.plot(x,roll(unc_r,window),color='#f59e0b',lw=1.0,ls=':',label='Uncertain')
        # Dotted horizontal lines mark the overall pos/neg means as a baseline.
        ax.axhline(np.mean(pos_r),color='#22c55e',lw=0.6,ls=':',alpha=0.5)
        ax.axhline(np.mean(neg_r),color='#ef4444',lw=0.6,ls=':',alpha=0.5)
        ax.set_xlabel(f'Urutan komentar (rolling mean, window={window})',color='#5a6070',fontsize=8)
        ax.set_ylabel('Proporsi',color='#5a6070',fontsize=8); ax.tick_params(colors='#5a6070',labelsize=7)
        for sp in ax.spines.values(): sp.set_edgecolor('#1a2030')
        ax.legend(fontsize=8,facecolor='#141820',edgecolor='#1a2030',labelcolor='#8892a4')
        ax.set_ylim(0,1.05); ax.set_xlim(1,len(data)); plt.tight_layout(pad=1.0)
        plt.savefig("static/timeline.png",dpi=110,facecolor=fig.get_facecolor()); plt.close(fig)
    except Exception as e: print(f"timeline error: {e}")
def predict_trend(data):
    """Classify the overall sentiment trend of the corpus.

    Returns a dict with a trend label ('Dominan Positif' / 'Dominan Negatif' /
    'Mayoritas Netral' / 'Terpolarisasi'), the dominant class, a polarity
    index |pos - neg|, per-source breakdowns, and an Indonesian summary.
    """
    if not data: return {"label":"Kurang Data","dominant":"Neutral","polarity":0.0,"confidence":0.0,"by_source":{},"summary":"Tidak ada data."}
    sentiments=[d["sentiment"] for d in data]; total=len(sentiments)
    pos=sentiments.count("Positive"); neg=sentiments.count("Negative"); neu=sentiments.count("Neutral")
    pos_r,neg_r,neu_r=pos/total,neg/total,neu/total; polarity=round(abs(pos_r-neg_r),3)
    # Per-source sentiment tallies; unknown labels fall into 'Uncertain'.
    by_source={}
    for d in data:
        src=d.get("source","unknown")
        if src not in by_source: by_source[src]={"Positive":0,"Negative":0,"Neutral":0,"Uncertain":0,"total":0}
        s=d["sentiment"]
        if s in by_source[src]: by_source[src][s]+=1
        else: by_source[src]["Uncertain"]+=1
        by_source[src]["total"]+=1
    for src in by_source:
        t=by_source[src]["total"]
        by_source[src]["pos_pct"]=round(by_source[src]["Positive"]/t*100,1)
        by_source[src]["neg_pct"]=round(by_source[src]["Negative"]/t*100,1)
        by_source[src]["neu_pct"]=round(by_source[src]["Neutral"] /t*100,1)
    # Trend label: strict plurality for pos/neg; neutral needs >= 50%;
    # otherwise the corpus is considered polarized.
    if pos_r>neg_r and pos_r>neu_r: label,dominant,conf="Dominan Positif","Positive",round(pos_r,3)
    elif neg_r>pos_r and neg_r>neu_r: label,dominant,conf="Dominan Negatif","Negative",round(neg_r,3)
    elif neu_r>=0.5: label,dominant,conf="Mayoritas Netral","Neutral",round(neu_r,3)
    else: label,dominant,conf="Terpolarisasi","Mixed",round(polarity,3)
    dom_src=max(by_source,key=lambda s:by_source[s]["total"]) if by_source else "-"
    return {"label":label,"dominant":dominant,"polarity":polarity,"confidence":conf,"by_source":by_source,"pos_pct":round(pos_r*100,1),"neg_pct":round(neg_r*100,1),"neu_pct":round(neu_r*100,1),"summary":f"{label} ({round(pos_r*100,1)}% positif, {round(neg_r*100,1)}% negatif, {round(neu_r*100,1)}% netral). Indeks polarisasi: {polarity:.2f}. Sumber terbanyak: {dom_src}."}
def detect_hoax(texts):
    """Classify up to the first 20 texts as 'Hoax' or 'Normal'.

    Prefers the trained TF-IDF + LogisticRegression pipeline (_hoax_clf);
    falls back to keyword matching when the model is unavailable or fails.
    Returns [{'text', 'label', 'confidence', 'method'}, ...].
    """
    results=[]; sample=texts[:20]  # cap work; UI shows only a short list
    if _hoax_clf is not None:
        try:
            preds=_hoax_clf.predict(sample); probas=_hoax_clf.predict_proba(sample)
            for t,p,pr in zip(sample,preds,probas):
                results.append({"text":t,"label":"Hoax" if p==1 else "Normal","confidence":round(float(max(pr)),3),"method":"ml"})
            return results
        except Exception as e:
            # Narrowed from a bare `except:` that silently hid all failures
            # (including KeyboardInterrupt); log and fall through to keywords.
            print(f"hoax ML failed, using keyword fallback: {e}")
    KW=["hoax","bohong","fitnah","propaganda","palsu","disinformasi","menyesatkan","kebohongan","manipulasi","adu domba","provokasi","berita palsu","ujaran kebencian","tidak benar","narasi sesat"]
    for t in sample:
        # One or more keyword hits => 'Hoax'; confidence grows with hit count.
        sc=sum(1 for k in KW if k in t.lower()); lbl="Hoax" if sc>=1 else "Normal"
        results.append({"text":t,"label":lbl,"confidence":min(0.5+sc*0.1,0.95) if lbl=="Hoax" else 0.6,"method":"keyword"})
    return results
def get_topics(texts):
    """Extract up to 3 LDA topics, each as a list of 5 top words.

    Returns sentinel lists (["data kurang"], ["kosong"], ["error"]) when
    there is too little data, an empty vocabulary, or a failure.
    """
    try:
        cleaned=batch_clean(texts); cleaned=[t for t in cleaned if len(t)>3]
        if len(cleaned)<5: return [["data kurang"]]  # LDA needs some volume
        vec=CountVectorizer(min_df=2,stop_words=list(STOPWORDS)); X=vec.fit_transform(cleaned)
        if X.shape[1]==0: return [["kosong"]]  # nothing survived vectorizing
        n=min(3,X.shape[1]); lda=LatentDirichletAllocation(n_components=n,random_state=42); lda.fit(X)
        words=vec.get_feature_names_out()
        # Top-5 words per topic by component weight (ascending slice).
        return [[words[i] for i in t.argsort()[-5:]] for t in lda.components_]
    except Exception as e: print(f"topic error: {e}"); return [["error"]]
def generate_insight(data):
    """Return a one-line tally of sentiment labels in *data*."""
    tally = Counter(d["sentiment"] for d in data)
    return (f"Positive:{tally['Positive']} Negative:{tally['Negative']} "
            f"Neutral:{tally['Neutral']} Uncertain:{tally['Uncertain']}")
def cluster_opinions(texts):
    """Group texts into up to 3 KMeans clusters over TF-IDF features.

    Returns [{'cluster': id, 'samples': up to 3 original texts}, ...];
    empty list when there are fewer than 6 texts or on any failure.
    """
    try:
        if len(texts)<6: return []  # too few texts to cluster meaningfully
        cleaned=batch_clean(texts)
        X=TfidfVectorizer(max_features=300,stop_words=list(STOPWORDS)).fit_transform(cleaned)
        n=min(3,len(texts)); k=KMeans(n_clusters=n,n_init=10,random_state=42).fit(X)
        # Map each cluster id to the ORIGINAL (uncleaned) texts for display.
        clusters={}
        for i,lbl in enumerate(k.labels_): clusters.setdefault(int(lbl),[]).append(texts[i])
        return [{"cluster":l,"samples":s[:3]} for l,s in clusters.items()]
    except Exception as e: print(f"cluster error: {e}"); return []
def build_network(texts):
    """Build a word co-occurrence edge list across texts.

    Per text, takes up to 6 unique cleaned tokens (length > 3, non-stopword)
    and counts every unordered pair. Only edges seen more than once are kept;
    returned as [{'source', 'target', 'weight'}, ...].
    """
    pair_counts = Counter()
    for text in texts:
        tokens = [w for w in set(clean_text_deep(text).split())
                  if len(w) > 3 and w not in STOPWORDS][:6]
        for a, b in combinations(tokens, 2):
            pair_counts[tuple(sorted((a, b)))] += 1
    return [{"source": a, "target": b, "weight": n}
            for (a, b), n in pair_counts.items() if n > 1]
def detect_bot_network(texts):
    """Flag near-duplicate texts as a possible bot network.

    Builds a similarity graph (edge when TF-IDF cosine similarity > 0.75)
    and marks high-degree-centrality nodes (> 0.3) as suspected bots.
    Returns {'nodes', 'edges', 'bots'} (empty shape on failure / <5 texts).
    """
    try:
        if len(texts)<5: return {"nodes":[],"edges":[],"bots":[]}
        X=TfidfVectorizer(max_features=300).fit_transform(texts); sim=cosine_similarity(X)
        G=nx.Graph()
        for i in range(len(texts)): G.add_node(i,text=texts[i])
        # O(n^2) pairwise scan; acceptable because texts is capped upstream.
        for i in range(len(texts)):
            for j in range(i+1,len(texts)):
                if sim[i][j]>0.75: G.add_edge(i,j)
        central=nx.degree_centrality(G)
        bots=[{"node":i,"score":round(s,2),"text":texts[i]} for i,s in central.items() if s>0.3]
        return {"nodes":[{"id":i} for i in G.nodes()],"edges":[{"source":u,"target":v} for u,v in G.edges()],"bots":bots[:10]}
    except Exception as e: print(f"bot error: {e}"); return {"nodes":[],"edges":[],"bots":[]}
def run_gnn_safe(nodes, edges, texts):
    """Score graph nodes with a small untrained GCN (best-effort demo).

    Node features are TF-IDF vectors of each node's text; the score is the
    min-max-normalised L2 norm of the 4-d GCN embedding. Falls back to
    all-zero scores when torch/torch_geometric are unavailable, the graph
    is too small, or anything fails. NOTE(review): the GCN is untrained
    (fixed seed, eval mode) — scores reflect structure, not learned botness.
    """
    if not nodes or not edges or len(nodes)<3:
        return [{"node":n["id"],"score":0.0} for n in nodes]
    try:
        # Imported lazily so the app runs without torch_geometric installed.
        import torch
        from torch_geometric.data import Data
        from torch_geometric.nn import GCNConv
        node_texts=[texts[n["id"]] if n["id"]<len(texts) else "" for n in nodes]
        vec=TfidfVectorizer(max_features=32,min_df=1)
        try: X=vec.fit_transform(node_texts).toarray()
        except: X=np.eye(len(nodes),32)  # identity features if vectorizing fails
        x=torch.tensor(X,dtype=torch.float)
        # Drop edges that reference nodes outside the feature matrix.
        edge_list=[[e["source"],e["target"]] for e in edges if e["source"]<len(nodes) and e["target"]<len(nodes)]
        if not edge_list: return [{"node":n["id"],"score":0.0} for n in nodes]
        edge_index=torch.tensor(edge_list,dtype=torch.long).t().contiguous()
        class GCN(torch.nn.Module):
            # Two-layer GCN: in_ch -> 16 -> 4.
            def __init__(self,in_ch):
                super().__init__(); self.conv1=GCNConv(in_ch,16); self.conv2=GCNConv(16,4)
            def forward(self,x,ei): return self.conv2(torch.relu(self.conv1(x,ei)),ei)
        torch.manual_seed(42); model=GCN(x.shape[1]); model.eval()
        with torch.no_grad(): out=model(x,edge_index)
        scores=torch.norm(out,dim=1).numpy()
        # Min-max normalise to [0, 1]; all-equal scores collapse to zeros.
        if scores.max()>scores.min(): scores=(scores-scores.min())/(scores.max()-scores.min())
        else: scores=np.zeros(len(scores))
        return [{"node":nodes[i]["id"],"score":round(float(scores[i]),3)} for i in range(len(nodes))]
    except Exception as e: print(f"GNN error: {e}"); return [{"node":n["id"],"score":0.0} for n in nodes]
# ── ROUTES ──
@app.route("/")
def home():
    """Landing page with the search form."""
    return render_template("index.html")

@app.route("/result")
def result():
    """Results dashboard page (fetches /analyze via JS)."""
    return render_template("result.html")
@app.route("/analyze", methods=["POST"])
def analyze():
    """Main analysis endpoint: collect → sentiment → analytics → charts → export.

    Body JSON: {"keyword": str, "source": str, "conf_threshold": float}.
    Returns the full analytics payload consumed by result.html, or
    {"error": ..., "data": []} with 400 (missing keyword) / 500 (failure).
    """
    try:
        body = request.json or {}
        keyword = body.get("keyword", "").strip()
        source = body.get("source", "all")
        conf_th = float(body.get("conf_threshold", CONF_THRESHOLD))
        if not keyword:
            return jsonify({"error": "keyword kosong", "data": []}), 400
        raw = collect_data(keyword, source)
        # Cap the workload at 100 (source, text) pairs.
        texts = [t for _, t in raw][:100]
        sources = [s for s, _ in raw][:100]
        scored = predict_with_score(texts)
        scored_filtered = apply_confidence_filter(scored, threshold=conf_th)
        result_data = [{"text": t, "sentiment": s["sentiment"], "confidence": s["confidence"],
                        "is_certain": s["is_certain"], "source": src,
                        "scraped_at": datetime.now().strftime("%Y-%m-%d %H:%M")}
                       for t, s, src in zip(texts, scored_filtered, sources)]
        conf_stats_data = confidence_stats(result_data)
        cross_data = cross_platform_analysis(result_data)
        # Chart generation is best-effort; each helper swallows its own errors.
        generate_comparative_chart(cross_data)
        generate_wordcloud(texts); generate_heatmap(result_data); generate_timeline(result_data)
        top_words = get_top_words(texts); topics = get_topics(texts); insight = generate_insight(result_data)
        clusters = cluster_opinions(texts); trend = predict_trend(result_data); hoax = detect_hoax(texts)
        network = build_network(texts); bot_network = detect_bot_network(texts)
        gnn = run_gnn_safe(bot_network["nodes"], bot_network["edges"], texts)
        bot_bert = detect_bot_bert(texts); fake_news = detect_fake_news(texts)
        # BUG FIX: the response below referenced these six results, but they
        # were never computed — every request died with a NameError (HTTP 500).
        absa_result = analyze_absa(texts)
        ner_result = analyze_ner(texts)
        stance_result = analyze_stance(texts, keyword)  # keyword as stance target — confirm with service API
        emotion_result = analyze_emotions(texts)
        keywords_result = extract_keywords(texts)
        summaries = summarize_by_platform(result_data)  # assumes rows carry 'source' — confirm with service API
        export = build_export_data(result_data, keyword, source, conf_stats_data, cross_data, trend)
        save_export_csv(export)
        return jsonify({"data":result_data,"top_words":top_words,"topics":topics,"insight":insight,"clusters":clusters,"hoax":hoax,"network":network,"bot_network":bot_network,"trend":trend,"bot_bert":bot_bert,"fake_news":fake_news,"gnn":gnn,"conf_stats":conf_stats_data,"cross_platform":cross_data,"export_summary":export["summary"],"absa":absa_result,"ner":ner_result,"stance":stance_result,"emotions":emotion_result,"keywords":keywords_result,"summaries":summaries})
    except Exception as e:
        print(f"ERROR /analyze: {e}")
        return jsonify({"error": str(e), "data": []}), 500
@app.route("/download")
def download():
    """Download the per-row results CSV produced by the last /analyze run."""
    path="static/result.csv"
    if not os.path.exists(path): return jsonify({"error":"Belum ada hasil"}),404
    return send_file(path,as_attachment=True)

@app.route("/download/summary")
def download_summary():
    """Download the one-row run-summary CSV from the last /analyze run."""
    path="static/summary.csv"
    if not os.path.exists(path): return jsonify({"error":"Belum ada summary"}),404
    return send_file(path,as_attachment=True)
@app.route("/static/<path:filename>")
def static_files(filename):
    """Serve generated artifacts (charts, CSVs) from the static/ directory.

    BUG FIX: the original passed a literal broken path instead of the
    requested *filename*, so every static asset 500'd. Since *filename*
    comes straight from the URL, path traversal is rejected explicitly.
    """
    safe = os.path.normpath(filename)
    if os.path.isabs(safe) or safe.startswith(".."):
        return jsonify({"error": "invalid path"}), 404
    full = os.path.join("static", safe)
    if not os.path.exists(full):
        return jsonify({"error": "not found"}), 404
    return send_file(full)
if __name__=="__main__":
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port.
    app.run(host="0.0.0.0",port=7860,debug=False)