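"""Flask backend for a multi-platform Indonesian sentiment-analysis dashboard.

Collects comments for a keyword, scores sentiment with confidence filtering,
then layers on topic modelling, opinion clustering, hoax and bot detection,
and cross-platform comparison; results are exported as CSV files and charts
under static/.
"""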
from flask import Flask, render_template, request, jsonify, send_file, send_from_directory
try:
    from services.aggregator import collect_data
except Exception as e:
    print(f"❌ FATAL: aggregator gagal load: {e}")
    def collect_data(kw, src="all"): return [("unknown", "aggregator error")]

try:
    from services.sentiment import predict_with_score
except Exception as e:
    print(f"⚠️  sentiment gagal load: {e} β€” rule-based fallback")
    def predict_with_score(texts):
        def _rb(t):
            pos = sum(1 for k in ['bagus','baik','senang','suka','mantap','oke','good','great'] if k in t.lower())
            neg = sum(1 for k in ['buruk','jelek','benci','kecewa','gagal','bad','worst'] if k in t.lower())
            label = 'Positive' if pos > neg else 'Negative' if neg > pos else 'Neutral'
            return {'label': label, 'score': 0.5}
        return [_rb(t) for t in texts]

from collections import Counter
import pandas as pd
import os, re
import numpy as np
from datetime import datetime

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import networkx as nx
from itertools import combinations
from wordcloud import WordCloud

# Deep preprocessing
try:
    from services.preprocessing_id import clean_text_deep, batch_clean, STOPWORDS
    DEEP_PREP = True
    print("βœ… Deep preprocessing loaded")
except ImportError:
    DEEP_PREP = False
    STOPWORDS = {'yang','dan','di','ke','dari','ini','itu','dengan','untuk','adalah','ada','pada','juga','tidak','bisa','sudah','the','is','in','of','a','an','and','it'}
    def clean_text_deep(t):
        t = t.lower()
        t = re.sub(r'http\S+', '', t)
        t = re.sub(r'[^a-zA-Z0-9\s]', ' ', t)
        return re.sub(r'\s+', ' ', t).strip()
    def batch_clean(texts): return [clean_text_deep(t) for t in texts]

try:
    from services.bot_bert import detect_bot_bert
except Exception:
    def detect_bot_bert(x): return []

try:
    from services.fake_news import detect_fake_news
except Exception:
    def detect_fake_news(x): return []

# ── New NLP Services ──
try:
    from services.absa import analyze_absa
except Exception as e:
    print(f"⚠️  ABSA not available: {e}")
    def analyze_absa(x): return {'top_aspects':[],'aggregate':{},'aspect_sentiment_map':{}}

try:
    from services.ner import analyze_ner
except Exception as e:
    print(f"⚠️  NER not available: {e}")
    def analyze_ner(x): return {'top_entities':[],'entities_by_type':{}}

try:
    from services.advanced_nlp import (
        analyze_stance, analyze_emotions,
        extract_keywords, summarize_by_platform
    )
except Exception as e:
    print(f"⚠️  Advanced NLP not available: {e}")
    def analyze_stance(x, t=None): return {'counts':{},'dominant':'Neutral','favor_pct':0,'against_pct':0,'neutral_pct':0}
    def analyze_emotions(x): return {'distribution':{},'dominant':'neutral','emotional_pct':0}
    def extract_keywords(x, n=20): return []
    def summarize_by_platform(x): return {}

app = Flask(__name__)
CONF_THRESHOLD = 0.60

# ── HOAX CLASSIFIER ──
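# Seed-trained TF-IDF + logistic-regression model: 30 hand-written Indonesian
# sentences (15 hoax/disinformation-flavoured, 15 benign). With this little
# training data it is a heuristic signal, not a production-grade detector.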
_HX = ["berita ini bohong dan tidak benar","ini propaganda yang menyesatkan","jangan percaya hoax yang beredar","informasi palsu disebarkan untuk memfitnah","disinformasi sengaja dibuat untuk menipu","berita palsu sangat meresahkan warga","menyebarkan kebohongan dan fitnah","manipulasi politik yang berbahaya","provokasi untuk memecah belah bangsa","ujaran kebencian dan fitnah","waspada berita bohong sengaja disebarkan","hoaks sudah dibantah pihak berwenang","informasi menyesatkan tidak ada bukti","narasi sesat untuk mengadu domba","berita manipulatif perlu diklarifikasi","produk ini sangat bagus dan berkualitas","saya sangat senang dengan pelayanannya","hasil kerja tim ini luar biasa","kebijakan ini berdampak positif masyarakat","acara kemarin berjalan lancar dan meriah","terima kasih atas bantuan yang diberikan","pemerintah berupaya meningkatkan kesejahteraan","inovasi terbaru sangat membantu kehidupan","prestasi luar biasa yang membanggakan","kondisi ekonomi mulai membaik dari data","program ini memberikan manfaat nyata","kolaborasi baik menghasilkan output optimal","penelitian ini memberikan temuan menarik","masyarakat antusias menyambut kebijakan baru","kualitas pendidikan terus meningkat"]
_HY = [1]*15 + [0]*15
_hoax_clf = Pipeline([('tfidf', TfidfVectorizer(ngram_range=(1,2), max_features=500, sublinear_tf=True)), ('clf', LogisticRegression(C=1.0, max_iter=200, random_state=42, class_weight='balanced'))])
try:
    _hoax_clf.fit(_HX, _HY)
    print("βœ… Hoax classifier ready")
except Exception as e:
    print(f"⚠️ Hoax error: {e}")
    _hoax_clf = None

# ── CONFIDENCE FILTER (Priority 2) ──
def apply_confidence_filter(scored, threshold=CONF_THRESHOLD):
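    """Relabel predictions whose score falls below `threshold` as 'Uncertain'.

    e.g. {'label': 'Positive', 'score': 0.42} -> sentiment 'Uncertain' at the
    default threshold of 0.60, while score 0.91 keeps its 'Positive' label.
    """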
    result = []
    for item in scored:
        conf = item.get('score', 0)
        label = item.get('label', 'Neutral')
        result.append({**item, 'sentiment': label if conf >= threshold else 'Uncertain', 'confidence': round(conf, 4), 'is_certain': conf >= threshold})
    return result

def confidence_stats(result_data):
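    """Summarise confidence per sentiment class (count/mean/std/min/max) and
    bucket all scores into high (>=0.8), medium (0.6-0.8) and low (<0.6)."""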
    by_class = {'Positive':[], 'Negative':[], 'Neutral':[], 'Uncertain':[]}
    for r in result_data:
        s = r.get('sentiment','Neutral')
        c = r.get('confidence',0)
        if s in by_class: by_class[s].append(c)
        else: by_class['Uncertain'].append(c)
    stats = {}
    for cls, vals in by_class.items():
        if vals:
            stats[cls] = {'count':len(vals),'mean':round(float(np.mean(vals)),3),'std':round(float(np.std(vals)),3),'min':round(float(np.min(vals)),3),'max':round(float(np.max(vals)),3)}
        else:
            stats[cls] = {'count':0,'mean':0,'std':0,'min':0,'max':0}
    all_conf = [r.get('confidence',0) for r in result_data]
    return {'by_class':stats,'buckets':{'high (≥0.8)':sum(1 for c in all_conf if c>=0.8),'med (0.6-0.8)':sum(1 for c in all_conf if 0.6<=c<0.8),'low (<0.6)':sum(1 for c in all_conf if c<0.6)},'uncertain_count':sum(1 for r in result_data if not r.get('is_certain',True)),'avg_confidence':round(float(np.mean(all_conf)),3) if all_conf else 0}

# ── CROSS-PLATFORM ANALYSIS (Priority 3) ──
def cross_platform_analysis(result_data):
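    """Compare sentiment across platforms.

    Polarity is |positive_rate - negative_rate| per platform: a platform with
    70% positive and 10% negative comments scores 0.6, i.e. strongly one-sided.
    Also reports pairwise positive-share gaps and headline insights.
    """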
    platforms = {}
    for r in result_data:
        src = r.get('source','unknown'); sent = r.get('sentiment','Neutral'); conf = r.get('confidence',0)
        if src not in platforms:
            platforms[src] = {'Positive':0,'Negative':0,'Neutral':0,'Uncertain':0,'total':0,'conf_sum':0}
        if sent in platforms[src]: platforms[src][sent] += 1
        else: platforms[src]['Uncertain'] += 1
        platforms[src]['total'] += 1
        platforms[src]['conf_sum'] += conf
    platform_stats = {}
    for src, c in platforms.items():
        t = c['total'] or 1
        pos_r = c['Positive']/t; neg_r = c['Negative']/t; neu_r = c['Neutral']/t
        platform_stats[src] = {'total':t,'pos_count':c['Positive'],'neg_count':c['Negative'],'neu_count':c['Neutral'],'unc_count':c['Uncertain'],'pos_pct':round(pos_r*100,1),'neg_pct':round(neg_r*100,1),'neu_pct':round(neu_r*100,1),'unc_pct':round(c['Uncertain']/t*100,1),'polarity':round(abs(pos_r-neg_r),3),'avg_conf':round(c['conf_sum']/t,3),'dominant':max(['Positive','Negative','Neutral','Uncertain'],key=lambda s:c[s])}
    if not platform_stats:
        return {'platforms':{},'pairwise':[],'insights':[],'most_positive':None,'most_negative':None,'most_polarized':None}
    srcs = list(platform_stats.keys())
    most_positive  = max(srcs, key=lambda s: platform_stats[s]['pos_pct'])
    most_negative  = max(srcs, key=lambda s: platform_stats[s]['neg_pct'])
    most_polarized = max(srcs, key=lambda s: platform_stats[s]['polarity'])
    pairwise = []
    for i in range(len(srcs)):
        for j in range(i+1, len(srcs)):
            a, b = srcs[i], srcs[j]
            diff = round(abs(platform_stats[a]['pos_pct']-platform_stats[b]['pos_pct']),1)
            pairwise.append({'platform_a':a,'platform_b':b,'pos_diff':diff,'description':f"{a} vs {b}: selisih sentimen positif {diff}%"})
    insights = []
    if len(srcs) > 1:
        insights.append(f"{most_positive.capitalize()} memiliki sentimen positif tertinggi ({platform_stats[most_positive]['pos_pct']}%).")
        insights.append(f"{most_negative.capitalize()} memiliki sentimen negatif tertinggi ({platform_stats[most_negative]['neg_pct']}%).")
        insights.append(f"{most_polarized.capitalize()} paling terpolarisasi (indeks {platform_stats[most_polarized]['polarity']}).")
    return {'platforms':platform_stats,'pairwise':pairwise,'insights':insights,'most_positive':most_positive,'most_negative':most_negative,'most_polarized':most_polarized}

def generate_comparative_chart(cross_data):
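    """Render a two-panel bar chart (sentiment split per platform; polarity
    vs. average confidence) to static/comparative.png."""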
    try:
        platforms = cross_data.get('platforms',{})
        if len(platforms) < 2: return
        os.makedirs("static", exist_ok=True)
        srcs = list(platforms.keys())
        pos = [platforms[s]['pos_pct'] for s in srcs]
        neg = [platforms[s]['neg_pct'] for s in srcs]
        neu = [platforms[s]['neu_pct'] for s in srcs]
        pol = [platforms[s]['polarity']*100 for s in srcs]
        cnf = [platforms[s]['avg_conf']*100 for s in srcs]
        x = np.arange(len(srcs)); w = 0.26
        fig, axes = plt.subplots(1, 2, figsize=(13,4))
        fig.patch.set_facecolor('#0e1117')
        for ax in axes: ax.set_facecolor('#141820')
        axes[0].bar(x-w, pos, w, label='Positif',  color='#22c55e', alpha=0.85)
        axes[0].bar(x,   neg, w, label='Negatif',  color='#ef4444', alpha=0.85)
        axes[0].bar(x+w, neu, w, label='Netral',   color='#94a3b8', alpha=0.85)
        axes[0].set_xticks(x); axes[0].set_xticklabels([s.capitalize() for s in srcs], color='#8892a4', fontsize=9)
        axes[0].set_title('Distribusi Sentimen per Platform', color='#e8eaf0', fontsize=10)
        axes[0].legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        axes[0].set_ylim(0,105); axes[0].tick_params(colors='#5a6070')
        axes[1].bar(x-0.2, pol, 0.38, label='Polarisasi ×100', color='#f59e0b', alpha=0.8)
        axes[1].bar(x+0.2, cnf, 0.38, label='Avg Confidence %', color='#4f9cf9', alpha=0.8)
        axes[1].set_xticks(x); axes[1].set_xticklabels([s.capitalize() for s in srcs], color='#8892a4', fontsize=9)
        axes[1].set_title('Polarisasi & Confidence per Platform', color='#e8eaf0', fontsize=10)
        axes[1].legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        axes[1].set_ylim(0,110); axes[1].tick_params(colors='#5a6070')
        for ax in axes:
            for sp in ax.spines.values(): sp.set_edgecolor('#1a2030')
        plt.tight_layout(pad=1.5)
        plt.savefig("static/comparative.png", dpi=110, facecolor=fig.get_facecolor())
        plt.close(fig)
    except Exception as e:
        print(f"comparative chart error: {e}")

# ── RICH EXPORT (Priority 4) ──
def build_export_data(result_data, keyword, source, conf_stats, cross_data, trend):
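    """Assemble per-comment rows plus a one-row run summary for CSV export."""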
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    main_rows = [{'index':i+1,'text':r.get('text',''),'text_length':len(r.get('text','').split()),'sentiment':r.get('sentiment',''),'confidence':r.get('confidence',0),'is_certain':r.get('is_certain',True),'source':r.get('source',''),'scraped_at':r.get('scraped_at',ts),'keyword':keyword} for i,r in enumerate(result_data)]
    total = len(result_data) or 1
    pos = sum(1 for r in result_data if r.get('sentiment')=='Positive')
    neg = sum(1 for r in result_data if r.get('sentiment')=='Negative')
    neu = sum(1 for r in result_data if r.get('sentiment')=='Neutral')
    unc = sum(1 for r in result_data if r.get('sentiment')=='Uncertain')
    summary = {'keyword':keyword,'source':source,'analyzed_at':ts,'total_samples':total,'positive_count':pos,'negative_count':neg,'neutral_count':neu,'uncertain_count':unc,'positive_pct':round(pos/total*100,1),'negative_pct':round(neg/total*100,1),'neutral_pct':round(neu/total*100,1),'uncertain_pct':round(unc/total*100,1),'avg_confidence':conf_stats.get('avg_confidence',0),'trend_label':trend.get('label',''),'polarity_index':trend.get('polarity',0),'most_positive_platform':cross_data.get('most_positive','')}
    return {'main':main_rows,'summary':summary}

def save_export_csv(export):
    os.makedirs("static", exist_ok=True)
    pd.DataFrame(export['main']).to_csv("static/result.csv", index=False)
    pd.DataFrame([export['summary']]).to_csv("static/summary.csv", index=False)

# ── CORE FUNCTIONS ──
def get_top_words(texts):
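    """Return the 15 most frequent non-stopword tokens (len > 2) with counts."""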
    words = []
    for t in texts:
        for w in clean_text_deep(t).split():
            if len(w) > 2 and w not in STOPWORDS: words.append(w)
    return [{"word":w,"count":c} for w,c in Counter(words).most_common(15)]

def generate_wordcloud(texts):
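    """Render a word cloud of the cleaned corpus to static/wordcloud.png."""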
    try:
        os.makedirs("static", exist_ok=True)
        combined = " ".join(batch_clean(texts))
        if not combined.strip(): return
        WordCloud(width=900,height=380,background_color='#0e1117',color_func=lambda *a,**k:'#4f9cf9',max_words=80,stopwords=STOPWORDS).generate(combined).to_file("static/wordcloud.png")
    except Exception as e: print(f"wordcloud error: {e}")

def generate_heatmap(data):
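    """Plot a source x sentiment count matrix to static/heatmap.png."""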
    try:
        if not data: return
        labels = ["Positive","Neutral","Negative","Uncertain"]
        sources = sorted(set(d["source"] for d in data))
        matrix = np.zeros((len(sources),len(labels)))
        for d in data:
            i = sources.index(d["source"]); s = d["sentiment"]
            j = labels.index(s) if s in labels else 3
            matrix[i][j] += 1
        if matrix.sum()==0: return
        fig, ax = plt.subplots(figsize=(7,max(2,len(sources))))
        fig.patch.set_facecolor('#0e1117'); ax.set_facecolor('#141820')
        im = ax.imshow(matrix, cmap='Blues', aspect='auto')
        ax.set_xticks(range(len(labels))); ax.set_xticklabels(labels, color='#8892a4', fontsize=9)
        ax.set_yticks(range(len(sources))); ax.set_yticklabels(sources, color='#8892a4', fontsize=9)
        ax.tick_params(colors='#5a6070'); plt.colorbar(im, ax=ax); plt.tight_layout()
        os.makedirs("static", exist_ok=True)
        plt.savefig("static/heatmap.png", dpi=100, facecolor=fig.get_facecolor()); plt.close(fig)
    except Exception as e: print(f"heatmap error: {e}")

def generate_timeline(data):
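    """Plot rolling-mean sentiment proportions to static/timeline.png.

    The x-axis is comment order (scrape sequence), not wall-clock time; the
    rolling window is 10% of the sample with a floor of 5 comments.
    """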
    try:
        if not data or len(data)<3: return
        os.makedirs("static", exist_ok=True)
        window = max(5,len(data)//10)
        def roll(arr,w): return [sum(arr[max(0,i-w+1):i+1])/len(arr[max(0,i-w+1):i+1]) for i in range(len(arr))]
        pos_r=[1 if d["sentiment"]=="Positive"  else 0 for d in data]
        neg_r=[1 if d["sentiment"]=="Negative"  else 0 for d in data]
        neu_r=[1 if d["sentiment"]=="Neutral"   else 0 for d in data]
        unc_r=[1 if d["sentiment"]=="Uncertain" else 0 for d in data]
        x=list(range(1,len(data)+1))
        fig,ax=plt.subplots(figsize=(11,3.5)); fig.patch.set_facecolor('#0e1117'); ax.set_facecolor('#141820')
        ax.fill_between(x,roll(pos_r,window),alpha=0.12,color='#22c55e')
        ax.fill_between(x,roll(neg_r,window),alpha=0.12,color='#ef4444')
        ax.plot(x,roll(pos_r,window),color='#22c55e',lw=1.8,label='Positif')
        ax.plot(x,roll(neg_r,window),color='#ef4444',lw=1.8,label='Negatif')
        ax.plot(x,roll(neu_r,window),color='#94a3b8',lw=1.2,ls='--',label='Netral')
        ax.plot(x,roll(unc_r,window),color='#f59e0b',lw=1.0,ls=':',label='Uncertain')
        ax.axhline(np.mean(pos_r),color='#22c55e',lw=0.6,ls=':',alpha=0.5)
        ax.axhline(np.mean(neg_r),color='#ef4444',lw=0.6,ls=':',alpha=0.5)
        ax.set_xlabel(f'Urutan komentar (rolling mean, window={window})',color='#5a6070',fontsize=8)
        ax.set_ylabel('Proporsi',color='#5a6070',fontsize=8); ax.tick_params(colors='#5a6070',labelsize=7)
        for sp in ax.spines.values(): sp.set_edgecolor('#1a2030')
        ax.legend(fontsize=8,facecolor='#141820',edgecolor='#1a2030',labelcolor='#8892a4')
        ax.set_ylim(0,1.05); ax.set_xlim(1,len(data)); plt.tight_layout(pad=1.0)
        plt.savefig("static/timeline.png",dpi=110,facecolor=fig.get_facecolor()); plt.close(fig)
    except Exception as e: print(f"timeline error: {e}")

def predict_trend(data):
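    """Label the overall trend from class ratios: 'Dominan Positif' or
    'Dominan Negatif' when one pole leads, 'Mayoritas Netral' at >=50%
    neutral, otherwise 'Terpolarisasi'; includes a per-source breakdown."""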
    if not data: return {"label":"Kurang Data","dominant":"Neutral","polarity":0.0,"confidence":0.0,"by_source":{},"summary":"Tidak ada data."}
    sentiments=[d["sentiment"] for d in data]; total=len(sentiments)
    pos=sentiments.count("Positive"); neg=sentiments.count("Negative"); neu=sentiments.count("Neutral")
    pos_r,neg_r,neu_r=pos/total,neg/total,neu/total; polarity=round(abs(pos_r-neg_r),3)
    by_source={}
    for d in data:
        src=d.get("source","unknown")
        if src not in by_source: by_source[src]={"Positive":0,"Negative":0,"Neutral":0,"Uncertain":0,"total":0}
        s=d["sentiment"]
        if s in by_source[src]: by_source[src][s]+=1
        else: by_source[src]["Uncertain"]+=1
        by_source[src]["total"]+=1
    for src in by_source:
        t=by_source[src]["total"]
        by_source[src]["pos_pct"]=round(by_source[src]["Positive"]/t*100,1)
        by_source[src]["neg_pct"]=round(by_source[src]["Negative"]/t*100,1)
        by_source[src]["neu_pct"]=round(by_source[src]["Neutral"] /t*100,1)
    if pos_r>neg_r and pos_r>neu_r: label,dominant,conf="Dominan Positif","Positive",round(pos_r,3)
    elif neg_r>pos_r and neg_r>neu_r: label,dominant,conf="Dominan Negatif","Negative",round(neg_r,3)
    elif neu_r>=0.5: label,dominant,conf="Mayoritas Netral","Neutral",round(neu_r,3)
    else: label,dominant,conf="Terpolarisasi","Mixed",round(polarity,3)
    dom_src=max(by_source,key=lambda s:by_source[s]["total"]) if by_source else "-"
    return {"label":label,"dominant":dominant,"polarity":polarity,"confidence":conf,"by_source":by_source,"pos_pct":round(pos_r*100,1),"neg_pct":round(neg_r*100,1),"neu_pct":round(neu_r*100,1),"summary":f"{label} ({round(pos_r*100,1)}% positif, {round(neg_r*100,1)}% negatif, {round(neu_r*100,1)}% netral). Indeks polarisasi: {polarity:.2f}. Sumber terbanyak: {dom_src}."}

def detect_hoax(texts):
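    """Classify up to the first 20 texts with the seed-trained model above;
    if the model is unavailable, fall back to keyword matching."""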
    results=[]; sample=texts[:20]
    if not sample: return results
    if _hoax_clf is not None:
        try:
            preds=_hoax_clf.predict(sample); probas=_hoax_clf.predict_proba(sample)
            for t,p,pr in zip(sample,preds,probas):
                results.append({"text":t,"label":"Hoax" if p==1 else "Normal","confidence":round(float(max(pr)),3),"method":"ml"})
            return results
        except Exception: pass
    KW=["hoax","bohong","fitnah","propaganda","palsu","disinformasi","menyesatkan","kebohongan","manipulasi","adu domba","provokasi","berita palsu","ujaran kebencian","tidak benar","narasi sesat"]
    for t in sample:
        sc=sum(1 for k in KW if k in t.lower()); lbl="Hoax" if sc>=1 else "Normal"
        results.append({"text":t,"label":lbl,"confidence":min(0.5+sc*0.1,0.95) if lbl=="Hoax" else 0.6,"method":"keyword"})
    return results

def get_topics(texts):
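    """Extract up to 3 LDA topics (5 top words each) from the cleaned texts."""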
    try:
        cleaned=batch_clean(texts); cleaned=[t for t in cleaned if len(t)>3]
        if len(cleaned)<5: return [["data kurang"]]
        vec=CountVectorizer(min_df=2,stop_words=list(STOPWORDS)); X=vec.fit_transform(cleaned)
        if X.shape[1]==0: return [["kosong"]]
        n=min(3,X.shape[1]); lda=LatentDirichletAllocation(n_components=n,random_state=42); lda.fit(X)
        words=vec.get_feature_names_out()
        return [[words[i] for i in t.argsort()[-5:]] for t in lda.components_]
    except Exception as e: print(f"topic error: {e}"); return [["error"]]

def generate_insight(data):
    s=[d["sentiment"] for d in data]
    return f"Positive:{s.count('Positive')} Negative:{s.count('Negative')} Neutral:{s.count('Neutral')} Uncertain:{s.count('Uncertain')}"

def cluster_opinions(texts):
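    """Group opinions into 3 KMeans clusters over TF-IDF vectors and return
    up to 3 sample texts per cluster."""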
    try:
        if len(texts)<6: return []
        cleaned=batch_clean(texts)
        X=TfidfVectorizer(max_features=300,stop_words=list(STOPWORDS)).fit_transform(cleaned)
        n=min(3,len(texts)); k=KMeans(n_clusters=n,n_init=10,random_state=42).fit(X)
        clusters={}
        for i,lbl in enumerate(k.labels_): clusters.setdefault(int(lbl),[]).append(texts[i])
        return [{"cluster":l,"samples":s[:3]} for l,s in clusters.items()]
    except Exception as e: print(f"cluster error: {e}"); return []

def build_network(texts):
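    """Build a word co-occurrence network: nodes are salient words, edge
    weight counts the comments in which a pair co-occurs (kept if > 1)."""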
    edges={}
    for t in texts:
        words=[w for w in set(clean_text_deep(t).split()) if len(w)>3 and w not in STOPWORDS][:6]
        for a,b in combinations(words,2):
            key=tuple(sorted([a,b])); edges[key]=edges.get(key,0)+1
    return [{"source":k[0],"target":k[1],"weight":v} for k,v in edges.items() if v>1]

def detect_bot_network(texts):
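    """Flag coordinated posting: link texts whose TF-IDF cosine similarity
    exceeds 0.75 (near-duplicates) and mark nodes with degree centrality
    above 0.3 as bot-like."""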
    try:
        if len(texts)<5: return {"nodes":[],"edges":[],"bots":[]}
        X=TfidfVectorizer(max_features=300).fit_transform(texts); sim=cosine_similarity(X)
        G=nx.Graph()
        for i in range(len(texts)): G.add_node(i,text=texts[i])
        for i in range(len(texts)):
            for j in range(i+1,len(texts)):
                if sim[i][j]>0.75: G.add_edge(i,j)
        central=nx.degree_centrality(G)
        bots=[{"node":i,"score":round(s,2),"text":texts[i]} for i,s in central.items() if s>0.3]
        return {"nodes":[{"id":i} for i in G.nodes()],"edges":[{"source":u,"target":v} for u,v in G.edges()],"bots":bots[:10]}
    except Exception as e: print(f"bot error: {e}"); return {"nodes":[],"edges":[],"bots":[]}

def run_gnn_safe(nodes, edges, texts):
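    """Score nodes with a small 2-layer GCN when torch-geometric is installed.

    The network is untrained (fixed seed, no gradient steps), so the min-max
    scaled embedding norms act as a structural-anomaly proxy driven by graph
    shape and TF-IDF features, not a learned bot model.
    """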
    if not nodes or not edges or len(nodes)<3:
        return [{"node":n["id"],"score":0.0} for n in nodes]
    try:
        import torch
        from torch_geometric.data import Data
        from torch_geometric.nn import GCNConv
        node_texts=[texts[n["id"]] if n["id"]<len(texts) else "" for n in nodes]
        vec=TfidfVectorizer(max_features=32,min_df=1)
        try: X=vec.fit_transform(node_texts).toarray()
        except Exception: X=np.eye(len(nodes),32)
        x=torch.tensor(X,dtype=torch.float)
        edge_list=[[e["source"],e["target"]] for e in edges if e["source"]<len(nodes) and e["target"]<len(nodes)]
        if not edge_list: return [{"node":n["id"],"score":0.0} for n in nodes]
        edge_index=torch.tensor(edge_list,dtype=torch.long).t().contiguous()
        class GCN(torch.nn.Module):
            def __init__(self,in_ch):
                super().__init__(); self.conv1=GCNConv(in_ch,16); self.conv2=GCNConv(16,4)
            def forward(self,x,ei): return self.conv2(torch.relu(self.conv1(x,ei)),ei)
        torch.manual_seed(42); model=GCN(x.shape[1]); model.eval()
        with torch.no_grad(): out=model(x,edge_index)
        scores=torch.norm(out,dim=1).numpy()
        if scores.max()>scores.min(): scores=(scores-scores.min())/(scores.max()-scores.min())
        else: scores=np.zeros(len(scores))
        return [{"node":nodes[i]["id"],"score":round(float(scores[i]),3)} for i in range(len(nodes))]
    except Exception as e: print(f"GNN error: {e}"); return [{"node":n["id"],"score":0.0} for n in nodes]

# ── ROUTES ──
@app.route("/")
def home(): return render_template("index.html")

@app.route("/result")
def result(): return render_template("result.html")

@app.route("/analyze", methods=["POST"])
def analyze():
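    """Main pipeline: scrape -> sentiment + confidence filter -> charts,
    topics, clusters, hoax/bot/fake-news checks, GNN scoring, extended NLP
    services -> CSV export. Accepts JSON: keyword, source, conf_threshold."""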
    try:
        body=request.json or {}
        keyword=body.get("keyword","").strip(); source=body.get("source","all")
        conf_th=float(body.get("conf_threshold",CONF_THRESHOLD))
        if not keyword: return jsonify({"error":"keyword kosong","data":[]}),400
        raw=collect_data(keyword,source)
        texts=[t for _,t in raw][:100]; sources=[s for s,_ in raw][:100]
        scored=predict_with_score(texts)
        scored_filtered=apply_confidence_filter(scored,threshold=conf_th)
        result_data=[{"text":t,"sentiment":s["sentiment"],"confidence":s["confidence"],"is_certain":s["is_certain"],"source":src,"scraped_at":datetime.now().strftime("%Y-%m-%d %H:%M")} for t,s,src in zip(texts,scored_filtered,sources)]
        conf_stats_data=confidence_stats(result_data)
        cross_data=cross_platform_analysis(result_data)
        generate_comparative_chart(cross_data)
        generate_wordcloud(texts); generate_heatmap(result_data); generate_timeline(result_data)
        top_words=get_top_words(texts); topics=get_topics(texts); insight=generate_insight(result_data)
        clusters=cluster_opinions(texts); trend=predict_trend(result_data); hoax=detect_hoax(texts)
        network=build_network(texts); bot_network=detect_bot_network(texts)
        gnn=run_gnn_safe(bot_network["nodes"],bot_network["edges"],texts)
        bot_bert=detect_bot_bert(texts); fake_news=detect_fake_news(texts)
        # extended NLP services (stance is scored against the search keyword; summaries assume per-source grouping)
        absa_result=analyze_absa(texts); ner_result=analyze_ner(texts); stance_result=analyze_stance(texts,keyword)
        emotion_result=analyze_emotions(texts); keywords_result=extract_keywords(texts); summaries=summarize_by_platform(result_data)
        export=build_export_data(result_data,keyword,source,conf_stats_data,cross_data,trend)
        save_export_csv(export)
        return jsonify({"data":result_data,"top_words":top_words,"topics":topics,"insight":insight,"clusters":clusters,"hoax":hoax,"network":network,"bot_network":bot_network,"trend":trend,"bot_bert":bot_bert,"fake_news":fake_news,"gnn":gnn,"conf_stats":conf_stats_data,"cross_platform":cross_data,"export_summary":export["summary"],"absa":absa_result,"ner":ner_result,"stance":stance_result,"emotions":emotion_result,"keywords":keywords_result,"summaries":summaries})
    except Exception as e:
        print(f"ERROR /analyze: {e}"); return jsonify({"error":str(e),"data":[]}),500

@app.route("/download")
def download():
    path="static/result.csv"
    if not os.path.exists(path): return jsonify({"error":"Belum ada hasil"}),404
    return send_file(path,as_attachment=True)

@app.route("/download/summary")
def download_summary():
    path="static/summary.csv"
    if not os.path.exists(path): return jsonify({"error":"Belum ada summary"}),404
    return send_file(path,as_attachment=True)

@app.route("/static/<path:filename>")
def static_files(filename): return send_file(f"static/{filename}")

if __name__=="__main__":
    app.run(host="0.0.0.0",port=7860,debug=False)