# app.py — Flask dashboard for multi-platform sentiment analysis.
# NOTE(review): this file was recovered from a web scrape; non-source residue
# (Hugging Face Spaces page chrome, git blame hashes, gutter line numbers)
# has been removed from the top of the file.
from flask import Flask, render_template, request, jsonify, send_file

# Data collector service; keep the app bootable with an error stub if it fails.
try:
    from services.aggregator import collect_data
except Exception as e:
    # Repaired mojibake in the original print ("β" was a mis-decoded emoji).
    print(f"❌ FATAL: aggregator gagal load: {e}")
    def collect_data(kw, src="all"):
        """Stub collector: returns a single error row so the UI still renders."""
        return [("unknown", "aggregator error")]

# Transformer sentiment model; fall back to a tiny keyword rule when missing.
try:
    from services.sentiment import predict_with_score
except Exception as e:
    print(f"⚠️ sentiment gagal load: {e} → rule-based fallback")
    def predict_with_score(texts):
        """Rule-based fallback: count positive/negative keyword hits per text.

        Returns a list of {'label': str, 'score': float} dicts, mirroring the
        real model's output shape. Score is a fixed 0.5 (no real confidence).
        """
        def _rb(t):
            pos = sum(1 for k in ['bagus','baik','senang','suka','mantap','oke','good','great'] if k in t.lower())
            neg = sum(1 for k in ['buruk','jelek','benci','kecewa','gagal','bad','worst'] if k in t.lower())
            label = 'Positive' if pos > neg else 'Negative' if neg > pos else 'Neutral'
            return {'label': label, 'score': 0.5}
        return [_rb(t) for t in texts]
from collections import Counter
import pandas as pd
import os, re
import numpy as np
from datetime import datetime
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import networkx as nx
from itertools import combinations
from wordcloud import WordCloud
# Deep preprocessing (Indonesian); fall back to a light regex cleaner.
try:
    from services.preprocessing_id import clean_text_deep, batch_clean, STOPWORDS
    DEEP_PREP = True
    # Repaired: this print was split across two lines by a mojibake'd emoji.
    print("✅ Deep preprocessing loaded")
except ImportError:
    DEEP_PREP = False
    # Minimal Indonesian + English stopword set for the fallback cleaner.
    STOPWORDS = {'yang','dan','di','ke','dari','ini','itu','dengan','untuk','adalah','ada','pada','juga','tidak','bisa','sudah','the','is','in','of','a','an','and','it'}
    def clean_text_deep(t):
        """Lowercase, strip URLs and punctuation, collapse whitespace."""
        t = t.lower()
        t = re.sub(r'http\S+', '', t)          # drop URLs first
        t = re.sub(r'[^a-zA-Z0-9\s]', ' ', t)  # non-alphanumerics -> space
        return re.sub(r'\s+', ' ', t).strip()
    def batch_clean(texts):
        """Apply clean_text_deep to every text in *texts*."""
        return [clean_text_deep(t) for t in texts]
# Optional BERT bot detector — degrade to a no-op when the service is absent.
try:
    from services.bot_bert import detect_bot_bert
except Exception:
    def detect_bot_bert(x):
        """Stub: bot detection unavailable, report nothing."""
        return []

# Optional fake-news detector — same no-op degradation strategy.
try:
    from services.fake_news import detect_fake_news
except Exception:
    def detect_fake_news(x):
        """Stub: fake-news detection unavailable, report nothing."""
        return []
# ── New NLP Services ──
# Each optional service degrades to a stub returning an empty-but-shaped
# structure so downstream JSON consumers never see missing keys.
try:
    from services.absa import analyze_absa
except Exception as e:
    # Repaired mojibake "β οΈ" -> "⚠️" in the warning prints below.
    print(f"⚠️ ABSA not available: {e}")
    def analyze_absa(x):
        """Stub: aspect-based sentiment unavailable."""
        return {'top_aspects':[],'aggregate':{},'aspect_sentiment_map':{}}
try:
    from services.ner import analyze_ner
except Exception as e:
    print(f"⚠️ NER not available: {e}")
    def analyze_ner(x):
        """Stub: named-entity recognition unavailable."""
        return {'top_entities':[],'entities_by_type':{}}
try:
    from services.advanced_nlp import (
        analyze_stance, analyze_emotions,
        extract_keywords, summarize_by_platform
    )
except Exception as e:
    print(f"⚠️ Advanced NLP not available: {e}")
    def analyze_stance(x, t=None): return {'counts':{},'dominant':'Neutral','favor_pct':0,'against_pct':0,'neutral_pct':0}
    def analyze_emotions(x): return {'distribution':{},'dominant':'neutral','emotional_pct':0}
    def extract_keywords(x, n=20): return []
    def summarize_by_platform(x): return {}
app = Flask(__name__)
CONF_THRESHOLD = 0.60  # predictions below this confidence become 'Uncertain'

# ── HOAX CLASSIFIER ──
# Tiny in-memory training set: 15 hoax-flavoured + 15 benign Indonesian
# sentences. This is a toy seed model, not a production classifier.
_HX = ["berita ini bohong dan tidak benar","ini propaganda yang menyesatkan","jangan percaya hoax yang beredar","informasi palsu disebarkan untuk memfitnah","disinformasi sengaja dibuat untuk menipu","berita palsu sangat meresahkan warga","menyebarkan kebohongan dan fitnah","manipulasi politik yang berbahaya","provokasi untuk memecah belah bangsa","ujaran kebencian dan fitnah","waspada berita bohong sengaja disebarkan","hoaks sudah dibantah pihak berwenang","informasi menyesatkan tidak ada bukti","narasi sesat untuk mengadu domba","berita manipulatif perlu diklarifikasi","produk ini sangat bagus dan berkualitas","saya sangat senang dengan pelayanannya","hasil kerja tim ini luar biasa","kebijakan ini berdampak positif masyarakat","acara kemarin berjalan lancar dan meriah","terima kasih atas bantuan yang diberikan","pemerintah berupaya meningkatkan kesejahteraan","inovasi terbaru sangat membantu kehidupan","prestasi luar biasa yang membanggakan","kondisi ekonomi mulai membaik dari data","program ini memberikan manfaat nyata","kolaborasi baik menghasilkan output optimal","penelitian ini memberikan temuan menarik","masyarakat antusias menyambut kebijakan baru","kualitas pendidikan terus meningkat"]
_HY = [1]*15 + [0]*15  # 1 = hoax, 0 = normal (order matches _HX)
_hoax_clf = Pipeline([('tfidf', TfidfVectorizer(ngram_range=(1,2), max_features=500, sublinear_tf=True)), ('clf', LogisticRegression(C=1.0, max_iter=200, random_state=42, class_weight='balanced'))])
try:
    _hoax_clf.fit(_HX, _HY)
    # Repaired: this print was split across two lines by a mojibake'd emoji.
    print("✅ Hoax classifier ready")
except Exception as e:
    print(f"⚠️ Hoax error: {e}")
    _hoax_clf = None  # detect_hoax() falls back to keyword matching
# ── CONFIDENCE FILTER (Priority 2) ──
def apply_confidence_filter(scored, threshold=CONF_THRESHOLD):
    """Relabel predictions whose confidence is below *threshold* as 'Uncertain'.

    Each input dict (with 'label' and 'score') is copied and extended with
    'sentiment', 'confidence' (rounded to 4 dp) and 'is_certain'.
    """
    filtered = []
    for entry in scored:
        certainty = entry.get('score', 0)
        predicted = entry.get('label', 'Neutral')
        certain = certainty >= threshold
        filtered.append({
            **entry,
            'sentiment': predicted if certain else 'Uncertain',
            'confidence': round(certainty, 4),
            'is_certain': certain,
        })
    return filtered
def confidence_stats(result_data):
    """Summarise prediction confidence per sentiment class and globally.

    Returns per-class count/mean/std/min/max, coarse confidence buckets,
    the number of uncertain rows, and the overall average confidence.
    """
    grouped = {'Positive': [], 'Negative': [], 'Neutral': [], 'Uncertain': []}
    for row in result_data:
        label = row.get('sentiment', 'Neutral')
        # Unknown labels are folded into the 'Uncertain' bucket.
        key = label if label in grouped else 'Uncertain'
        grouped[key].append(row.get('confidence', 0))
    per_class = {}
    for name, vals in grouped.items():
        if not vals:
            per_class[name] = {'count': 0, 'mean': 0, 'std': 0, 'min': 0, 'max': 0}
            continue
        arr = np.asarray(vals, dtype=float)
        per_class[name] = {
            'count': len(vals),
            'mean': round(float(arr.mean()), 3),
            'std': round(float(arr.std()), 3),
            'min': round(float(arr.min()), 3),
            'max': round(float(arr.max()), 3),
        }
    confidences = [row.get('confidence', 0) for row in result_data]
    return {
        'by_class': per_class,
        'buckets': {
            'high (β₯0.8)': sum(1 for c in confidences if c >= 0.8),
            'med (0.6-0.8)': sum(1 for c in confidences if 0.6 <= c < 0.8),
            'low (<0.6)': sum(1 for c in confidences if c < 0.6),
        },
        'uncertain_count': sum(1 for row in result_data if not row.get('is_certain', True)),
        'avg_confidence': round(float(np.mean(confidences)), 3) if confidences else 0,
    }
# ── CROSS-PLATFORM ANALYSIS (Priority 3) ──
def cross_platform_analysis(result_data):
    """Compare sentiment distributions across source platforms.

    Returns per-platform counts/percentages/polarity, pairwise positive-share
    differences, human-readable insights (Indonesian), and the platforms with
    the highest positive, negative, and polarity scores.
    """
    # Pass 1: raw tallies per platform.
    platforms = {}
    for r in result_data:
        src = r.get('source','unknown'); sent = r.get('sentiment','Neutral'); conf = r.get('confidence',0)
        if src not in platforms:
            platforms[src] = {'Positive':0,'Negative':0,'Neutral':0,'Uncertain':0,'total':0,'conf_sum':0}
        # Unknown labels are folded into 'Uncertain'.
        if sent in platforms[src]: platforms[src][sent] += 1
        else: platforms[src]['Uncertain'] += 1
        platforms[src]['total'] += 1
        platforms[src]['conf_sum'] += conf
    # Pass 2: derived percentages and polarity per platform.
    platform_stats = {}
    for src, c in platforms.items():
        t = c['total'] or 1  # guard against division by zero
        pos_r = c['Positive']/t; neg_r = c['Negative']/t; neu_r = c['Neutral']/t
        # polarity = |pos share - neg share|: 0 = balanced, 1 = one-sided.
        platform_stats[src] = {'total':t,'pos_count':c['Positive'],'neg_count':c['Negative'],'neu_count':c['Neutral'],'unc_count':c['Uncertain'],'pos_pct':round(pos_r*100,1),'neg_pct':round(neg_r*100,1),'neu_pct':round(neu_r*100,1),'unc_pct':round(c['Uncertain']/t*100,1),'polarity':round(abs(pos_r-neg_r),3),'avg_conf':round(c['conf_sum']/t,3),'dominant':max(['Positive','Negative','Neutral','Uncertain'],key=lambda s:c[s])}
    if not platform_stats:
        # Empty input: return the full shape so JSON consumers see every key.
        return {'platforms':{},'pairwise':[],'insights':[],'most_positive':None,'most_negative':None,'most_polarized':None}
    srcs = list(platform_stats.keys())
    most_positive = max(srcs, key=lambda s: platform_stats[s]['pos_pct'])
    most_negative = max(srcs, key=lambda s: platform_stats[s]['neg_pct'])
    most_polarized = max(srcs, key=lambda s: platform_stats[s]['polarity'])
    # All unordered platform pairs, compared by positive-sentiment share.
    pairwise = []
    for i in range(len(srcs)):
        for j in range(i+1, len(srcs)):
            a, b = srcs[i], srcs[j]
            diff = round(abs(platform_stats[a]['pos_pct']-platform_stats[b]['pos_pct']),1)
            pairwise.append({'platform_a':a,'platform_b':b,'pos_diff':diff,'description':f"{a} vs {b}: selisih sentimen positif {diff}%"})
    # Insights only make sense when there is more than one platform.
    insights = []
    if len(srcs) > 1:
        insights.append(f"{most_positive.capitalize()} memiliki sentimen positif tertinggi ({platform_stats[most_positive]['pos_pct']}%).")
        insights.append(f"{most_negative.capitalize()} memiliki sentimen negatif tertinggi ({platform_stats[most_negative]['neg_pct']}%).")
        insights.append(f"{most_polarized.capitalize()} paling terpolarisasi (indeks {platform_stats[most_polarized]['polarity']}).")
    return {'platforms':platform_stats,'pairwise':pairwise,'insights':insights,'most_positive':most_positive,'most_negative':most_negative,'most_polarized':most_polarized}
def generate_comparative_chart(cross_data):
    """Render a two-panel platform comparison to static/comparative.png.

    Left panel: positive/negative/neutral percentages per platform.
    Right panel: polarity (scaled x100) and average confidence (%).
    Skips rendering when fewer than two platforms are present. Best-effort:
    any failure is logged and swallowed.
    """
    try:
        platforms = cross_data.get('platforms',{})
        if len(platforms) < 2: return  # comparison needs at least 2 platforms
        os.makedirs("static", exist_ok=True)
        srcs = list(platforms.keys())
        pos = [platforms[s]['pos_pct'] for s in srcs]
        neg = [platforms[s]['neg_pct'] for s in srcs]
        neu = [platforms[s]['neu_pct'] for s in srcs]
        pol = [platforms[s]['polarity']*100 for s in srcs]  # scale 0-1 -> 0-100
        cnf = [platforms[s]['avg_conf']*100 for s in srcs]
        x = np.arange(len(srcs)); w = 0.26  # bar group positions and width
        fig, axes = plt.subplots(1, 2, figsize=(13,4))
        fig.patch.set_facecolor('#0e1117')  # dark theme to match the UI
        for ax in axes: ax.set_facecolor('#141820')
        # Panel 1: grouped sentiment-share bars.
        axes[0].bar(x-w, pos, w, label='Positif', color='#22c55e', alpha=0.85)
        axes[0].bar(x, neg, w, label='Negatif', color='#ef4444', alpha=0.85)
        axes[0].bar(x+w, neu, w, label='Netral', color='#94a3b8', alpha=0.85)
        axes[0].set_xticks(x); axes[0].set_xticklabels([s.capitalize() for s in srcs], color='#8892a4', fontsize=9)
        axes[0].set_title('Distribusi Sentimen per Platform', color='#e8eaf0', fontsize=10)
        axes[0].legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        axes[0].set_ylim(0,105); axes[0].tick_params(colors='#5a6070')
        # Panel 2: polarity vs confidence bars.
        axes[1].bar(x-0.2, pol, 0.38, label='Polarisasi Γ100', color='#f59e0b', alpha=0.8)
        axes[1].bar(x+0.2, cnf, 0.38, label='Avg Confidence %', color='#4f9cf9', alpha=0.8)
        axes[1].set_xticks(x); axes[1].set_xticklabels([s.capitalize() for s in srcs], color='#8892a4', fontsize=9)
        axes[1].set_title('Polarisasi & Confidence per Platform', color='#e8eaf0', fontsize=10)
        axes[1].legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        axes[1].set_ylim(0,110); axes[1].tick_params(colors='#5a6070')
        for ax in axes:
            for sp in ax.spines.values(): sp.set_edgecolor('#1a2030')
        plt.tight_layout(pad=1.5)
        plt.savefig("static/comparative.png", dpi=110, facecolor=fig.get_facecolor())
        plt.close(fig)  # release figure memory (Agg backend, no display)
    except Exception as e:
        print(f"comparative chart error: {e}")
# ── RICH EXPORT (Priority 4) ──
def build_export_data(result_data, keyword, source, conf_stats, cross_data, trend):
    """Assemble CSV-ready export rows plus a one-row run summary.

    Returns {'main': [per-comment rows], 'summary': {aggregate run stats}}.
    *conf_stats*, *cross_data* and *trend* are the outputs of
    confidence_stats, cross_platform_analysis and predict_trend respectively.
    """
    ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # One row per analyzed comment; 'text_length' is the word count.
    main_rows = [{'index':i+1,'text':r.get('text',''),'text_length':len(r.get('text','').split()),'sentiment':r.get('sentiment',''),'confidence':r.get('confidence',0),'is_certain':r.get('is_certain',True),'source':r.get('source',''),'scraped_at':r.get('scraped_at',ts),'keyword':keyword} for i,r in enumerate(result_data)]
    total = len(result_data) or 1  # avoid div-by-zero on empty input
    pos = sum(1 for r in result_data if r.get('sentiment')=='Positive')
    neg = sum(1 for r in result_data if r.get('sentiment')=='Negative')
    neu = sum(1 for r in result_data if r.get('sentiment')=='Neutral')
    unc = sum(1 for r in result_data if r.get('sentiment')=='Uncertain')
    summary = {'keyword':keyword,'source':source,'analyzed_at':ts,'total_samples':total,'positive_count':pos,'negative_count':neg,'neutral_count':neu,'uncertain_count':unc,'positive_pct':round(pos/total*100,1),'negative_pct':round(neg/total*100,1),'neutral_pct':round(neu/total*100,1),'uncertain_pct':round(unc/total*100,1),'avg_confidence':conf_stats.get('avg_confidence',0),'trend_label':trend.get('label',''),'polarity_index':trend.get('polarity',0),'most_positive_platform':cross_data.get('most_positive','')}
    return {'main':main_rows,'summary':summary}
def save_export_csv(export):
    """Persist the export bundle as static/result.csv and static/summary.csv."""
    os.makedirs("static", exist_ok=True)
    # 'main' is already a list of row dicts; 'summary' is a single dict.
    targets = {
        "static/result.csv": export['main'],
        "static/summary.csv": [export['summary']],
    }
    for path, rows in targets.items():
        pd.DataFrame(rows).to_csv(path, index=False)
# ── CORE FUNCTIONS ──
def get_top_words(texts):
    """Return the 15 most frequent cleaned tokens (length > 2, non-stopword)
    as [{'word': w, 'count': c}, ...]."""
    counts = Counter(
        token
        for text in texts
        for token in clean_text_deep(text).split()
        if len(token) > 2 and token not in STOPWORDS
    )
    return [{"word": w, "count": c} for w, c in counts.most_common(15)]
def generate_wordcloud(texts):
    """Render a monochrome word cloud of the cleaned corpus to
    static/wordcloud.png. Best-effort: failures are logged and swallowed."""
    try:
        os.makedirs("static", exist_ok=True)
        combined = " ".join(batch_clean(texts))
        if not combined.strip(): return  # nothing to render
        # Single-colour cloud (#4f9cf9) on the app's dark background.
        WordCloud(width=900,height=380,background_color='#0e1117',color_func=lambda *a,**k:'#4f9cf9',max_words=80,stopwords=STOPWORDS).generate(combined).to_file("static/wordcloud.png")
    except Exception as e: print(f"wordcloud error: {e}")
def generate_heatmap(data):
    """Render a source × sentiment count heatmap to static/heatmap.png.

    *data* is a list of dicts with at least 'source' and 'sentiment'.
    Best-effort: any failure is logged and swallowed.
    """
    try:
        if not data: return
        labels = ["Positive","Neutral","Negative","Uncertain"]
        sources = sorted(set(d["source"] for d in data))
        matrix = np.zeros((len(sources),len(labels)))
        for d in data:
            i = sources.index(d["source"]); s = d["sentiment"]
            # Unknown sentiment labels are counted in the 'Uncertain' column.
            j = labels.index(s) if s in labels else 3
            matrix[i][j] += 1
        if matrix.sum()==0: return
        # Figure height grows with the number of sources (min 2 inches).
        fig, ax = plt.subplots(figsize=(7,max(2,len(sources))))
        fig.patch.set_facecolor('#0e1117'); ax.set_facecolor('#141820')
        im = ax.imshow(matrix, cmap='Blues', aspect='auto')
        ax.set_xticks(range(len(labels))); ax.set_xticklabels(labels, color='#8892a4', fontsize=9)
        ax.set_yticks(range(len(sources))); ax.set_yticklabels(sources, color='#8892a4', fontsize=9)
        ax.tick_params(colors='#5a6070'); plt.colorbar(im, ax=ax); plt.tight_layout()
        os.makedirs("static", exist_ok=True)
        plt.savefig("static/heatmap.png", dpi=100, facecolor=fig.get_facecolor()); plt.close(fig)
    except Exception as e: print(f"heatmap error: {e}")
def generate_timeline(data):
    """Render rolling-mean sentiment proportions over comment order to
    static/timeline.png. Needs at least 3 rows. Best-effort."""
    try:
        if not data or len(data)<3: return
        os.makedirs("static", exist_ok=True)
        # Rolling window: 10% of the data, never smaller than 5.
        window = max(5,len(data)//10)
        # Trailing rolling mean; shorter windows at the start of the series.
        def roll(arr,w): return [sum(arr[max(0,i-w+1):i+1])/len(arr[max(0,i-w+1):i+1]) for i in range(len(arr))]
        # Binary indicator series per sentiment class.
        pos_r=[1 if d["sentiment"]=="Positive" else 0 for d in data]
        neg_r=[1 if d["sentiment"]=="Negative" else 0 for d in data]
        neu_r=[1 if d["sentiment"]=="Neutral" else 0 for d in data]
        unc_r=[1 if d["sentiment"]=="Uncertain" else 0 for d in data]
        x=list(range(1,len(data)+1))  # 1-based comment order on the x-axis
        fig,ax=plt.subplots(figsize=(11,3.5)); fig.patch.set_facecolor('#0e1117'); ax.set_facecolor('#141820')
        ax.fill_between(x,roll(pos_r,window),alpha=0.12,color='#22c55e')
        ax.fill_between(x,roll(neg_r,window),alpha=0.12,color='#ef4444')
        ax.plot(x,roll(pos_r,window),color='#22c55e',lw=1.8,label='Positif')
        ax.plot(x,roll(neg_r,window),color='#ef4444',lw=1.8,label='Negatif')
        ax.plot(x,roll(neu_r,window),color='#94a3b8',lw=1.2,ls='--',label='Netral')
        ax.plot(x,roll(unc_r,window),color='#f59e0b',lw=1.0,ls=':',label='Uncertain')
        # Dotted horizontal lines mark the overall pos/neg means as a baseline.
        ax.axhline(np.mean(pos_r),color='#22c55e',lw=0.6,ls=':',alpha=0.5)
        ax.axhline(np.mean(neg_r),color='#ef4444',lw=0.6,ls=':',alpha=0.5)
        ax.set_xlabel(f'Urutan komentar (rolling mean, window={window})',color='#5a6070',fontsize=8)
        ax.set_ylabel('Proporsi',color='#5a6070',fontsize=8); ax.tick_params(colors='#5a6070',labelsize=7)
        for sp in ax.spines.values(): sp.set_edgecolor('#1a2030')
        ax.legend(fontsize=8,facecolor='#141820',edgecolor='#1a2030',labelcolor='#8892a4')
        ax.set_ylim(0,1.05); ax.set_xlim(1,len(data)); plt.tight_layout(pad=1.0)
        plt.savefig("static/timeline.png",dpi=110,facecolor=fig.get_facecolor()); plt.close(fig)
    except Exception as e: print(f"timeline error: {e}")
def predict_trend(data):
    """Classify the overall sentiment trend of the corpus.

    Returns a dict with a trend label ('Dominan Positif' / 'Dominan Negatif' /
    'Mayoritas Netral' / 'Terpolarisasi'), the dominant class, a polarity
    index |pos - neg|, per-source breakdowns, and an Indonesian summary.
    """
    if not data: return {"label":"Kurang Data","dominant":"Neutral","polarity":0.0,"confidence":0.0,"by_source":{},"summary":"Tidak ada data."}
    sentiments=[d["sentiment"] for d in data]; total=len(sentiments)
    pos=sentiments.count("Positive"); neg=sentiments.count("Negative"); neu=sentiments.count("Neutral")
    pos_r,neg_r,neu_r=pos/total,neg/total,neu/total; polarity=round(abs(pos_r-neg_r),3)
    # Per-source sentiment tallies; unknown labels fall into 'Uncertain'.
    by_source={}
    for d in data:
        src=d.get("source","unknown")
        if src not in by_source: by_source[src]={"Positive":0,"Negative":0,"Neutral":0,"Uncertain":0,"total":0}
        s=d["sentiment"]
        if s in by_source[src]: by_source[src][s]+=1
        else: by_source[src]["Uncertain"]+=1
        by_source[src]["total"]+=1
    for src in by_source:
        t=by_source[src]["total"]
        by_source[src]["pos_pct"]=round(by_source[src]["Positive"]/t*100,1)
        by_source[src]["neg_pct"]=round(by_source[src]["Negative"]/t*100,1)
        by_source[src]["neu_pct"]=round(by_source[src]["Neutral"] /t*100,1)
    # Trend label: strict plurality for pos/neg; neutral needs >= 50%;
    # otherwise the corpus is considered polarized.
    if pos_r>neg_r and pos_r>neu_r: label,dominant,conf="Dominan Positif","Positive",round(pos_r,3)
    elif neg_r>pos_r and neg_r>neu_r: label,dominant,conf="Dominan Negatif","Negative",round(neg_r,3)
    elif neu_r>=0.5: label,dominant,conf="Mayoritas Netral","Neutral",round(neu_r,3)
    else: label,dominant,conf="Terpolarisasi","Mixed",round(polarity,3)
    dom_src=max(by_source,key=lambda s:by_source[s]["total"]) if by_source else "-"
    return {"label":label,"dominant":dominant,"polarity":polarity,"confidence":conf,"by_source":by_source,"pos_pct":round(pos_r*100,1),"neg_pct":round(neg_r*100,1),"neu_pct":round(neu_r*100,1),"summary":f"{label} ({round(pos_r*100,1)}% positif, {round(neg_r*100,1)}% negatif, {round(neu_r*100,1)}% netral). Indeks polarisasi: {polarity:.2f}. Sumber terbanyak: {dom_src}."}
def detect_hoax(texts):
    """Classify up to the first 20 texts as 'Hoax' or 'Normal'.

    Prefers the trained TF-IDF + LogisticRegression pipeline (_hoax_clf);
    falls back to keyword matching when the model is unavailable or fails.
    Returns [{'text', 'label', 'confidence', 'method'}, ...].
    """
    results=[]; sample=texts[:20]  # cap work; UI shows only a short list
    if _hoax_clf is not None:
        try:
            preds=_hoax_clf.predict(sample); probas=_hoax_clf.predict_proba(sample)
            for t,p,pr in zip(sample,preds,probas):
                results.append({"text":t,"label":"Hoax" if p==1 else "Normal","confidence":round(float(max(pr)),3),"method":"ml"})
            return results
        except Exception as e:
            # Narrowed from a bare `except:` that silently hid all failures
            # (including KeyboardInterrupt); log and fall through to keywords.
            print(f"hoax ML failed, using keyword fallback: {e}")
    KW=["hoax","bohong","fitnah","propaganda","palsu","disinformasi","menyesatkan","kebohongan","manipulasi","adu domba","provokasi","berita palsu","ujaran kebencian","tidak benar","narasi sesat"]
    for t in sample:
        # One or more keyword hits => 'Hoax'; confidence grows with hit count.
        sc=sum(1 for k in KW if k in t.lower()); lbl="Hoax" if sc>=1 else "Normal"
        results.append({"text":t,"label":lbl,"confidence":min(0.5+sc*0.1,0.95) if lbl=="Hoax" else 0.6,"method":"keyword"})
    return results
def get_topics(texts):
    """Extract up to 3 LDA topics, each as a list of 5 top words.

    Returns sentinel lists (["data kurang"], ["kosong"], ["error"]) when
    there is too little data, an empty vocabulary, or a failure.
    """
    try:
        cleaned=batch_clean(texts); cleaned=[t for t in cleaned if len(t)>3]
        if len(cleaned)<5: return [["data kurang"]]  # LDA needs some volume
        vec=CountVectorizer(min_df=2,stop_words=list(STOPWORDS)); X=vec.fit_transform(cleaned)
        if X.shape[1]==0: return [["kosong"]]  # nothing survived vectorizing
        n=min(3,X.shape[1]); lda=LatentDirichletAllocation(n_components=n,random_state=42); lda.fit(X)
        words=vec.get_feature_names_out()
        # Top-5 words per topic by component weight (ascending slice).
        return [[words[i] for i in t.argsort()[-5:]] for t in lda.components_]
    except Exception as e: print(f"topic error: {e}"); return [["error"]]
def generate_insight(data):
    """Return a one-line tally of sentiment labels in *data*."""
    tally = Counter(d["sentiment"] for d in data)
    return (f"Positive:{tally['Positive']} Negative:{tally['Negative']} "
            f"Neutral:{tally['Neutral']} Uncertain:{tally['Uncertain']}")
def cluster_opinions(texts):
    """Group texts into up to 3 KMeans clusters over TF-IDF features.

    Returns [{'cluster': id, 'samples': up to 3 original texts}, ...];
    empty list when there are fewer than 6 texts or on any failure.
    """
    try:
        if len(texts)<6: return []  # too few texts to cluster meaningfully
        cleaned=batch_clean(texts)
        X=TfidfVectorizer(max_features=300,stop_words=list(STOPWORDS)).fit_transform(cleaned)
        n=min(3,len(texts)); k=KMeans(n_clusters=n,n_init=10,random_state=42).fit(X)
        # Map each cluster id to the ORIGINAL (uncleaned) texts for display.
        clusters={}
        for i,lbl in enumerate(k.labels_): clusters.setdefault(int(lbl),[]).append(texts[i])
        return [{"cluster":l,"samples":s[:3]} for l,s in clusters.items()]
    except Exception as e: print(f"cluster error: {e}"); return []
def build_network(texts):
    """Build a word co-occurrence edge list across texts.

    Per text, takes up to 6 unique cleaned tokens (length > 3, non-stopword)
    and counts every unordered pair. Only edges seen more than once are kept;
    returned as [{'source', 'target', 'weight'}, ...].
    """
    pair_counts = Counter()
    for text in texts:
        tokens = [w for w in set(clean_text_deep(text).split())
                  if len(w) > 3 and w not in STOPWORDS][:6]
        for a, b in combinations(tokens, 2):
            pair_counts[tuple(sorted((a, b)))] += 1
    return [{"source": a, "target": b, "weight": n}
            for (a, b), n in pair_counts.items() if n > 1]
def detect_bot_network(texts):
    """Flag near-duplicate texts as a possible bot network.

    Builds a similarity graph (edge when TF-IDF cosine similarity > 0.75)
    and marks high-degree-centrality nodes (> 0.3) as suspected bots.
    Returns {'nodes', 'edges', 'bots'} (empty shape on failure / <5 texts).
    """
    try:
        if len(texts)<5: return {"nodes":[],"edges":[],"bots":[]}
        X=TfidfVectorizer(max_features=300).fit_transform(texts); sim=cosine_similarity(X)
        G=nx.Graph()
        for i in range(len(texts)): G.add_node(i,text=texts[i])
        # O(n^2) pairwise scan; acceptable because texts is capped upstream.
        for i in range(len(texts)):
            for j in range(i+1,len(texts)):
                if sim[i][j]>0.75: G.add_edge(i,j)
        central=nx.degree_centrality(G)
        bots=[{"node":i,"score":round(s,2),"text":texts[i]} for i,s in central.items() if s>0.3]
        return {"nodes":[{"id":i} for i in G.nodes()],"edges":[{"source":u,"target":v} for u,v in G.edges()],"bots":bots[:10]}
    except Exception as e: print(f"bot error: {e}"); return {"nodes":[],"edges":[],"bots":[]}
def run_gnn_safe(nodes, edges, texts):
    """Score graph nodes with a small untrained GCN (best-effort demo).

    Node features are TF-IDF vectors of each node's text; the score is the
    min-max-normalised L2 norm of the 4-d GCN embedding. Falls back to
    all-zero scores when torch/torch_geometric are unavailable, the graph
    is too small, or anything fails. NOTE(review): the GCN is untrained
    (fixed seed, eval mode) — scores reflect structure, not learned botness.
    """
    if not nodes or not edges or len(nodes)<3:
        return [{"node":n["id"],"score":0.0} for n in nodes]
    try:
        # Imported lazily so the app runs without torch_geometric installed.
        import torch
        from torch_geometric.data import Data
        from torch_geometric.nn import GCNConv
        node_texts=[texts[n["id"]] if n["id"]<len(texts) else "" for n in nodes]
        vec=TfidfVectorizer(max_features=32,min_df=1)
        try: X=vec.fit_transform(node_texts).toarray()
        except: X=np.eye(len(nodes),32)  # identity features if vectorizing fails
        x=torch.tensor(X,dtype=torch.float)
        # Drop edges that reference nodes outside the feature matrix.
        edge_list=[[e["source"],e["target"]] for e in edges if e["source"]<len(nodes) and e["target"]<len(nodes)]
        if not edge_list: return [{"node":n["id"],"score":0.0} for n in nodes]
        edge_index=torch.tensor(edge_list,dtype=torch.long).t().contiguous()
        class GCN(torch.nn.Module):
            # Two-layer GCN: in_ch -> 16 -> 4.
            def __init__(self,in_ch):
                super().__init__(); self.conv1=GCNConv(in_ch,16); self.conv2=GCNConv(16,4)
            def forward(self,x,ei): return self.conv2(torch.relu(self.conv1(x,ei)),ei)
        torch.manual_seed(42); model=GCN(x.shape[1]); model.eval()
        with torch.no_grad(): out=model(x,edge_index)
        scores=torch.norm(out,dim=1).numpy()
        # Min-max normalise to [0, 1]; all-equal scores collapse to zeros.
        if scores.max()>scores.min(): scores=(scores-scores.min())/(scores.max()-scores.min())
        else: scores=np.zeros(len(scores))
        return [{"node":nodes[i]["id"],"score":round(float(scores[i]),3)} for i in range(len(nodes))]
    except Exception as e: print(f"GNN error: {e}"); return [{"node":n["id"],"score":0.0} for n in nodes]
# ── ROUTES ──
@app.route("/")
def home():
    """Landing page with the search form."""
    return render_template("index.html")

@app.route("/result")
def result():
    """Results dashboard page (fetches /analyze via JS)."""
    return render_template("result.html")
@app.route("/analyze", methods=["POST"])
def analyze():
    """Main analysis endpoint: collect → sentiment → analytics → charts → export.

    Body JSON: {"keyword": str, "source": str, "conf_threshold": float}.
    Returns the full analytics payload consumed by result.html, or
    {"error": ..., "data": []} with 400 (missing keyword) / 500 (failure).
    """
    try:
        body = request.json or {}
        keyword = body.get("keyword", "").strip()
        source = body.get("source", "all")
        conf_th = float(body.get("conf_threshold", CONF_THRESHOLD))
        if not keyword:
            return jsonify({"error": "keyword kosong", "data": []}), 400
        raw = collect_data(keyword, source)
        # Cap the workload at 100 (source, text) pairs.
        texts = [t for _, t in raw][:100]
        sources = [s for s, _ in raw][:100]
        scored = predict_with_score(texts)
        scored_filtered = apply_confidence_filter(scored, threshold=conf_th)
        result_data = [{"text": t, "sentiment": s["sentiment"], "confidence": s["confidence"],
                        "is_certain": s["is_certain"], "source": src,
                        "scraped_at": datetime.now().strftime("%Y-%m-%d %H:%M")}
                       for t, s, src in zip(texts, scored_filtered, sources)]
        conf_stats_data = confidence_stats(result_data)
        cross_data = cross_platform_analysis(result_data)
        # Chart generation is best-effort; each helper swallows its own errors.
        generate_comparative_chart(cross_data)
        generate_wordcloud(texts); generate_heatmap(result_data); generate_timeline(result_data)
        top_words = get_top_words(texts); topics = get_topics(texts); insight = generate_insight(result_data)
        clusters = cluster_opinions(texts); trend = predict_trend(result_data); hoax = detect_hoax(texts)
        network = build_network(texts); bot_network = detect_bot_network(texts)
        gnn = run_gnn_safe(bot_network["nodes"], bot_network["edges"], texts)
        bot_bert = detect_bot_bert(texts); fake_news = detect_fake_news(texts)
        # BUG FIX: the response below referenced these six results, but they
        # were never computed — every request died with a NameError (HTTP 500).
        absa_result = analyze_absa(texts)
        ner_result = analyze_ner(texts)
        stance_result = analyze_stance(texts, keyword)  # keyword as stance target — confirm with service API
        emotion_result = analyze_emotions(texts)
        keywords_result = extract_keywords(texts)
        summaries = summarize_by_platform(result_data)  # assumes rows carry 'source' — confirm with service API
        export = build_export_data(result_data, keyword, source, conf_stats_data, cross_data, trend)
        save_export_csv(export)
        return jsonify({"data":result_data,"top_words":top_words,"topics":topics,"insight":insight,"clusters":clusters,"hoax":hoax,"network":network,"bot_network":bot_network,"trend":trend,"bot_bert":bot_bert,"fake_news":fake_news,"gnn":gnn,"conf_stats":conf_stats_data,"cross_platform":cross_data,"export_summary":export["summary"],"absa":absa_result,"ner":ner_result,"stance":stance_result,"emotions":emotion_result,"keywords":keywords_result,"summaries":summaries})
    except Exception as e:
        print(f"ERROR /analyze: {e}")
        return jsonify({"error": str(e), "data": []}), 500
@app.route("/download")
def download():
    """Download the per-row results CSV produced by the last /analyze run."""
    path="static/result.csv"
    if not os.path.exists(path): return jsonify({"error":"Belum ada hasil"}),404
    return send_file(path,as_attachment=True)

@app.route("/download/summary")
def download_summary():
    """Download the one-row run-summary CSV from the last /analyze run."""
    path="static/summary.csv"
    if not os.path.exists(path): return jsonify({"error":"Belum ada summary"}),404
    return send_file(path,as_attachment=True)
@app.route("/static/<path:filename>")
def static_files(filename):
    """Serve generated artifacts (charts, CSVs) from the static/ directory.

    BUG FIX: the original passed a literal broken path instead of the
    requested *filename*, so every static asset 500'd. Since *filename*
    comes straight from the URL, path traversal is rejected explicitly.
    """
    safe = os.path.normpath(filename)
    if os.path.isabs(safe) or safe.startswith(".."):
        return jsonify({"error": "invalid path"}), 404
    full = os.path.join("static", safe)
    if not os.path.exists(full):
        return jsonify({"error": "not found"}), 404
    return send_file(full)
if __name__=="__main__":
    # Bind on all interfaces; 7860 is the conventional Hugging Face Spaces port.
    app.run(host="0.0.0.0",port=7860,debug=False)