# main.py — update adcfce4 (verified), uploaded by noranisa.
# NOTE(review): the original three header lines here were web-page residue
# (not valid Python) and broke the module at import time; converted to comments.
from flask import Flask, render_template, request, jsonify, send_file
# Each optional service import below is wrapped in try/except with a stub
# fallback so the app can still boot when a services.* module is missing/broken.
try:
    from services.aggregator import collect_data
except Exception as e:
    print(f"❌ FATAL: aggregator gagal load: {e}")
    # Stub: a single (source, text) placeholder so downstream code still runs.
    def collect_data(kw, src="all"): return [("unknown", "aggregator error")]
try:
    from services.sentiment import predict_with_score
except Exception as e:
    print(f"⚠️ sentiment gagal load: {e} — rule-based fallback")
    # Rule-based fallback: count positive vs negative keyword hits per text.
    # NOTE: score is always 0.5, i.e. below CONF_THRESHOLD (0.60), so all
    # fallback predictions end up relabeled "Uncertain" by the filter.
    def predict_with_score(texts):
        def _rb(t):
            pos = sum(1 for k in ['bagus','baik','senang','suka','mantap','oke','good','great'] if k in t.lower())
            neg = sum(1 for k in ['buruk','jelek','benci','kecewa','gagal','bad','worst'] if k in t.lower())
            label = 'Positive' if pos > neg else 'Negative' if neg > pos else 'Neutral'
            return {'label': label, 'score': 0.5}
        return [_rb(t) for t in texts]
from collections import Counter
import pandas as pd
import os, re
import numpy as np
from datetime import datetime
import matplotlib
matplotlib.use('Agg')  # headless backend: charts are rendered straight to PNG files
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import networkx as nx
from itertools import combinations
from wordcloud import WordCloud
# Deep preprocessing (Indonesian-aware); shallow regex fallback defined below.
try:
    from services.preprocessing_id import clean_text_deep, batch_clean, STOPWORDS
    DEEP_PREP = True
    print("✅ Deep preprocessing loaded")
except ImportError:
    DEEP_PREP = False
    # Minimal mixed Indonesian/English stopword set for the fallback cleaner.
    STOPWORDS = {'yang','dan','di','ke','dari','ini','itu','dengan','untuk','adalah','ada','pada','juga','tidak','bisa','sudah','the','is','in','of','a','an','and','it'}
    def clean_text_deep(t):
        # Lowercase, strip URLs, drop non-alphanumerics, collapse whitespace.
        t = t.lower()
        t = re.sub(r'http\S+', '', t)
        t = re.sub(r'[^a-zA-Z0-9\s]', ' ', t)
        return re.sub(r'\s+', ' ', t).strip()
    def batch_clean(texts): return [clean_text_deep(t) for t in texts]
# Optional detectors: stubs return empty results when the service is absent.
try:
    from services.bot_bert import detect_bot_bert
except Exception:
    def detect_bot_bert(x): return []
try:
    from services.fake_news import detect_fake_news
except Exception:
    def detect_fake_news(x): return []
# ── New NLP Services ──
# Aspect-based sentiment (ABSA), named-entity recognition, and stance/emotion/
# keyword/summary analysis. Each stub returns an empty-but-well-shaped payload
# so the JSON response from /analyze keeps the same schema without the service.
try:
    from services.absa import analyze_absa
except Exception as e:
    print(f"⚠️ ABSA not available: {e}")
    def analyze_absa(x): return {'top_aspects':[],'aggregate':{},'aspect_sentiment_map':{}}
try:
    from services.ner import analyze_ner
except Exception as e:
    print(f"⚠️ NER not available: {e}")
    def analyze_ner(x): return {'top_entities':[],'entities_by_type':{}}
try:
    from services.advanced_nlp import (
        analyze_stance, analyze_emotions,
        extract_keywords, summarize_by_platform
    )
except Exception as e:
    print(f"⚠️ Advanced NLP not available: {e}")
    def analyze_stance(x, t=None): return {'counts':{},'dominant':'Neutral','favor_pct':0,'against_pct':0,'neutral_pct':0}
    def analyze_emotions(x): return {'distribution':{},'dominant':'neutral','emotional_pct':0}
    def extract_keywords(x, n=20): return []
    def summarize_by_platform(x): return {}
app = Flask(__name__)
# Predictions scoring below this threshold are relabeled "Uncertain"
# (see apply_confidence_filter); overridable per-request via conf_threshold.
CONF_THRESHOLD = 0.60
# ── HOAX CLASSIFIER ──
# Tiny in-memory training set, trained once at import time: the first 15
# sentences are hoax/disinformation-flavoured (label 1), the last 15 benign
# (label 0). This is a toy seed model, not a production-grade classifier.
_HX = ["berita ini bohong dan tidak benar","ini propaganda yang menyesatkan","jangan percaya hoax yang beredar","informasi palsu disebarkan untuk memfitnah","disinformasi sengaja dibuat untuk menipu","berita palsu sangat meresahkan warga","menyebarkan kebohongan dan fitnah","manipulasi politik yang berbahaya","provokasi untuk memecah belah bangsa","ujaran kebencian dan fitnah","waspada berita bohong sengaja disebarkan","hoaks sudah dibantah pihak berwenang","informasi menyesatkan tidak ada bukti","narasi sesat untuk mengadu domba","berita manipulatif perlu diklarifikasi","produk ini sangat bagus dan berkualitas","saya sangat senang dengan pelayanannya","hasil kerja tim ini luar biasa","kebijakan ini berdampak positif masyarakat","acara kemarin berjalan lancar dan meriah","terima kasih atas bantuan yang diberikan","pemerintah berupaya meningkatkan kesejahteraan","inovasi terbaru sangat membantu kehidupan","prestasi luar biasa yang membanggakan","kondisi ekonomi mulai membaik dari data","program ini memberikan manfaat nyata","kolaborasi baik menghasilkan output optimal","penelitian ini memberikan temuan menarik","masyarakat antusias menyambut kebijakan baru","kualitas pendidikan terus meningkat"]
_HY = [1]*15 + [0]*15
_hoax_clf = Pipeline([('tfidf', TfidfVectorizer(ngram_range=(1,2), max_features=500, sublinear_tf=True)), ('clf', LogisticRegression(C=1.0, max_iter=200, random_state=42, class_weight='balanced'))])
try:
    _hoax_clf.fit(_HX, _HY)
    print("✅ Hoax classifier ready")
except Exception as e:
    print(f"⚠️ Hoax error: {e}")
    _hoax_clf = None  # detect_hoax then falls back to keyword matching
# ── CONFIDENCE FILTER (Priority 2) ──
def apply_confidence_filter(scored, threshold=CONF_THRESHOLD):
    """Relabel predictions below `threshold` as 'Uncertain'.

    Each input dict keeps its original keys and gains three fields:
    `sentiment` (label or 'Uncertain'), `confidence` (rounded to 4 dp)
    and `is_certain` (bool: confidence >= threshold).
    """
    filtered = []
    for entry in scored:
        confidence = entry.get('score', 0)
        predicted = entry.get('label', 'Neutral')
        certain = confidence >= threshold
        filtered.append({
            **entry,
            'sentiment': predicted if certain else 'Uncertain',
            'confidence': round(confidence, 4),
            'is_certain': certain,
        })
    return filtered
def confidence_stats(result_data):
    """Per-class confidence statistics plus coarse confidence buckets.

    Unknown sentiment labels are folded into the 'Uncertain' class.
    Returns by_class stats (count/mean/std/min/max), bucket counts,
    the number of uncertain rows, and the overall average confidence.
    """
    grouped = {'Positive': [], 'Negative': [], 'Neutral': [], 'Uncertain': []}
    for record in result_data:
        label = record.get('sentiment', 'Neutral')
        conf = record.get('confidence', 0)
        grouped[label if label in grouped else 'Uncertain'].append(conf)
    stats = {}
    for cls, vals in grouped.items():
        if not vals:
            stats[cls] = {'count': 0, 'mean': 0, 'std': 0, 'min': 0, 'max': 0}
        else:
            stats[cls] = {
                'count': len(vals),
                'mean': round(float(np.mean(vals)), 3),
                'std': round(float(np.std(vals)), 3),
                'min': round(float(np.min(vals)), 3),
                'max': round(float(np.max(vals)), 3),
            }
    confidences = [r.get('confidence', 0) for r in result_data]
    return {
        'by_class': stats,
        'buckets': {
            'high (≥0.8)': sum(1 for c in confidences if c >= 0.8),
            'med (0.6-0.8)': sum(1 for c in confidences if 0.6 <= c < 0.8),
            'low (<0.6)': sum(1 for c in confidences if c < 0.6),
        },
        'uncertain_count': sum(1 for r in result_data if not r.get('is_certain', True)),
        'avg_confidence': round(float(np.mean(confidences)), 3) if confidences else 0,
    }
# ── CROSS-PLATFORM ANALYSIS (Priority 3) ──
def cross_platform_analysis(result_data):
    """Aggregate sentiment per source platform and derive cross-platform
    comparisons: per-platform percentages/polarity, pairwise positive-share
    differences, and Indonesian-language insight strings."""
    platforms = {}
    # Tally sentiment counts and summed confidence per platform.
    for r in result_data:
        src = r.get('source','unknown'); sent = r.get('sentiment','Neutral'); conf = r.get('confidence',0)
        if src not in platforms:
            platforms[src] = {'Positive':0,'Negative':0,'Neutral':0,'Uncertain':0,'total':0,'conf_sum':0}
        # Unknown labels are folded into 'Uncertain'.
        if sent in platforms[src]: platforms[src][sent] += 1
        else: platforms[src]['Uncertain'] += 1
        platforms[src]['total'] += 1
        platforms[src]['conf_sum'] += conf
    platform_stats = {}
    for src, c in platforms.items():
        t = c['total'] or 1  # guard against division by zero
        pos_r = c['Positive']/t; neg_r = c['Negative']/t; neu_r = c['Neutral']/t
        # polarity = |positive share - negative share|: 0 balanced, 1 one-sided
        platform_stats[src] = {'total':t,'pos_count':c['Positive'],'neg_count':c['Negative'],'neu_count':c['Neutral'],'unc_count':c['Uncertain'],'pos_pct':round(pos_r*100,1),'neg_pct':round(neg_r*100,1),'neu_pct':round(neu_r*100,1),'unc_pct':round(c['Uncertain']/t*100,1),'polarity':round(abs(pos_r-neg_r),3),'avg_conf':round(c['conf_sum']/t,3),'dominant':max(['Positive','Negative','Neutral','Uncertain'],key=lambda s:c[s])}
    if not platform_stats:
        return {'platforms':{},'pairwise':[],'insights':[],'most_positive':None,'most_negative':None,'most_polarized':None}
    srcs = list(platform_stats.keys())
    most_positive = max(srcs, key=lambda s: platform_stats[s]['pos_pct'])
    most_negative = max(srcs, key=lambda s: platform_stats[s]['neg_pct'])
    most_polarized = max(srcs, key=lambda s: platform_stats[s]['polarity'])
    # Pairwise comparison of positive-sentiment share for every platform pair.
    pairwise = []
    for i in range(len(srcs)):
        for j in range(i+1, len(srcs)):
            a, b = srcs[i], srcs[j]
            diff = round(abs(platform_stats[a]['pos_pct']-platform_stats[b]['pos_pct']),1)
            pairwise.append({'platform_a':a,'platform_b':b,'pos_diff':diff,'description':f"{a} vs {b}: selisih sentimen positif {diff}%"})
    # Insight strings only make sense with 2+ platforms to compare.
    insights = []
    if len(srcs) > 1:
        insights.append(f"{most_positive.capitalize()} memiliki sentimen positif tertinggi ({platform_stats[most_positive]['pos_pct']}%).")
        insights.append(f"{most_negative.capitalize()} memiliki sentimen negatif tertinggi ({platform_stats[most_negative]['neg_pct']}%).")
        insights.append(f"{most_polarized.capitalize()} paling terpolarisasi (indeks {platform_stats[most_polarized]['polarity']}).")
    return {'platforms':platform_stats,'pairwise':pairwise,'insights':insights,'most_positive':most_positive,'most_negative':most_negative,'most_polarized':most_polarized}
def generate_comparative_chart(cross_data):
    """Render a two-panel comparison chart to static/comparative.png.

    Left panel: grouped sentiment distribution bars per platform.
    Right panel: polarization (×100) and average confidence (%) bars.
    Best effort: skips silently when fewer than 2 platforms, logs on error.
    """
    try:
        platforms = cross_data.get('platforms',{})
        if len(platforms) < 2: return  # nothing to compare with one platform
        os.makedirs("static", exist_ok=True)
        srcs = list(platforms.keys())
        pos = [platforms[s]['pos_pct'] for s in srcs]
        neg = [platforms[s]['neg_pct'] for s in srcs]
        neu = [platforms[s]['neu_pct'] for s in srcs]
        pol = [platforms[s]['polarity']*100 for s in srcs]  # scale to % for shared axis
        cnf = [platforms[s]['avg_conf']*100 for s in srcs]
        x = np.arange(len(srcs)); w = 0.26
        fig, axes = plt.subplots(1, 2, figsize=(13,4))
        fig.patch.set_facecolor('#0e1117')  # dark theme to match the web UI
        for ax in axes: ax.set_facecolor('#141820')
        axes[0].bar(x-w, pos, w, label='Positif', color='#22c55e', alpha=0.85)
        axes[0].bar(x, neg, w, label='Negatif', color='#ef4444', alpha=0.85)
        axes[0].bar(x+w, neu, w, label='Netral', color='#94a3b8', alpha=0.85)
        axes[0].set_xticks(x); axes[0].set_xticklabels([s.capitalize() for s in srcs], color='#8892a4', fontsize=9)
        axes[0].set_title('Distribusi Sentimen per Platform', color='#e8eaf0', fontsize=10)
        axes[0].legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        axes[0].set_ylim(0,105); axes[0].tick_params(colors='#5a6070')
        axes[1].bar(x-0.2, pol, 0.38, label='Polarisasi ×100', color='#f59e0b', alpha=0.8)
        axes[1].bar(x+0.2, cnf, 0.38, label='Avg Confidence %', color='#4f9cf9', alpha=0.8)
        axes[1].set_xticks(x); axes[1].set_xticklabels([s.capitalize() for s in srcs], color='#8892a4', fontsize=9)
        axes[1].set_title('Polarisasi & Confidence per Platform', color='#e8eaf0', fontsize=10)
        axes[1].legend(fontsize=8, facecolor='#141820', edgecolor='#1a2030', labelcolor='#8892a4')
        axes[1].set_ylim(0,110); axes[1].tick_params(colors='#5a6070')
        for ax in axes:
            for sp in ax.spines.values(): sp.set_edgecolor('#1a2030')
        plt.tight_layout(pad=1.5)
        plt.savefig("static/comparative.png", dpi=110, facecolor=fig.get_facecolor())
        plt.close(fig)  # release figure memory
    except Exception as e:
        print(f"comparative chart error: {e}")
# ── RICH EXPORT (Priority 4) ──
def build_export_data(result_data, keyword, source, conf_stats, cross_data, trend):
    """Assemble per-row and summary tables for CSV export.

    Returns {'main': [one dict per analyzed text], 'summary': analysis-level
    dict with counts, percentages, trend and cross-platform highlights}.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    main_rows = []
    for idx, record in enumerate(result_data, start=1):
        text = record.get('text', '')
        main_rows.append({
            'index': idx,
            'text': text,
            'text_length': len(text.split()),  # word count, not characters
            'sentiment': record.get('sentiment', ''),
            'confidence': record.get('confidence', 0),
            'is_certain': record.get('is_certain', True),
            'source': record.get('source', ''),
            'scraped_at': record.get('scraped_at', timestamp),
            'keyword': keyword,
        })
    total = len(result_data) or 1  # avoid div-by-zero on empty input
    tally = Counter(r.get('sentiment') for r in result_data)
    pos, neg = tally['Positive'], tally['Negative']
    neu, unc = tally['Neutral'], tally['Uncertain']
    summary = {
        'keyword': keyword,
        'source': source,
        'analyzed_at': timestamp,
        'total_samples': total,
        'positive_count': pos,
        'negative_count': neg,
        'neutral_count': neu,
        'uncertain_count': unc,
        'positive_pct': round(pos / total * 100, 1),
        'negative_pct': round(neg / total * 100, 1),
        'neutral_pct': round(neu / total * 100, 1),
        'uncertain_pct': round(unc / total * 100, 1),
        'avg_confidence': conf_stats.get('avg_confidence', 0),
        'trend_label': trend.get('label', ''),
        'polarity_index': trend.get('polarity', 0),
        'most_positive_platform': cross_data.get('most_positive', ''),
    }
    return {'main': main_rows, 'summary': summary}
def save_export_csv(export):
    """Write the export tables to static/result.csv and static/summary.csv."""
    os.makedirs("static", exist_ok=True)
    targets = [
        (export['main'], "static/result.csv"),
        ([export['summary']], "static/summary.csv"),  # single-row table
    ]
    for rows, path in targets:
        pd.DataFrame(rows).to_csv(path, index=False)
# ── CORE FUNCTIONS ──
def get_top_words(texts):
    """Return the 15 most frequent content words as [{'word', 'count'}].

    Words are cleaned via clean_text_deep and filtered to length > 2 and
    not in STOPWORDS.
    """
    tally = Counter(
        w
        for t in texts
        for w in clean_text_deep(t).split()
        if len(w) > 2 and w not in STOPWORDS
    )
    return [{"word": w, "count": c} for w, c in tally.most_common(15)]
def generate_wordcloud(texts):
    """Render a word cloud of the cleaned texts to static/wordcloud.png (best effort)."""
    try:
        os.makedirs("static", exist_ok=True)
        combined = " ".join(batch_clean(texts))
        if not combined.strip(): return  # nothing to draw
        # Single-colour cloud (#4f9cf9) on dark background to match the UI theme.
        WordCloud(width=900,height=380,background_color='#0e1117',color_func=lambda *a,**k:'#4f9cf9',max_words=80,stopwords=STOPWORDS).generate(combined).to_file("static/wordcloud.png")
    except Exception as e: print(f"wordcloud error: {e}")
def generate_heatmap(data):
    """Render a source × sentiment count heatmap to static/heatmap.png (best effort)."""
    try:
        if not data: return
        labels = ["Positive","Neutral","Negative","Uncertain"]
        sources = sorted(set(d["source"] for d in data))
        matrix = np.zeros((len(sources),len(labels)))
        for d in data:
            i = sources.index(d["source"]); s = d["sentiment"]
            # Any label outside the known four is counted as Uncertain (col 3).
            j = labels.index(s) if s in labels else 3
            matrix[i][j] += 1
        if matrix.sum()==0: return  # empty matrix would render a blank image
        # Figure height scales with the number of sources (min 2 inches).
        fig, ax = plt.subplots(figsize=(7,max(2,len(sources))))
        fig.patch.set_facecolor('#0e1117'); ax.set_facecolor('#141820')
        im = ax.imshow(matrix, cmap='Blues', aspect='auto')
        ax.set_xticks(range(len(labels))); ax.set_xticklabels(labels, color='#8892a4', fontsize=9)
        ax.set_yticks(range(len(sources))); ax.set_yticklabels(sources, color='#8892a4', fontsize=9)
        ax.tick_params(colors='#5a6070'); plt.colorbar(im, ax=ax); plt.tight_layout()
        os.makedirs("static", exist_ok=True)
        plt.savefig("static/heatmap.png", dpi=100, facecolor=fig.get_facecolor()); plt.close(fig)
    except Exception as e: print(f"heatmap error: {e}")
def generate_timeline(data):
    """Plot rolling-mean sentiment proportions over comment order to static/timeline.png."""
    try:
        if not data or len(data)<3: return  # too few points to smooth meaningfully
        os.makedirs("static", exist_ok=True)
        # Rolling window scales with data size (10% of rows), minimum 5.
        window = max(5,len(data)//10)
        # Trailing rolling mean; early positions use a shorter (partial) window.
        def roll(arr,w): return [sum(arr[max(0,i-w+1):i+1])/len(arr[max(0,i-w+1):i+1]) for i in range(len(arr))]
        # One 0/1 indicator series per sentiment class, in comment order.
        pos_r=[1 if d["sentiment"]=="Positive" else 0 for d in data]
        neg_r=[1 if d["sentiment"]=="Negative" else 0 for d in data]
        neu_r=[1 if d["sentiment"]=="Neutral" else 0 for d in data]
        unc_r=[1 if d["sentiment"]=="Uncertain" else 0 for d in data]
        x=list(range(1,len(data)+1))
        fig,ax=plt.subplots(figsize=(11,3.5)); fig.patch.set_facecolor('#0e1117'); ax.set_facecolor('#141820')
        ax.fill_between(x,roll(pos_r,window),alpha=0.12,color='#22c55e')
        ax.fill_between(x,roll(neg_r,window),alpha=0.12,color='#ef4444')
        ax.plot(x,roll(pos_r,window),color='#22c55e',lw=1.8,label='Positif')
        ax.plot(x,roll(neg_r,window),color='#ef4444',lw=1.8,label='Negatif')
        ax.plot(x,roll(neu_r,window),color='#94a3b8',lw=1.2,ls='--',label='Netral')
        ax.plot(x,roll(unc_r,window),color='#f59e0b',lw=1.0,ls=':',label='Uncertain')
        # Dotted horizontal baselines at the overall positive/negative shares.
        ax.axhline(np.mean(pos_r),color='#22c55e',lw=0.6,ls=':',alpha=0.5)
        ax.axhline(np.mean(neg_r),color='#ef4444',lw=0.6,ls=':',alpha=0.5)
        ax.set_xlabel(f'Urutan komentar (rolling mean, window={window})',color='#5a6070',fontsize=8)
        ax.set_ylabel('Proporsi',color='#5a6070',fontsize=8); ax.tick_params(colors='#5a6070',labelsize=7)
        for sp in ax.spines.values(): sp.set_edgecolor('#1a2030')
        ax.legend(fontsize=8,facecolor='#141820',edgecolor='#1a2030',labelcolor='#8892a4')
        ax.set_ylim(0,1.05); ax.set_xlim(1,len(data)); plt.tight_layout(pad=1.0)
        plt.savefig("static/timeline.png",dpi=110,facecolor=fig.get_facecolor()); plt.close(fig)
    except Exception as e: print(f"timeline error: {e}")
def predict_trend(data):
    """Summarise overall sentiment direction, polarisation and per-source splits.

    Returns a dict with a trend label ('Dominan Positif'/'Dominan Negatif'/
    'Mayoritas Netral'/'Terpolarisasi'), the dominant class, a polarity index
    (|pos share - neg share|), per-source breakdowns and an Indonesian summary.
    """
    if not data:
        return {"label":"Kurang Data","dominant":"Neutral","polarity":0.0,"confidence":0.0,"by_source":{},"summary":"Tidak ada data."}
    labels = [d["sentiment"] for d in data]
    total = len(labels)
    pos = labels.count("Positive")
    neg = labels.count("Negative")
    neu = labels.count("Neutral")
    pos_r, neg_r, neu_r = pos / total, neg / total, neu / total
    polarity = round(abs(pos_r - neg_r), 3)
    # Per-source tallies; unrecognised labels are folded into 'Uncertain'.
    by_source = {}
    for d in data:
        src = d.get("source", "unknown")
        bucket = by_source.setdefault(src, {"Positive": 0, "Negative": 0, "Neutral": 0, "Uncertain": 0, "total": 0})
        sentiment = d["sentiment"]
        bucket[sentiment if sentiment in bucket else "Uncertain"] += 1
        bucket["total"] += 1
    for bucket in by_source.values():
        n = bucket["total"]
        bucket["pos_pct"] = round(bucket["Positive"] / n * 100, 1)
        bucket["neg_pct"] = round(bucket["Negative"] / n * 100, 1)
        bucket["neu_pct"] = round(bucket["Neutral"] / n * 100, 1)
    # Classify the overall trend by which share dominates.
    if pos_r > neg_r and pos_r > neu_r:
        label, dominant, conf = "Dominan Positif", "Positive", round(pos_r, 3)
    elif neg_r > pos_r and neg_r > neu_r:
        label, dominant, conf = "Dominan Negatif", "Negative", round(neg_r, 3)
    elif neu_r >= 0.5:
        label, dominant, conf = "Mayoritas Netral", "Neutral", round(neu_r, 3)
    else:
        label, dominant, conf = "Terpolarisasi", "Mixed", round(polarity, 3)
    dom_src = max(by_source, key=lambda s: by_source[s]["total"]) if by_source else "-"
    return {
        "label": label,
        "dominant": dominant,
        "polarity": polarity,
        "confidence": conf,
        "by_source": by_source,
        "pos_pct": round(pos_r * 100, 1),
        "neg_pct": round(neg_r * 100, 1),
        "neu_pct": round(neu_r * 100, 1),
        "summary": f"{label} ({round(pos_r*100,1)}% positif, {round(neg_r*100,1)}% negatif, {round(neu_r*100,1)}% netral). Indeks polarisasi: {polarity:.2f}. Sumber terbanyak: {dom_src}.",
    }
def detect_hoax(texts):
    """Classify up to the first 20 texts as 'Hoax' or 'Normal'.

    Uses the module-level ML pipeline (_hoax_clf) when available; if it is
    None or raises, falls back to a keyword-matching heuristic. Each result
    dict carries text, label, confidence and the method used ('ml'/'keyword').
    """
    results = []
    sample = texts[:20]  # cap the workload
    if _hoax_clf is not None:
        try:
            preds = _hoax_clf.predict(sample)
            probas = _hoax_clf.predict_proba(sample)
            for t, p, pr in zip(sample, preds, probas):
                results.append({"text": t, "label": "Hoax" if p == 1 else "Normal",
                                "confidence": round(float(max(pr)), 3), "method": "ml"})
            return results
        except Exception as e:
            # BUGFIX: was a bare `except: pass`, which silently swallowed every
            # error (including KeyboardInterrupt/SystemExit). Log and fall
            # through to the keyword heuristic instead; drop any partial ML
            # rows so the fallback cannot produce duplicates.
            print(f"hoax ml error: {e}")
            results = []
    # Keyword fallback: one hit is enough to flag a text as 'Hoax'.
    KW = ["hoax","bohong","fitnah","propaganda","palsu","disinformasi","menyesatkan","kebohongan","manipulasi","adu domba","provokasi","berita palsu","ujaran kebencian","tidak benar","narasi sesat"]
    for t in sample:
        hits = sum(1 for k in KW if k in t.lower())
        label = "Hoax" if hits >= 1 else "Normal"
        # Confidence grows with keyword hits, capped at 0.95; flat 0.6 otherwise.
        results.append({"text": t, "label": label,
                        "confidence": min(0.5 + hits * 0.1, 0.95) if label == "Hoax" else 0.6,
                        "method": "keyword"})
    return results
def get_topics(texts):
    """Extract up to 3 LDA topics (top-5 words each) from the cleaned texts.

    Returns sentinel single-word topics on degenerate input: 'data kurang'
    (< 5 usable docs), 'kosong' (empty vocabulary) or 'error' (exception).
    """
    try:
        docs = [d for d in batch_clean(texts) if len(d) > 3]
        if len(docs) < 5:
            return [["data kurang"]]
        vectorizer = CountVectorizer(min_df=2, stop_words=list(STOPWORDS))
        matrix = vectorizer.fit_transform(docs)
        if matrix.shape[1] == 0:
            return [["kosong"]]
        n_topics = min(3, matrix.shape[1])
        model = LatentDirichletAllocation(n_components=n_topics, random_state=42)
        model.fit(matrix)
        vocab = vectorizer.get_feature_names_out()
        # Top-5 words per topic = the 5 largest component weights.
        return [[vocab[i] for i in component.argsort()[-5:]] for component in model.components_]
    except Exception as e:
        print(f"topic error: {e}")
        return [["error"]]
def generate_insight(data):
    """One-line tally of sentiment labels across the analyzed records."""
    tally = Counter(d["sentiment"] for d in data)
    return (f"Positive:{tally['Positive']} Negative:{tally['Negative']} "
            f"Neutral:{tally['Neutral']} Uncertain:{tally['Uncertain']}")
def cluster_opinions(texts):
    """KMeans-cluster texts in TF-IDF space; up to 3 sample texts per cluster.

    Returns [] when there are fewer than 6 texts or on any error.
    """
    try:
        if len(texts) < 6:
            return []
        tfidf = TfidfVectorizer(max_features=300, stop_words=list(STOPWORDS))
        features = tfidf.fit_transform(batch_clean(texts))
        n_clusters = min(3, len(texts))  # always 3 here given the >= 6 guard
        model = KMeans(n_clusters=n_clusters, n_init=10, random_state=42).fit(features)
        grouped = {}
        for idx, assignment in enumerate(model.labels_):
            grouped.setdefault(int(assignment), []).append(texts[idx])
        return [{"cluster": cid, "samples": members[:3]} for cid, members in grouped.items()]
    except Exception as e:
        print(f"cluster error: {e}")
        return []
def build_network(texts):
    """Build word co-occurrence edges across texts (only weight > 1 kept).

    Per text, up to 6 distinct cleaned words (length > 3, non-stopword) are
    paired; edge weight counts how many texts contain both words.
    """
    weights = Counter()
    for t in texts:
        tokens = [w for w in set(clean_text_deep(t).split()) if len(w) > 3 and w not in STOPWORDS][:6]
        for pair in combinations(tokens, 2):
            weights[tuple(sorted(pair))] += 1
    return [{"source": a, "target": b, "weight": n} for (a, b), n in weights.items() if n > 1]
def detect_bot_network(texts):
    """Flag likely bot activity via near-duplicate texts.

    Texts with cosine similarity > 0.75 (TF-IDF space) are linked; nodes with
    degree centrality > 0.3 are reported as bots (max 10). Returns empty
    structures for fewer than 5 texts or on error.
    """
    try:
        if len(texts) < 5:
            return {"nodes": [], "edges": [], "bots": []}
        features = TfidfVectorizer(max_features=300).fit_transform(texts)
        sim = cosine_similarity(features)
        graph = nx.Graph()
        for idx, text in enumerate(texts):
            graph.add_node(idx, text=text)
        for i in range(len(texts)):
            for j in range(i + 1, len(texts)):
                if sim[i][j] > 0.75:  # near-duplicate threshold
                    graph.add_edge(i, j)
        centrality = nx.degree_centrality(graph)
        bots = [{"node": idx, "score": round(score, 2), "text": texts[idx]}
                for idx, score in centrality.items() if score > 0.3]
        return {"nodes": [{"id": idx} for idx in graph.nodes()],
                "edges": [{"source": u, "target": v} for u, v in graph.edges()],
                "bots": bots[:10]}
    except Exception as e:
        print(f"bot error: {e}")
        return {"nodes": [], "edges": [], "bots": []}
def run_gnn_safe(nodes, edges, texts):
    """Score graph nodes with a small untrained GCN (min-max normalised
    embedding norms with fixed seed 42, so output is deterministic).

    Returns [{"node": id, "score": float}] for every node. Scores are all
    0.0 when the graph is too small (< 3 nodes or no edges) or when
    torch/torch_geometric are unavailable or anything fails.
    """
    if not nodes or not edges or len(nodes) < 3:
        return [{"node": n["id"], "score": 0.0} for n in nodes]
    try:
        # Heavy deps imported lazily so the app works without them installed.
        import torch
        from torch_geometric.data import Data  # availability probe for torch_geometric
        from torch_geometric.nn import GCNConv
        node_texts = [texts[n["id"]] if n["id"] < len(texts) else "" for n in nodes]
        vec = TfidfVectorizer(max_features=32, min_df=1)
        try:
            X = vec.fit_transform(node_texts).toarray()
        except ValueError:
            # BUGFIX: was a bare `except:`. TfidfVectorizer raises ValueError
            # on an empty vocabulary (e.g. all-empty texts); fall back to
            # identity-ish features. Other errors propagate to the outer handler.
            X = np.eye(len(nodes), 32)
        x = torch.tensor(X, dtype=torch.float)
        edge_list = [[e["source"], e["target"]] for e in edges
                     if e["source"] < len(nodes) and e["target"] < len(nodes)]
        if not edge_list:
            return [{"node": n["id"], "score": 0.0} for n in nodes]
        edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
        class GCN(torch.nn.Module):
            def __init__(self, in_ch):
                super().__init__()
                self.conv1 = GCNConv(in_ch, 16)
                self.conv2 = GCNConv(16, 4)
            def forward(self, x, ei):
                return self.conv2(torch.relu(self.conv1(x, ei)), ei)
        torch.manual_seed(42)  # deterministic random weights
        model = GCN(x.shape[1])
        model.eval()
        with torch.no_grad():
            out = model(x, edge_index)
        scores = torch.norm(out, dim=1).numpy()
        # Min-max normalise; a constant score vector collapses to zeros.
        if scores.max() > scores.min():
            scores = (scores - scores.min()) / (scores.max() - scores.min())
        else:
            scores = np.zeros(len(scores))
        return [{"node": nodes[i]["id"], "score": round(float(scores[i]), 3)} for i in range(len(nodes))]
    except Exception as e:
        print(f"GNN error: {e}")
        return [{"node": n["id"], "score": 0.0} for n in nodes]
# ── ROUTES ──
@app.route("/")
def home():
    """Landing page with the keyword/source input form."""
    return render_template("index.html")
@app.route("/result")
def result():
    """Results dashboard page (populated client-side from /analyze JSON)."""
    return render_template("result.html")
@app.route("/analyze", methods=["POST"])
def analyze():
    """Run the full analysis pipeline for a keyword and return one JSON payload.

    Request body: {"keyword": str, "source": str = "all",
                   "conf_threshold": float = CONF_THRESHOLD}.
    Returns 400 on an empty keyword, 500 with {"error": ...} on any failure.
    Side effects: writes chart PNGs and result/summary CSVs under static/.
    """
    try:
        body = request.json or {}
        keyword = body.get("keyword", "").strip()
        source = body.get("source", "all")
        conf_th = float(body.get("conf_threshold", CONF_THRESHOLD))
        if not keyword:
            return jsonify({"error": "keyword kosong", "data": []}), 400
        raw = collect_data(keyword, source)
        texts = [t for _, t in raw][:100]   # cap workload at 100 items
        sources = [s for s, _ in raw][:100]
        scored = predict_with_score(texts)
        scored_filtered = apply_confidence_filter(scored, threshold=conf_th)
        result_data = [{"text": t, "sentiment": s["sentiment"], "confidence": s["confidence"],
                        "is_certain": s["is_certain"], "source": src,
                        "scraped_at": datetime.now().strftime("%Y-%m-%d %H:%M")}
                       for t, s, src in zip(texts, scored_filtered, sources)]
        conf_stats_data = confidence_stats(result_data)
        cross_data = cross_platform_analysis(result_data)
        generate_comparative_chart(cross_data)
        generate_wordcloud(texts); generate_heatmap(result_data); generate_timeline(result_data)
        top_words = get_top_words(texts); topics = get_topics(texts); insight = generate_insight(result_data)
        clusters = cluster_opinions(texts); trend = predict_trend(result_data); hoax = detect_hoax(texts)
        network = build_network(texts); bot_network = detect_bot_network(texts)
        gnn = run_gnn_safe(bot_network["nodes"], bot_network["edges"], texts)
        bot_bert = detect_bot_bert(texts); fake_news = detect_fake_news(texts)
        # BUGFIX: the six variables below were referenced in the JSON response
        # but never computed, so every request failed with a NameError (HTTP 500).
        absa_result = analyze_absa(texts)
        ner_result = analyze_ner(texts)
        stance_result = analyze_stance(texts, keyword)   # keyword as stance target — confirm against services.advanced_nlp
        emotion_result = analyze_emotions(texts)
        keywords_result = extract_keywords(texts)
        summaries = summarize_by_platform(result_data)   # assumes per-record dicts with 'source' — confirm against service
        export = build_export_data(result_data, keyword, source, conf_stats_data, cross_data, trend)
        save_export_csv(export)
        return jsonify({"data": result_data, "top_words": top_words, "topics": topics,
                        "insight": insight, "clusters": clusters, "hoax": hoax,
                        "network": network, "bot_network": bot_network, "trend": trend,
                        "bot_bert": bot_bert, "fake_news": fake_news, "gnn": gnn,
                        "conf_stats": conf_stats_data, "cross_platform": cross_data,
                        "export_summary": export["summary"], "absa": absa_result,
                        "ner": ner_result, "stance": stance_result,
                        "emotions": emotion_result, "keywords": keywords_result,
                        "summaries": summaries})
    except Exception as e:
        print(f"ERROR /analyze: {e}")
        return jsonify({"error": str(e), "data": []}), 500
@app.route("/download")
def download():
    """Serve the per-row results CSV produced by the last /analyze call."""
    csv_path = "static/result.csv"
    if not os.path.exists(csv_path):
        return jsonify({"error": "Belum ada hasil"}), 404
    return send_file(csv_path, as_attachment=True)
@app.route("/download/summary")
def download_summary():
    """Serve the one-row summary CSV produced by the last /analyze call."""
    csv_path = "static/summary.csv"
    if not os.path.exists(csv_path):
        return jsonify({"error": "Belum ada summary"}), 404
    return send_file(csv_path, as_attachment=True)
@app.route("/static/<path:filename>")
def static_files(filename):
    """Serve generated assets (charts, CSVs) from the static/ directory.

    BUGFIX: the original passed the literal string "static/(unknown)" to
    send_file, so every asset request failed. send_from_directory also
    rejects path-traversal attempts in `filename` (404 instead of escaping
    the static/ directory).
    """
    from flask import send_from_directory  # local import keeps the top-of-file imports untouched
    return send_from_directory("static", filename)
if __name__=="__main__":
    # 0.0.0.0:7860 is the conventional Hugging Face Spaces binding; debug off.
    app.run(host="0.0.0.0",port=7860,debug=False)