noranisa commited on
Commit
5cb0ade
·
verified ·
1 Parent(s): 5f1fb1e

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +187 -182
main.py CHANGED
@@ -2,96 +2,107 @@ from flask import Flask, render_template, request, jsonify, send_file
2
  from services.aggregator import collect_data
3
  from services.sentiment import predict
4
 
 
 
 
5
  from collections import Counter
6
  import pandas as pd
7
  import os
8
  import re
 
9
 
10
  # VISUAL
11
  from wordcloud import WordCloud
12
  import matplotlib.pyplot as plt
13
- import numpy as np
14
 
15
  # ML
16
- from sklearn.decomposition import LatentDirichletAllocation
17
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 
18
  from sklearn.cluster import KMeans
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  # =========================
21
- # INIT
22
  # =========================
23
- app = Flask(__name__)
 
24
 
25
 
26
  # =========================
27
  # 🔥 TOP WORDS
28
  # =========================
29
- def get_top_words(texts, top_n=10):
30
  words = []
31
  for t in texts:
32
- t = re.sub(r'[^a-zA-Z\s]', '', t.lower())
33
- words.extend(t.split())
34
- return [{"word": w, "count": c} for w, c in Counter(words).most_common(top_n)]
35
 
36
 
37
  # =========================
38
- # 🔥 WORDCLOUD (FIX)
39
  # =========================
40
  def generate_wordcloud(texts):
41
  try:
42
  os.makedirs("static", exist_ok=True)
43
- texts = [t for t in texts if len(t.strip()) > 3]
44
-
45
- if len(texts) == 0:
46
- return
47
-
48
- wc = WordCloud(width=800, height=400).generate(" ".join(texts))
49
  wc.to_file("static/wordcloud.png")
50
-
51
  except Exception as e:
52
- print(" Wordcloud error:", e)
53
 
54
 
55
  # =========================
56
- # 🔥 HEATMAP (FIX)
57
  # =========================
58
  def generate_heatmap(data):
59
  try:
60
- if len(data) == 0:
61
- return
 
62
 
63
- labels_sent = ["Positive", "Neutral", "Negative"]
64
- labels_src = list(set([d["source"] for d in data]))
65
-
66
- matrix = np.zeros((len(labels_src), len(labels_sent)))
67
 
68
  for d in data:
69
- i = labels_src.index(d["source"])
70
- j = labels_sent.index(d["sentiment"])
71
- matrix[i][j] += 1
72
 
73
- if matrix.sum() == 0:
74
- return
75
 
76
  plt.figure()
77
  plt.imshow(matrix)
78
-
79
- plt.xticks(range(len(labels_sent)), labels_sent)
80
- plt.yticks(range(len(labels_src)), labels_src)
81
-
82
- for i in range(len(labels_src)):
83
- for j in range(len(labels_sent)):
84
- plt.text(j, i, int(matrix[i][j]), ha='center')
85
-
86
- plt.title("Heatmap Sentimen")
87
  plt.colorbar()
88
-
89
- os.makedirs("static", exist_ok=True)
90
  plt.savefig("static/heatmap.png")
91
  plt.close()
92
-
93
  except Exception as e:
94
- print(" Heatmap error:", e)
95
 
96
 
97
  # =========================
@@ -99,221 +110,215 @@ def generate_heatmap(data):
99
  # =========================
100
  def generate_timeline(data):
101
  try:
102
- if len(data) == 0:
103
- return
104
-
105
  os.makedirs("static", exist_ok=True)
106
 
107
- timestamps = list(range(len(data)))
108
-
109
- pos, neg, neu = [], [], []
110
-
111
  for d in data:
112
- pos.append(1 if d["sentiment"] == "Positive" else 0)
113
- neg.append(1 if d["sentiment"] == "Negative" else 0)
114
- neu.append(1 if d["sentiment"] == "Neutral" else 0)
115
 
116
  plt.figure()
117
- plt.plot(timestamps, pos, label="Positive")
118
- plt.plot(timestamps, neg, label="Negative")
119
- plt.plot(timestamps, neu, label="Neutral")
120
-
121
  plt.legend()
122
- plt.title("Sentiment Timeline")
123
-
124
  plt.savefig("static/timeline.png")
125
  plt.close()
126
-
127
  except Exception as e:
128
- print(" Timeline error:", e)
129
 
130
 
131
  # =========================
132
- # 🔥 TOPIC MODELING (SAFE)
133
  # =========================
134
- def get_topics(texts, n_topics=3):
135
  try:
136
- texts = [t for t in texts if len(t.strip()) > 3]
137
-
138
- if len(texts) < 5:
139
- return [["data kurang"]]
140
 
141
- vectorizer = CountVectorizer(min_df=2)
142
- X = vectorizer.fit_transform(texts)
143
 
144
- if X.shape[1] == 0:
145
- return [["tidak ada kata"]]
146
 
147
- lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
148
  lda.fit(X)
149
 
150
- words = vectorizer.get_feature_names_out()
151
-
152
- topics = []
153
- for topic in lda.components_:
154
- topics.append([words[i] for i in topic.argsort()[-5:]])
155
-
156
  return topics
157
-
158
- except Exception as e:
159
- print("❌ LDA error:", e)
160
- return [["topic gagal"]]
161
 
162
 
163
  # =========================
164
- # 🤖 AI INSIGHT (RULE SAFE)
165
  # =========================
166
- def generate_insight(data, topics):
167
- sentiments = [d["sentiment"] for d in data]
168
-
169
- pos = sentiments.count("Positive")
170
- neg = sentiments.count("Negative")
171
- neu = sentiments.count("Neutral")
172
-
173
- total = len(sentiments)
174
 
175
- if total == 0:
176
- return "Tidak ada data"
177
 
178
- insight = f"""
179
- Total data: {total}
180
- Positive: {pos}
181
- Negative: {neg}
182
- Neutral: {neu}
 
 
 
 
 
 
 
 
 
183
 
184
- Mayoritas opini: {"Positif" if pos > neg else "Negatif"}
185
 
186
- Topik utama:
187
- """
 
 
 
 
188
 
189
- for i, t in enumerate(topics):
190
- insight += f"\nTopik {i+1}: {', '.join(t)}"
191
 
192
- return insight
 
 
 
 
 
 
 
 
 
 
193
 
194
 
195
  # =========================
196
- # 🔥 CLUSTERING
197
  # =========================
198
- def cluster_opinions(texts):
199
  try:
200
- texts = [t for t in texts if len(t.strip()) > 5]
201
 
202
- if len(texts) < 5:
203
- return []
204
 
205
- vectorizer = TfidfVectorizer(max_features=500)
206
- X = vectorizer.fit_transform(texts)
 
207
 
208
- model = KMeans(n_clusters=3, random_state=42, n_init=10)
209
- labels = model.fit_predict(X)
 
 
210
 
211
- clusters = {}
212
- for i, label in enumerate(labels):
213
- clusters.setdefault(label, []).append(texts[i])
214
 
215
- result = []
216
- for k, v in clusters.items():
217
- result.append({"cluster": int(k), "samples": v[:3]})
218
 
219
- return result
 
220
 
221
- except Exception as e:
222
- print("❌ clustering error:", e)
223
- return []
224
 
225
 
226
  # =========================
227
- # 🚨 HOAX DETECTION
228
  # =========================
229
- def detect_hoax(texts):
230
- keywords = ["hoax","bohong","fitnah","manipulasi","propaganda","tipu"]
231
-
232
- result = []
233
- for t in texts:
234
- score = sum(1 for k in keywords if k in t.lower())
235
- result.append({
236
- "text": t,
237
- "score": score,
238
- "label": "Hoax" if score >= 2 else "Normal"
239
- })
240
-
241
- return result
242
 
243
 
244
  # =========================
245
- # 🌐 HOME
246
  # =========================
247
- @app.route('/')
248
  def home():
249
  return render_template("index.html")
250
 
251
 
252
- # =========================
253
- # 🚀 ANALYZE
254
- # =========================
255
- @app.route('/analyze', methods=['POST'])
256
  def analyze():
257
  try:
258
- keyword = request.json.get('keyword')
259
- source = request.json.get('source', 'all')
260
 
261
- data_raw = collect_data(keyword, source)
262
 
263
- texts = [t for s, t in data_raw][:100]
264
- sources = [s for s, t in data_raw][:100]
265
 
266
- sentiments = predict(texts)
267
 
268
- result = []
269
- for t, s, src in zip(texts, sentiments, sources):
270
- result.append({
271
- "text": t,
272
- "sentiment": s,
273
- "source": src
274
- })
275
 
276
  # VISUAL
277
  generate_wordcloud(texts)
278
  generate_heatmap(result)
279
  generate_timeline(result)
280
 
281
- # ANALYTICS
282
- top_words = get_top_words(texts)
283
- topics = get_topics(texts)
284
- insight = generate_insight(result, topics)
285
-
286
- clusters = cluster_opinions(texts)
287
- hoax = detect_hoax(texts)
288
-
289
- # CSV
290
- os.makedirs("static", exist_ok=True)
291
- pd.DataFrame(result).to_csv("static/result.csv", index=False)
 
 
 
 
 
 
 
292
 
293
  return jsonify({
294
- "data": result,
295
- "top_words": top_words,
296
- "topics": topics,
297
- "insight": insight,
298
- "clusters": clusters,
299
- "hoax": hoax
 
 
 
 
 
 
300
  })
301
 
302
  except Exception as e:
303
- print("ERROR:", e)
304
- return jsonify({"data": []})
305
 
306
 
307
- # =========================
308
- # 📥 DOWNLOAD
309
- # =========================
310
- @app.route('/download')
311
  def download():
312
- return send_file("static/result.csv", as_attachment=True)
313
 
314
 
315
  # =========================
316
- # ▶️ RUN
317
  # =========================
318
- if __name__ == "__main__":
319
- app.run(host="0.0.0.0", port=7860)
 
2
  from services.aggregator import collect_data
3
  from services.sentiment import predict
4
 
5
+ # =========================
6
+ # IMPORT TAMBAHAN
7
+ # =========================
8
  from collections import Counter
9
  import pandas as pd
10
  import os
11
  import re
12
+ import numpy as np
13
 
14
  # VISUAL
15
  from wordcloud import WordCloud
16
  import matplotlib.pyplot as plt
 
17
 
18
  # ML
 
19
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
20
+ from sklearn.decomposition import LatentDirichletAllocation
21
  from sklearn.cluster import KMeans
22
+ from sklearn.metrics.pairwise import cosine_similarity
23
+ from sklearn.linear_model import LinearRegression
24
+
25
+ # GRAPH
26
+ import networkx as nx
27
+ from itertools import combinations
28
+
29
# OPTIONAL ADVANCED
# Each advanced service is optional: when its module cannot be imported the
# feature degrades to a no-op stub so /analyze keeps working without it.
# NOTE: `except Exception` (not a bare `except:`) so SystemExit/KeyboardInterrupt
# are never swallowed while still tolerating broken optional modules.
try:
    from services.bot_bert import detect_bot_bert
except Exception:
    def detect_bot_bert(x): return []

try:
    from services.fake_news import detect_fake_news
except Exception:
    def detect_fake_news(x): return []

try:
    from services.gnn import run_gnn
except Exception:
    def run_gnn(n, e): return []
44
+
45
+ app = Flask(__name__)
46
 
47
# =========================
# 🔥 UTIL
# =========================
def clean_text(t):
    """Lowercase *t* and drop every character that is not an ASCII letter or whitespace."""
    lowered = t.lower()
    return re.sub(r'[^a-zA-Z\s]', '', lowered)


# =========================
# 🔥 TOP WORDS
# =========================
def get_top_words(texts):
    """Return the 10 most frequent cleaned words in *texts* as {"word", "count"} dicts."""
    counts = Counter()
    for text in texts:
        counts.update(clean_text(text).split())
    return [{"word": word, "count": count} for word, count in counts.most_common(10)]
 
62
 
63
 
64
# =========================
# 🔥 WORDCLOUD
# =========================
def generate_wordcloud(texts):
    """Render a word cloud of *texts* to static/wordcloud.png (best effort).

    Texts of 3 or fewer non-blank characters are skipped; nothing is written
    when no usable text remains. Failures are printed, never raised.
    """
    try:
        os.makedirs("static", exist_ok=True)
        usable = [t for t in texts if len(t.strip()) > 3]
        if not usable:
            return
        cloud = WordCloud(width=800, height=400)
        cloud.generate(" ".join(usable))
        cloud.to_file("static/wordcloud.png")
    except Exception as e:
        print("wordcloud error:", e)
76
 
77
 
78
# =========================
# 🔥 HEATMAP
# =========================
def generate_heatmap(data):
    """Save a source × sentiment count heatmap to static/heatmap.png (best effort).

    *data* is a list of dicts with "source" and "sentiment" keys. Failures
    (including unexpected sentiment labels) are printed, never raised.
    """
    try:
        if not data: return
        labels = ["Positive", "Neutral", "Negative"]
        # sorted() pins the row order — plain set() iteration order varies per
        # process, which previously shuffled rows between identical requests.
        # assumes sources are strings (sortable) — TODO confirm with collect_data
        sources = sorted(set(d["source"] for d in data))

        matrix = np.zeros((len(sources), len(labels)))

        for d in data:
            i = sources.index(d["source"])
            j = labels.index(d["sentiment"])
            matrix[i][j] += 1

        if matrix.sum() == 0: return

        plt.figure()
        plt.imshow(matrix)
        plt.xticks(range(len(labels)), labels)
        plt.yticks(range(len(sources)), sources)
        plt.colorbar()
        os.makedirs("static", exist_ok=True)
        plt.savefig("static/heatmap.png")
        plt.close()
    except Exception as e:
        print("heatmap error:", e)
106
 
107
 
108
  # =========================
 
110
  # =========================
111
def generate_timeline(data):
    """Plot one 0/1 indicator line per sentiment class to static/timeline.png.

    Best effort: does nothing for empty *data*; failures are printed, never raised.
    """
    try:
        if not data: return

        os.makedirs("static", exist_ok=True)

        series = {"Positive": [], "Negative": [], "Neutral": []}
        for item in data:
            for name, values in series.items():
                values.append(1 if item["sentiment"] == name else 0)

        plt.figure()
        # plot order matches the legend order of the original chart
        for name in ("Positive", "Negative", "Neutral"):
            plt.plot(series[name], label=name)

        plt.legend()
        plt.savefig("static/timeline.png")
        plt.close()
    except Exception as e:
        print("timeline error:", e)
131
 
132
 
133
# =========================
# 🔥 TOPIC MODELING
# =========================
def get_topics(texts):
    """Extract 3 LDA topics (top-5 words each) from *texts*.

    Returns a list of word lists; sentinel values [["data kurang"]],
    [["kosong"]] or [["error"]] when there is too little data, an empty
    vocabulary, or an unexpected failure.
    """
    try:
        usable = [t for t in texts if len(t) > 3]
        if len(usable) < 5: return [["data kurang"]]

        vec = CountVectorizer(min_df=2)
        X = vec.fit_transform(usable)

        if X.shape[1] == 0: return [["kosong"]]

        # random_state pins the decomposition so repeated requests on the
        # same data yield the same topics (the estimator is stochastic)
        lda = LatentDirichletAllocation(n_components=3, random_state=42)
        lda.fit(X)

        words = vec.get_feature_names_out()
        topics = []
        for comp in lda.components_:
            topics.append([words[i] for i in comp.argsort()[-5:]])

        return topics
    except Exception as e:
        # was a bare `except:` that silently swallowed every failure
        print("topic error:", e)
        return [["error"]]
 
 
156
 
157
 
158
# =========================
# 🔥 INSIGHT
# =========================
def generate_insight(data):
    """Summarise the sentiment counts in *data* as a one-line string."""
    counts = Counter(item["sentiment"] for item in data)
    return (
        f"Positive:{counts['Positive']} "
        f"Negative:{counts['Negative']} "
        f"Neutral:{counts['Neutral']}"
    )
 
 
 
 
 
164
 
 
 
165
 
166
# =========================
# 🔥 CLUSTER
# =========================
def cluster_opinions(texts):
    """Group *texts* into 3 TF-IDF / KMeans clusters.

    Returns a list of {"cluster": int, "samples": [up to 3 texts]} dicts,
    or [] when there are fewer than 5 texts or clustering fails.
    """
    try:
        if len(texts) < 5: return []
        X = TfidfVectorizer(max_features=300).fit_transform(texts)
        # random_state makes the assignment reproducible across requests
        model = KMeans(n_clusters=3, n_init=10, random_state=42)
        labels = model.fit_predict(X)
        clusters = {}
        for text, label in zip(texts, labels):
            # int(): KMeans labels are numpy integers, which Flask's jsonify
            # cannot serialize — leaving them raw broke the /analyze response
            clusters.setdefault(int(label), []).append(text)
        return [{"cluster": label, "samples": members[:3]}
                for label, members in clusters.items()]
    except Exception as e:
        # was a bare `except:` that silently swallowed every failure
        print("cluster error:", e)
        return []
180
 
 
181
 
182
# =========================
# 🔥 HOAX
# =========================
def detect_hoax(texts):
    """Keyword-flag the first 10 *texts* as "Hoax" or "Normal" (case-insensitive)."""
    keywords = ("hoax", "bohong", "fitnah", "propaganda")
    flagged = []
    for text in texts[:10]:
        lowered = text.lower()
        label = "Normal"
        for keyword in keywords:
            if keyword in lowered:
                label = "Hoax"
                break
        flagged.append({"text": text, "label": label})
    return flagged
188
 
 
 
189
 
190
# =========================
# 🔥 NETWORK
# =========================
def build_network(texts):
    """Build a word co-occurrence edge list from *texts*.

    For each text the first 5 distinct words are paired; every pair increments
    an undirected edge weight keyed by the sorted word pair. Only edges seen
    more than once are returned, as {"source", "target", "weight"} dicts.
    """
    edges = {}
    for t in texts:
        # dict.fromkeys dedups while preserving word order — list(set(...))
        # picked a nondeterministic 5-word subset, so the graph changed
        # between identical runs
        w = list(dict.fromkeys(t.split()))[:5]
        for a, b in combinations(w, 2):
            key = tuple(sorted([a, b]))
            edges[key] = edges.get(key, 0) + 1
    return [{"source": k[0], "target": k[1], "weight": v} for k, v in edges.items() if v > 1]
201
 
202
 
203
# =========================
# 🔥 BOT NETWORK
# =========================
def detect_bot_network(texts):
    """Flag likely bot accounts via near-duplicate text similarity.

    Texts whose pairwise TF-IDF cosine similarity exceeds 0.75 are linked in a
    graph; nodes with degree centrality above 0.3 are reported as suspected
    bots (coordinated, near-identical posting).

    Returns {"nodes": [...], "edges": [...], "bots": [...]} with at most 10
    bots; the empty structure on short input or failure.
    """
    empty = {"nodes": [], "edges": [], "bots": []}
    try:
        if len(texts) < 5: return empty

        X = TfidfVectorizer(max_features=300).fit_transform(texts)
        sim = cosine_similarity(X)

        G = nx.Graph()
        for i, text in enumerate(texts):
            G.add_node(i, text=text)

        # link every pair of near-duplicate texts
        for i in range(len(texts)):
            for j in range(i + 1, len(texts)):
                if sim[i][j] > 0.75:
                    G.add_edge(i, j)

        central = nx.degree_centrality(G)

        bots = [{"node": i, "score": round(s, 2), "text": texts[i]}
                for i, s in central.items() if s > 0.3]

        nodes = [{"id": i} for i in G.nodes()]
        edges = [{"source": u, "target": v} for u, v in G.edges()]

        return {"nodes": nodes, "edges": edges, "bots": bots[:10]}
    except Exception as e:
        # was a bare `except:` that silently swallowed every failure
        print("bot network error:", e)
        return empty
232
 
233
 
234
# =========================
# 🔥 TREND
# =========================
def predict_trend(data):
    """Classify the overall sentiment trend of *data* via a least-squares slope.

    Each item maps to +1 (Positive), -1 (Negative) or 0 (Neutral); the sign of
    the fitted line's slope over item index decides the label.

    Returns "Naik Positif" / "Naik Negatif", "kurang data" for fewer than 5
    items, or "error" on unexpected failure.
    """
    try:
        y = np.array([1 if d["sentiment"] == "Positive"
                      else -1 if d["sentiment"] == "Negative"
                      else 0
                      for d in data], dtype=float)
        if len(y) < 5: return "kurang data"
        x = np.arange(len(y))
        # The least-squares slope is cov(x, y) / var(x) and var(x) > 0 here,
        # so only the sign of the centered cross-product matters — identical
        # verdict to LinearRegression().fit(X, y).coef_[0] > 0, without the
        # estimator overhead.
        slope = float(((x - x.mean()) * (y - y.mean())).sum())
        return "Naik Positif" if slope > 0 else "Naik Negatif"
    except Exception as e:
        # was a bare `except:` that silently swallowed every failure
        print("trend error:", e)
        return "error"
 
 
 
 
246
 
247
 
248
# =========================
# 🔥 ROUTES
# =========================
@app.route("/")
def home():
    """Serve the dashboard page (templates/index.html)."""
    return render_template("index.html")
254
 
255
 
256
@app.route("/analyze", methods=["POST"])
def analyze():
    """Run the full analysis pipeline for a keyword and return the results as JSON.

    Expects a JSON body like {"keyword": "...", "source": "all"}; "source"
    defaults to "all". Side effects: writes the wordcloud/heatmap/timeline
    PNGs and result.csv under static/. Any failure is printed and answered
    with {"data": []}.
    """
    try:
        keyword=request.json.get("keyword")
        source=request.json.get("source","all")

        # collect_data yields (source, text) pairs; cap work at 100 items
        raw=collect_data(keyword,source)

        texts=[t for s,t in raw][:100]
        sources=[s for s,t in raw][:100]

        sentiments=predict(texts)

        result=[{"text":t,"sentiment":s,"source":src} for t,s,src in zip(texts,sentiments,sources)]

        # VISUAL — each writes a PNG under static/ (best effort, never raises)
        generate_wordcloud(texts)
        generate_heatmap(result)
        generate_timeline(result)

        # ANALYSIS
        top_words=get_top_words(texts)
        topics=get_topics(texts)
        insight=generate_insight(result)
        clusters=cluster_opinions(texts)
        hoax=detect_hoax(texts)
        network=build_network(texts)
        bot_network=detect_bot_network(texts)
        trend=predict_trend(result)

        # ADVANCED — no-op stubs when the optional service modules are missing
        bot_bert=detect_bot_bert(texts)
        fake_news=detect_fake_news(texts)
        gnn=run_gnn(bot_network["nodes"], bot_network["edges"])

        # SAVE CSV — consumed later by the /download route
        os.makedirs("static",exist_ok=True)
        pd.DataFrame(result).to_csv("static/result.csv",index=False)

        return jsonify({
            "data":result,
            "top_words":top_words,
            "topics":topics,
            "insight":insight,
            "clusters":clusters,
            "hoax":hoax,
            "network":network,
            "bot_network":bot_network,
            "trend":trend,
            "bot_bert":bot_bert,
            "fake_news":fake_news,
            "gnn":gnn
        })

    except Exception as e:
        # top-level route boundary: log and degrade to an empty payload
        print("ERROR:",e)
        return jsonify({"data":[]})
313
 
314
 
315
@app.route("/download")
def download():
    """Download the latest analysis results as a CSV attachment.

    NOTE(review): errors (500) if /analyze has not yet written
    static/result.csv — confirm whether a friendlier response is wanted.
    """
    return send_file("static/result.csv",as_attachment=True)
318
 
319
 
320
# =========================
# RUN
# =========================
if __name__=="__main__":
    # listen on all interfaces, port 7860 — presumably the Hugging Face
    # Spaces convention for this deployment; verify against the Space config
    app.run(host="0.0.0.0",port=7860)