noranisa committed on
Commit
a3e4fd3
Β·
verified Β·
1 Parent(s): fe8b2b5

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +129 -105
main.py CHANGED
@@ -1,7 +1,6 @@
1
  from flask import Flask, render_template, request, jsonify, send_file
2
  from services.aggregator import collect_data
3
  from services.sentiment import predict
4
- from services.evaluation import evaluate_model
5
 
6
  from collections import Counter
7
  import pandas as pd
@@ -29,63 +28,104 @@ def get_top_words(texts, top_n=10):
29
  t = re.sub(r'[^a-zA-Z\s]', '', t.lower())
30
  words.extend(t.split())
31
 
32
- common = Counter(words).most_common(top_n)
33
- return [{"word": w, "count": c} for w, c in common]
34
 
35
 
36
  # =========================
37
- # πŸ”₯ HEATMAP
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # =========================
39
  def generate_heatmap(data):
40
- sentiments = [d["sentiment"] for d in data]
41
- sources = [d["source"] for d in data]
 
 
 
 
 
 
 
42
 
43
- labels_sent = ["Positive", "Neutral", "Negative"]
44
- labels_src = list(set(sources))
45
 
46
- matrix = np.zeros((len(labels_src), len(labels_sent)))
 
 
 
47
 
48
- for d in data:
49
- i = labels_src.index(d["source"])
50
- j = labels_sent.index(d["sentiment"])
51
- matrix[i][j] += 1
52
 
53
- plt.figure()
54
- plt.imshow(matrix)
55
 
56
- plt.xticks(range(len(labels_sent)), labels_sent)
57
- plt.yticks(range(len(labels_src)), labels_src)
58
 
59
- for i in range(len(labels_src)):
60
- for j in range(len(labels_sent)):
61
- plt.text(j, i, int(matrix[i][j]), ha='center')
62
 
63
- plt.title("Heatmap Sentimen")
64
- plt.colorbar()
65
 
66
- os.makedirs("static", exist_ok=True)
67
- plt.savefig("static/heatmap.png")
68
- plt.close()
 
 
 
69
 
70
 
71
  # =========================
72
- # πŸ”₯ TOPIC MODELING (LDA)
73
  # =========================
74
  def get_topics(texts, n_topics=3):
75
- vectorizer = CountVectorizer(stop_words='english')
76
- X = vectorizer.fit_transform(texts)
 
 
 
 
 
 
77
 
78
- lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
79
- lda.fit(X)
80
 
81
- words = vectorizer.get_feature_names_out()
 
82
 
83
- topics = []
84
- for topic in lda.components_:
85
- top_words = [words[i] for i in topic.argsort()[-5:]]
86
- topics.append(top_words)
87
 
88
- return topics
 
 
 
 
 
 
 
 
 
89
 
90
 
91
  # =========================
@@ -93,12 +133,16 @@ def get_topics(texts, n_topics=3):
93
  # =========================
94
  def generate_insight(data, topics):
95
  sentiments = [d["sentiment"] for d in data]
96
- total = len(sentiments)
97
 
98
  pos = sentiments.count("Positive")
99
  neg = sentiments.count("Negative")
100
  neu = sentiments.count("Neutral")
101
 
 
 
 
 
 
102
  insight = f"""
103
  Total data: {total}
104
 
@@ -106,7 +150,7 @@ Positive: {pos}
106
  Negative: {neg}
107
  Neutral: {neu}
108
 
109
- Mayoritas opini adalah {"positif" if pos > neg else "negatif"}.
110
 
111
  Topik utama:
112
  """
@@ -118,7 +162,7 @@ Topik utama:
118
 
119
 
120
  # =========================
121
- # 🌐 HALAMAN UTAMA
122
  # =========================
123
  @app.route('/')
124
  def home():
@@ -126,81 +170,61 @@ def home():
126
 
127
 
128
  # =========================
129
- # πŸš€ ANALYZE API (AJAX)
130
  # =========================
131
  @app.route('/analyze', methods=['POST'])
132
  def analyze():
133
- keyword = request.json.get('keyword')
134
- source = request.json.get('source', 'all')
 
135
 
136
- # ambil data
137
- data_raw = collect_data(keyword, source)
138
 
139
- texts = [t for s, t in data_raw][:100]
140
- sources = [s for s, t in data_raw][:100]
141
 
142
- sentiments = predict(texts)
143
 
144
- result = []
145
- for t, s, src in zip(texts, sentiments, sources):
146
- result.append({
147
- "text": t,
148
- "sentiment": s,
149
- "source": src
150
- })
151
 
152
- # =====================
153
- # πŸ”₯ WORDCLOUD
154
- # =====================
155
- try:
 
 
 
 
 
 
156
  os.makedirs("static", exist_ok=True)
157
- wc = WordCloud(width=800, height=400).generate(" ".join(texts))
158
- wc.to_file("static/wordcloud.png")
159
- except:
160
- pass
161
-
162
- # =====================
163
- # πŸ“ CSV EXPORT
164
- # =====================
165
- df = pd.DataFrame(result)
166
- df.to_csv("static/result.csv", index=False)
167
-
168
- # =====================
169
- # πŸ“Š HEATMAP
170
- # =====================
171
- generate_heatmap(result)
172
-
173
- # =====================
174
- # πŸ”₯ TOP WORDS
175
- # =====================
176
- top_words = get_top_words(texts)
177
-
178
- # =====================
179
- # 🧠 TOPIC MODELING
180
- # =====================
181
- topics = get_topics(texts)
182
-
183
- # =====================
184
- # πŸ€– AI INSIGHT
185
- # =====================
186
- insight = generate_insight(result, topics)
187
-
188
- # =====================
189
- # πŸ“Š EVALUASI MODEL
190
- # =====================
191
- eval_result = evaluate_model(predict)
192
-
193
- return jsonify({
194
- "data": result,
195
- "top_words": top_words,
196
- "topics": topics,
197
- "insight": insight,
198
- "eval": eval_result
199
- })
200
-
201
-
202
- # =========================
203
- # πŸ“₯ DOWNLOAD CSV
204
  # =========================
205
  @app.route('/download')
206
  def download():
 
1
  from flask import Flask, render_template, request, jsonify, send_file
2
  from services.aggregator import collect_data
3
  from services.sentiment import predict
 
4
 
5
  from collections import Counter
6
  import pandas as pd
 
28
  t = re.sub(r'[^a-zA-Z\s]', '', t.lower())
29
  words.extend(t.split())
30
 
31
+ return [{"word": w, "count": c} for w, c in Counter(words).most_common(top_n)]
 
32
 
33
 
34
# =========================
# 🔥 WORDCLOUD (FIX)
# =========================
def generate_wordcloud(texts):
    """Render a word cloud to static/wordcloud.png from the given texts.

    Entries of 3 characters or fewer (after stripping) are ignored; when
    nothing usable remains, no image is written.  Any failure is logged
    and swallowed so the caller is never interrupted.
    """
    try:
        os.makedirs("static", exist_ok=True)

        usable = [entry for entry in texts if len(entry.strip()) > 3]
        if not usable:
            return

        cloud = WordCloud(width=800, height=400).generate(" ".join(usable))
        cloud.to_file("static/wordcloud.png")

    except Exception as exc:
        # Best-effort: a missing wordcloud must not break the analysis flow.
        print("❌ Wordcloud error:", exc)
51
+
52
+
53
# =========================
# 🔥 HEATMAP (FIX)
# =========================
def generate_heatmap(data):
    """Save a source-by-sentiment count heatmap to static/heatmap.png.

    data: list of dicts carrying "source" and "sentiment" keys.  Rows are
    the distinct sources, columns the fixed sentiment labels.  Records
    whose sentiment is not one of the known labels are skipped instead of
    aborting the whole plot.  Nothing is written when there is no
    countable data; any failure is logged and swallowed.
    """
    try:
        if not data:
            return

        labels_sent = ["Positive", "Neutral", "Negative"]
        # sorted() gives a deterministic row order; bare set iteration
        # would shuffle rows between runs.
        labels_src = sorted({d["source"] for d in data})

        if not labels_src:
            return

        matrix = np.zeros((len(labels_src), len(labels_sent)))

        for d in data:
            i = labels_src.index(d["source"])
            # Skip unknown sentiment labels instead of letting .index()
            # raise ValueError and abort the entire heatmap.
            try:
                j = labels_sent.index(d["sentiment"])
            except ValueError:
                continue
            matrix[i][j] += 1

        if matrix.sum() == 0:
            return

        plt.figure()
        plt.imshow(matrix)

        plt.xticks(range(len(labels_sent)), labels_sent)
        plt.yticks(range(len(labels_src)), labels_src)

        # Annotate every cell with its count for readability.
        for i in range(len(labels_src)):
            for j in range(len(labels_sent)):
                plt.text(j, i, int(matrix[i][j]), ha='center')

        plt.title("Heatmap Sentimen")
        plt.colorbar()

        os.makedirs("static", exist_ok=True)
        plt.savefig("static/heatmap.png")
        plt.close()

    except Exception as e:
        print("❌ Heatmap error:", e)
+ print("❌ Heatmap error:", e)
96
 
97
 
98
# =========================
# 🔥 TOPIC MODELING (SAFE)
# =========================
def get_topics(texts, n_topics=3):
    """Extract up to *n_topics* topics (5 top words each) with LDA.

    Texts of 3 characters or fewer (after stripping) are dropped first.
    Returns a list of word lists; degenerate input or any modelling
    failure yields a single placeholder topic instead of raising.
    """
    try:
        cleaned = [doc for doc in texts if len(doc.strip()) > 3]

        if len(cleaned) < 5:
            return [["data kurang untuk topic modeling"]]

        vectorizer = CountVectorizer(min_df=2)
        doc_term = vectorizer.fit_transform(cleaned)

        if doc_term.shape[1] == 0:
            return [["tidak ada kata valid"]]

        model = LatentDirichletAllocation(n_components=n_topics, random_state=42)
        model.fit(doc_term)

        vocab = vectorizer.get_feature_names_out()

        # Five highest-weighted vocabulary words per topic component.
        return [
            [vocab[idx] for idx in component.argsort()[-5:]]
            for component in model.components_
        ]

    except Exception as e:
        print("❌ LDA error:", e)
        return [["topic gagal dibuat"]]
129
 
130
 
131
  # =========================
 
133
  # =========================
134
  def generate_insight(data, topics):
135
  sentiments = [d["sentiment"] for d in data]
 
136
 
137
  pos = sentiments.count("Positive")
138
  neg = sentiments.count("Negative")
139
  neu = sentiments.count("Neutral")
140
 
141
+ total = len(sentiments)
142
+
143
+ if total == 0:
144
+ return "Tidak ada data"
145
+
146
  insight = f"""
147
  Total data: {total}
148
 
 
150
  Negative: {neg}
151
  Neutral: {neu}
152
 
153
+ Mayoritas opini: {"Positif" if pos > neg else "Negatif"}
154
 
155
  Topik utama:
156
  """
 
162
 
163
 
164
  # =========================
165
+ # 🌐 HOME
166
  # =========================
167
  @app.route('/')
168
  def home():
 
170
 
171
 
172
  # =========================
173
+ # πŸš€ ANALYZE
174
  # =========================
175
@app.route('/analyze', methods=['POST'])
def analyze():
    """POST endpoint: collect texts for a keyword, run sentiment
    prediction, generate visual assets (wordcloud, heatmap, CSV export)
    and return the analysis payload as JSON.

    On any failure an empty-but-well-formed payload is returned rather
    than a 500, so the frontend always receives the expected keys.
    """
    try:
        payload = request.json
        keyword = payload.get('keyword')
        source = payload.get('source', 'all')

        data_raw = collect_data(keyword, source)

        # Cap the workload at the first 100 records.
        texts = [text for _, text in data_raw][:100]
        sources = [origin for origin, _ in data_raw][:100]

        sentiments = predict(texts)

        result = [
            {"text": text, "sentiment": label, "source": origin}
            for text, label, origin in zip(texts, sentiments, sources)
        ]

        # 🔥 GENERATE VISUAL
        generate_wordcloud(texts)
        generate_heatmap(result)

        # 🔥 ANALYTICS
        top_words = get_top_words(texts)
        topics = get_topics(texts)
        insight = generate_insight(result, topics)

        # 🔥 CSV
        os.makedirs("static", exist_ok=True)
        pd.DataFrame(result).to_csv("static/result.csv", index=False)

        return jsonify({
            "data": result,
            "top_words": top_words,
            "topics": topics,
            "insight": insight
        })

    except Exception as e:
        print("❌ ERROR ANALYZE:", e)
        return jsonify({
            "data": [],
            "top_words": [],
            "topics": [["error"]],
            "insight": "Terjadi error"
        })
224
+
225
+
226
+ # =========================
227
+ # πŸ“₯ DOWNLOAD
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  # =========================
229
  @app.route('/download')
230
  def download():