Spaces:

noranisa
/

Sentimen-Analysis

Sleeping

App Files Files Community

noranisa committed 18 days ago

Commit

a3e4fd3

verified ·

1 Parent(s): fe8b2b5

Update main.py

Browse files

Files changed (1) hide show

main.py +129 -105

main.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from flask import Flask, render_template, request, jsonify, send_file
 from services.aggregator import collect_data
 from services.sentiment import predict
-from services.evaluation import evaluate_model
 from collections import Counter
 import pandas as pd
@@ -29,63 +28,104 @@ def get_top_words(texts, top_n=10):
         t = re.sub(r'[^a-zA-Z\s]', '', t.lower())
         words.extend(t.split())
-    common = Counter(words).most_common(top_n)
-    return [{"word": w, "count": c} for w, c in common]
 # =========================
-# 🔥 HEATMAP
 # =========================
 def generate_heatmap(data):
-    sentiments = [d["sentiment"] for d in data]
-    sources = [d["source"] for d in data]
-    labels_sent = ["Positive", "Neutral", "Negative"]
-    labels_src = list(set(sources))
-    matrix = np.zeros((len(labels_src), len(labels_sent)))
-    for d in data:
-        i = labels_src.index(d["source"])
-        j = labels_sent.index(d["sentiment"])
-        matrix[i][j] += 1
-    plt.figure()
-    plt.imshow(matrix)
-    plt.xticks(range(len(labels_sent)), labels_sent)
-    plt.yticks(range(len(labels_src)), labels_src)
-    for i in range(len(labels_src)):
-        for j in range(len(labels_sent)):
-            plt.text(j, i, int(matrix[i][j]), ha='center')
-    plt.title("Heatmap Sentimen")
-    plt.colorbar()
-    os.makedirs("static", exist_ok=True)
-    plt.savefig("static/heatmap.png")
-    plt.close()
 # =========================
-# 🔥 TOPIC MODELING (LDA)
 # =========================
 def get_topics(texts, n_topics=3):
-    vectorizer = CountVectorizer(stop_words='english')
-    X = vectorizer.fit_transform(texts)
-    lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
-    lda.fit(X)
-    words = vectorizer.get_feature_names_out()
-    topics = []
-    for topic in lda.components_:
-        top_words = [words[i] for i in topic.argsort()[-5:]]
-        topics.append(top_words)
-    return topics
 # =========================
@@ -93,12 +133,16 @@ def get_topics(texts, n_topics=3):
 # =========================
 def generate_insight(data, topics):
     sentiments = [d["sentiment"] for d in data]
-    total = len(sentiments)
     pos = sentiments.count("Positive")
     neg = sentiments.count("Negative")
     neu = sentiments.count("Neutral")
     insight = f"""
 Total data: {total}
@@ -106,7 +150,7 @@ Positive: {pos}
 Negative: {neg}
 Neutral: {neu}
-Mayoritas opini adalah {"positif" if pos > neg else "negatif"}.
 Topik utama:
 """
@@ -118,7 +162,7 @@ Topik utama:
 # =========================
-# 🌐 HALAMAN UTAMA
 # =========================
 @app.route('/')
 def home():
@@ -126,81 +170,61 @@ def home():
 # =========================
-# 🚀 ANALYZE API (AJAX)
 # =========================
 @app.route('/analyze', methods=['POST'])
 def analyze():
-    keyword = request.json.get('keyword')
-    source = request.json.get('source', 'all')
-    # ambil data
-    data_raw = collect_data(keyword, source)
-    texts = [t for s, t in data_raw][:100]
-    sources = [s for s, t in data_raw][:100]
-    sentiments = predict(texts)
-    result = []
-    for t, s, src in zip(texts, sentiments, sources):
-        result.append({
-            "text": t,
-            "sentiment": s,
-            "source": src
-        })
-    # =====================
-    # 🔥 WORDCLOUD
-    # =====================
-    try:
         os.makedirs("static", exist_ok=True)
-        wc = WordCloud(width=800, height=400).generate(" ".join(texts))
-        wc.to_file("static/wordcloud.png")
-    except:
-        pass
-    # =====================
-    # 📁 CSV EXPORT
-    # =====================
-    df = pd.DataFrame(result)
-    df.to_csv("static/result.csv", index=False)
-    # =====================
-    # 📊 HEATMAP
-    # =====================
-    generate_heatmap(result)
-    # =====================
-    # 🔥 TOP WORDS
-    # =====================
-    top_words = get_top_words(texts)
-    # =====================
-    # 🧠 TOPIC MODELING
-    # =====================
-    topics = get_topics(texts)
-    # =====================
-    # 🤖 AI INSIGHT
-    # =====================
-    insight = generate_insight(result, topics)
-    # =====================
-    # 📊 EVALUASI MODEL
-    # =====================
-    eval_result = evaluate_model(predict)
-    return jsonify({
-        "data": result,
-        "top_words": top_words,
-        "topics": topics,
-        "insight": insight,
-        "eval": eval_result
-    })
-# =========================
-# 📥 DOWNLOAD CSV
 # =========================
 @app.route('/download')
 def download():

 from flask import Flask, render_template, request, jsonify, send_file
 from services.aggregator import collect_data
 from services.sentiment import predict
 from collections import Counter
 import pandas as pd
         t = re.sub(r'[^a-zA-Z\s]', '', t.lower())
         words.extend(t.split())
+    return [{"word": w, "count": c} for w, c in Counter(words).most_common(top_n)]
 # =========================
+# 🔥 WORDCLOUD (FIX)
+# =========================
def generate_wordcloud(texts):
    """Render a word cloud of ``texts`` to ``static/wordcloud.png``.

    This is a best-effort helper: it never raises. Any failure is printed
    and the function returns ``None`` without writing the image.

    Args:
        texts: Iterable of text snippets. Entries that are not strings, or
            that have three or fewer characters after stripping whitespace,
            are ignored.

    Returns:
        None. The only effect is the PNG written under ``static/``.
    """
    try:
        # Skip non-string entries (e.g. None from a scraper) one by one
        # instead of letting a single bad item abort the whole render.
        usable = [
            t for t in texts
            if isinstance(t, str) and len(t.strip()) > 3
        ]
        if not usable:
            # Nothing to draw; do not touch the filesystem.
            # NOTE(review): an image written by an earlier call is left in
            # place in this case - confirm the UI tolerates a stale file.
            return

        os.makedirs("static", exist_ok=True)
        wc = WordCloud(width=800, height=400).generate(" ".join(usable))
        wc.to_file("static/wordcloud.png")
    except Exception as e:
        print("❌ Wordcloud error:", e)
+# =========================
+# 🔥 HEATMAP (FIX)
 # =========================
 def generate_heatmap(data):
+    try:
+        if len(data) == 0:
+            return
+        labels_sent = ["Positive", "Neutral", "Negative"]
+        labels_src = list(set([d["source"] for d in data]))
+        if len(labels_src) == 0:
+            return
+        matrix = np.zeros((len(labels_src), len(labels_sent)))
+        for d in data:
+            i = labels_src.index(d["source"])
+            j = labels_sent.index(d["sentiment"])
+            matrix[i][j] += 1
+        if matrix.sum() == 0:
+            return
+        plt.figure()
+        plt.imshow(matrix)
+        plt.xticks(range(len(labels_sent)), labels_sent)
+        plt.yticks(range(len(labels_src)), labels_src)
+        for i in range(len(labels_src)):
+            for j in range(len(labels_sent)):
+                plt.text(j, i, int(matrix[i][j]), ha='center')
+        plt.title("Heatmap Sentimen")
+        plt.colorbar()
+        os.makedirs("static", exist_ok=True)
+        plt.savefig("static/heatmap.png")
+        plt.close()
+    except Exception as e:
+        print("❌ Heatmap error:", e)
 # =========================
+# 🔥 TOPIC MODELING (SAFE)
 # =========================
 def get_topics(texts, n_topics=3):
+    try:
+        texts = [t for t in texts if len(t.strip()) > 3]
+        if len(texts) < 5:
+            return [["data kurang untuk topic modeling"]]
+        vectorizer = CountVectorizer(min_df=2)
+        X = vectorizer.fit_transform(texts)
+        if X.shape[1] == 0:
+            return [["tidak ada kata valid"]]
+        lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
+        lda.fit(X)
+        words = vectorizer.get_feature_names_out()
+        topics = []
+        for topic in lda.components_:
+            top_words = [words[i] for i in topic.argsort()[-5:]]
+            topics.append(top_words)
+        return topics
+    except Exception as e:
+        print("❌ LDA error:", e)
+        return [["topic gagal dibuat"]]
 # =========================
 # =========================
 def generate_insight(data, topics):
     sentiments = [d["sentiment"] for d in data]
     pos = sentiments.count("Positive")
     neg = sentiments.count("Negative")
     neu = sentiments.count("Neutral")
+    total = len(sentiments)
+    if total == 0:
+        return "Tidak ada data"
     insight = f"""
 Total data: {total}
 Negative: {neg}
 Neutral: {neu}
+Mayoritas opini: {"Positif" if pos > neg else "Negatif"}
 Topik utama:
 """
 # =========================
+# 🌐 HOME
 # =========================
 @app.route('/')
 def home():
 # =========================
+# 🚀 ANALYZE
 # =========================
 @app.route('/analyze', methods=['POST'])
 def analyze():
+    try:
+        keyword = request.json.get('keyword')
+        source = request.json.get('source', 'all')
+        data_raw = collect_data(keyword, source)
+        texts = [t for s, t in data_raw][:100]
+        sources = [s for s, t in data_raw][:100]
+        sentiments = predict(texts)
+        result = []
+        for t, s, src in zip(texts, sentiments, sources):
+            result.append({
+                "text": t,
+                "sentiment": s,
+                "source": src
+            })
+        # 🔥 GENERATE VISUAL
+        generate_wordcloud(texts)
+        generate_heatmap(result)
+        # 🔥 ANALYTICS
+        top_words = get_top_words(texts)
+        topics = get_topics(texts)
+        insight = generate_insight(result, topics)
+        # 🔥 CSV
         os.makedirs("static", exist_ok=True)
+        pd.DataFrame(result).to_csv("static/result.csv", index=False)
+        return jsonify({
+            "data": result,
+            "top_words": top_words,
+            "topics": topics,
+            "insight": insight
+        })
+    except Exception as e:
+        print("❌ ERROR ANALYZE:", e)
+        return jsonify({
+            "data": [],
+            "top_words": [],
+            "topics": [["error"]],
+            "insight": "Terjadi error"
+        })
+# =========================
+# 📥 DOWNLOAD
 # =========================
 @app.route('/download')
 def download():