Update main.py

main.py
CHANGED
@@ -7,14 +7,19 @@ import pandas as pd
 import os
 import re
 
+# VISUAL
 from wordcloud import WordCloud
 import matplotlib.pyplot as plt
 import numpy as np
 
+# ML
 from sklearn.decomposition import LatentDirichletAllocation
-from sklearn.feature_extraction.text import CountVectorizer
-
+from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
+from sklearn.cluster import KMeans
 
+# =========================
+# INIT
+# =========================
 app = Flask(__name__)
 
 
@@ -23,11 +28,9 @@ app = Flask(__name__)
 # =========================
 def get_top_words(texts, top_n=10):
     words = []
-
     for t in texts:
         t = re.sub(r'[^a-zA-Z\s]', '', t.lower())
         words.extend(t.split())
-
     return [{"word": w, "count": c} for w, c in Counter(words).most_common(top_n)]
 
 
@@ -37,7 +40,6 @@ def get_top_words(texts, top_n=10):
 def generate_wordcloud(texts):
     try:
         os.makedirs("static", exist_ok=True)
-
         texts = [t for t in texts if len(t.strip()) > 3]
 
         if len(texts) == 0:
@@ -61,9 +63,6 @@ def generate_heatmap(data):
     labels_sent = ["Positive", "Neutral", "Negative"]
     labels_src = list(set([d["source"] for d in data]))
 
-    if len(labels_src) == 0:
-        return
-
     matrix = np.zeros((len(labels_src), len(labels_sent)))
 
     for d in data:
@@ -95,6 +94,40 @@ def generate_heatmap(data):
         print("❌ Heatmap error:", e)
 
 
+# =========================
+# 🔥 TIMELINE
+# =========================
+def generate_timeline(data):
+    try:
+        if len(data) == 0:
+            return
+
+        os.makedirs("static", exist_ok=True)
+
+        timestamps = list(range(len(data)))
+
+        pos, neg, neu = [], [], []
+
+        for d in data:
+            pos.append(1 if d["sentiment"] == "Positive" else 0)
+            neg.append(1 if d["sentiment"] == "Negative" else 0)
+            neu.append(1 if d["sentiment"] == "Neutral" else 0)
+
+        plt.figure()
+        plt.plot(timestamps, pos, label="Positive")
+        plt.plot(timestamps, neg, label="Negative")
+        plt.plot(timestamps, neu, label="Neutral")
+
+        plt.legend()
+        plt.title("Sentiment Timeline")
+
+        plt.savefig("static/timeline.png")
+        plt.close()
+
+    except Exception as e:
+        print("❌ Timeline error:", e)
+
+
 # =========================
 # 🔥 TOPIC MODELING (SAFE)
 # =========================
@@ -103,13 +136,13 @@ def get_topics(texts, n_topics=3):
     texts = [t for t in texts if len(t.strip()) > 3]
 
     if len(texts) < 5:
-        return [["data kurang"]]
+        return [["data kurang"]]
 
     vectorizer = CountVectorizer(min_df=2)
     X = vectorizer.fit_transform(texts)
 
     if X.shape[1] == 0:
-        return [["tidak ada kata"]]
+        return [["tidak ada kata"]]
 
     lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
     lda.fit(X)
@@ -118,18 +151,17 @@ def get_topics(texts, n_topics=3):
 
     topics = []
     for topic in lda.components_:
-
-        topics.append(top_words)
+        topics.append([words[i] for i in topic.argsort()[-5:]])
 
     return topics
 
     except Exception as e:
         print("❌ LDA error:", e)
-        return [["topic gagal"]]
+        return [["topic gagal"]]
 
 
 # =========================
-# 🤖 AI INSIGHT
+# 🤖 AI INSIGHT (RULE SAFE)
 # =========================
 def generate_insight(data, topics):
     sentiments = [d["sentiment"] for d in data]
@@ -145,7 +177,6 @@ def generate_insight(data, topics):
 
     insight = f"""
 Total data: {total}
-
 Positive: {pos}
 Negative: {neg}
 Neutral: {neu}
@@ -161,6 +192,55 @@ Topik utama:
     return insight
 
 
+# =========================
+# 🔥 CLUSTERING
+# =========================
+def cluster_opinions(texts):
+    try:
+        texts = [t for t in texts if len(t.strip()) > 5]
+
+        if len(texts) < 5:
+            return []
+
+        vectorizer = TfidfVectorizer(max_features=500)
+        X = vectorizer.fit_transform(texts)
+
+        model = KMeans(n_clusters=3, random_state=42, n_init=10)
+        labels = model.fit_predict(X)
+
+        clusters = {}
+        for i, label in enumerate(labels):
+            clusters.setdefault(label, []).append(texts[i])
+
+        result = []
+        for k, v in clusters.items():
+            result.append({"cluster": int(k), "samples": v[:3]})
+
+        return result
+
+    except Exception as e:
+        print("❌ clustering error:", e)
+        return []
+
+
+# =========================
+# 🚨 HOAX DETECTION
+# =========================
+def detect_hoax(texts):
+    keywords = ["hoax","bohong","fitnah","manipulasi","propaganda","tipu"]
+
+    result = []
+    for t in texts:
+        score = sum(1 for k in keywords if k in t.lower())
+        result.append({
+            "text": t,
+            "score": score,
+            "label": "Hoax" if score >= 2 else "Normal"
+        })
+
+    return result
+
+
 # =========================
 # 🏠 HOME
 # =========================
@@ -193,16 +273,20 @@ def analyze():
             "source": src
         })
 
-        #
+        # VISUAL
         generate_wordcloud(texts)
         generate_heatmap(result)
+        generate_timeline(result)
 
-        #
+        # ANALYTICS
         top_words = get_top_words(texts)
         topics = get_topics(texts)
         insight = generate_insight(result, topics)
 
-
+        clusters = cluster_opinions(texts)
+        hoax = detect_hoax(texts)
+
+        # CSV
         os.makedirs("static", exist_ok=True)
         pd.DataFrame(result).to_csv("static/result.csv", index=False)
 
@@ -210,17 +294,14 @@ def analyze():
             "data": result,
             "top_words": top_words,
             "topics": topics,
-            "insight": insight
+            "insight": insight,
+            "clusters": clusters,
+            "hoax": hoax
         })
 
     except Exception as e:
-        print("❌ ERROR:", e)
-        return jsonify({
-            "data": [],
-            "top_words": [],
-            "topics": [["error"]],
-            "insight": "Terjadi error"
-        })
+        print("❌ ERROR:", e)
+        return jsonify({"data": []})
 
 
 # =========================