noranisa commited on
Commit
5cb0ade
·
verified ·
1 Parent(s): 5f1fb1e

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +187 -182
main.py CHANGED
@@ -2,96 +2,107 @@ from flask import Flask, render_template, request, jsonify, send_file
2
  from services.aggregator import collect_data
3
  from services.sentiment import predict
4
 
 
 
 
5
  from collections import Counter
6
  import pandas as pd
7
  import os
8
  import re
 
9
 
10
  # VISUAL
11
  from wordcloud import WordCloud
12
  import matplotlib.pyplot as plt
13
- import numpy as np
14
 
15
  # ML
16
- from sklearn.decomposition import LatentDirichletAllocation
17
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 
18
  from sklearn.cluster import KMeans
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  # =========================
21
- # INIT
22
  # =========================
23
- app = Flask(__name__)
 
24
 
25
 
26
  # =========================
27
  # 🔥 TOP WORDS
28
  # =========================
29
- def get_top_words(texts, top_n=10):
30
  words = []
31
  for t in texts:
32
- t = re.sub(r'[^a-zA-Z\s]', '', t.lower())
33
- words.extend(t.split())
34
- return [{"word": w, "count": c} for w, c in Counter(words).most_common(top_n)]
35
 
36
 
37
  # =========================
38
- # 🔥 WORDCLOUD (FIX)
39
  # =========================
40
  def generate_wordcloud(texts):
41
  try:
42
  os.makedirs("static", exist_ok=True)
43
- texts = [t for t in texts if len(t.strip()) > 3]
44
-
45
- if len(texts) == 0:
46
- return
47
-
48
- wc = WordCloud(width=800, height=400).generate(" ".join(texts))
49
  wc.to_file("static/wordcloud.png")
50
-
51
  except Exception as e:
52
- print(" Wordcloud error:", e)
53
 
54
 
55
  # =========================
56
- # 🔥 HEATMAP (FIX)
57
  # =========================
58
  def generate_heatmap(data):
59
  try:
60
- if len(data) == 0:
61
- return
 
62
 
63
- labels_sent = ["Positive", "Neutral", "Negative"]
64
- labels_src = list(set([d["source"] for d in data]))
65
-
66
- matrix = np.zeros((len(labels_src), len(labels_sent)))
67
 
68
  for d in data:
69
- i = labels_src.index(d["source"])
70
- j = labels_sent.index(d["sentiment"])
71
- matrix[i][j] += 1
72
 
73
- if matrix.sum() == 0:
74
- return
75
 
76
  plt.figure()
77
  plt.imshow(matrix)
78
-
79
- plt.xticks(range(len(labels_sent)), labels_sent)
80
- plt.yticks(range(len(labels_src)), labels_src)
81
-
82
- for i in range(len(labels_src)):
83
- for j in range(len(labels_sent)):
84
- plt.text(j, i, int(matrix[i][j]), ha='center')
85
-
86
- plt.title("Heatmap Sentimen")
87
  plt.colorbar()
88
-
89
- os.makedirs("static", exist_ok=True)
90
  plt.savefig("static/heatmap.png")
91
  plt.close()
92
-
93
  except Exception as e:
94
- print(" Heatmap error:", e)
95
 
96
 
97
  # =========================
@@ -99,221 +110,215 @@ def generate_heatmap(data):
99
  # =========================
100
  def generate_timeline(data):
101
  try:
102
- if len(data) == 0:
103
- return
104
-
105
  os.makedirs("static", exist_ok=True)
106
 
107
- timestamps = list(range(len(data)))
108
-
109
- pos, neg, neu = [], [], []
110
-
111
  for d in data:
112
- pos.append(1 if d["sentiment"] == "Positive" else 0)
113
- neg.append(1 if d["sentiment"] == "Negative" else 0)
114
- neu.append(1 if d["sentiment"] == "Neutral" else 0)
115
 
116
  plt.figure()
117
- plt.plot(timestamps, pos, label="Positive")
118
- plt.plot(timestamps, neg, label="Negative")
119
- plt.plot(timestamps, neu, label="Neutral")
120
-
121
  plt.legend()
122
- plt.title("Sentiment Timeline")
123
-
124
  plt.savefig("static/timeline.png")
125
  plt.close()
126
-
127
  except Exception as e:
128
- print(" Timeline error:", e)
129
 
130
 
131
  # =========================
132
- # 🔥 TOPIC MODELING (SAFE)
133
  # =========================
134
- def get_topics(texts, n_topics=3):
135
  try:
136
- texts = [t for t in texts if len(t.strip()) > 3]
137
-
138
- if len(texts) < 5:
139
- return [["data kurang"]]
140
 
141
- vectorizer = CountVectorizer(min_df=2)
142
- X = vectorizer.fit_transform(texts)
143
 
144
- if X.shape[1] == 0:
145
- return [["tidak ada kata"]]
146
 
147
- lda = LatentDirichletAllocation(n_components=n_topics, random_state=42)
148
  lda.fit(X)
149
 
150
- words = vectorizer.get_feature_names_out()
151
-
152
- topics = []
153
- for topic in lda.components_:
154
- topics.append([words[i] for i in topic.argsort()[-5:]])
155
-
156
  return topics
157
-
158
- except Exception as e:
159
- print("❌ LDA error:", e)
160
- return [["topic gagal"]]
161
 
162
 
163
  # =========================
164
- # 🤖 AI INSIGHT (RULE SAFE)
165
  # =========================
166
- def generate_insight(data, topics):
167
- sentiments = [d["sentiment"] for d in data]
168
-
169
- pos = sentiments.count("Positive")
170
- neg = sentiments.count("Negative")
171
- neu = sentiments.count("Neutral")
172
-
173
- total = len(sentiments)
174
 
175
- if total == 0:
176
- return "Tidak ada data"
177
 
178
- insight = f"""
179
- Total data: {total}
180
- Positive: {pos}
181
- Negative: {neg}
182
- Neutral: {neu}
 
 
 
 
 
 
 
 
 
183
 
184
- Mayoritas opini: {"Positif" if pos > neg else "Negatif"}
185
 
186
- Topik utama:
187
- """
 
 
 
 
188
 
189
- for i, t in enumerate(topics):
190
- insight += f"\nTopik {i+1}: {', '.join(t)}"
191
 
192
- return insight
 
 
 
 
 
 
 
 
 
 
193
 
194
 
195
  # =========================
196
- # 🔥 CLUSTERING
197
  # =========================
198
- def cluster_opinions(texts):
199
  try:
200
- texts = [t for t in texts if len(t.strip()) > 5]
201
 
202
- if len(texts) < 5:
203
- return []
204
 
205
- vectorizer = TfidfVectorizer(max_features=500)
206
- X = vectorizer.fit_transform(texts)
 
207
 
208
- model = KMeans(n_clusters=3, random_state=42, n_init=10)
209
- labels = model.fit_predict(X)
 
 
210
 
211
- clusters = {}
212
- for i, label in enumerate(labels):
213
- clusters.setdefault(label, []).append(texts[i])
214
 
215
- result = []
216
- for k, v in clusters.items():
217
- result.append({"cluster": int(k), "samples": v[:3]})
218
 
219
- return result
 
220
 
221
- except Exception as e:
222
- print("❌ clustering error:", e)
223
- return []
224
 
225
 
226
  # =========================
227
- # 🚨 HOAX DETECTION
228
  # =========================
229
- def detect_hoax(texts):
230
- keywords = ["hoax","bohong","fitnah","manipulasi","propaganda","tipu"]
231
-
232
- result = []
233
- for t in texts:
234
- score = sum(1 for k in keywords if k in t.lower())
235
- result.append({
236
- "text": t,
237
- "score": score,
238
- "label": "Hoax" if score >= 2 else "Normal"
239
- })
240
-
241
- return result
242
 
243
 
244
  # =========================
245
- # 🌐 HOME
246
  # =========================
247
- @app.route('/')
248
  def home():
249
  return render_template("index.html")
250
 
251
 
252
- # =========================
253
- # 🚀 ANALYZE
254
- # =========================
255
- @app.route('/analyze', methods=['POST'])
256
  def analyze():
257
  try:
258
- keyword = request.json.get('keyword')
259
- source = request.json.get('source', 'all')
260
 
261
- data_raw = collect_data(keyword, source)
262
 
263
- texts = [t for s, t in data_raw][:100]
264
- sources = [s for s, t in data_raw][:100]
265
 
266
- sentiments = predict(texts)
267
 
268
- result = []
269
- for t, s, src in zip(texts, sentiments, sources):
270
- result.append({
271
- "text": t,
272
- "sentiment": s,
273
- "source": src
274
- })
275
 
276
  # VISUAL
277
  generate_wordcloud(texts)
278
  generate_heatmap(result)
279
  generate_timeline(result)
280
 
281
- # ANALYTICS
282
- top_words = get_top_words(texts)
283
- topics = get_topics(texts)
284
- insight = generate_insight(result, topics)
285
-
286
- clusters = cluster_opinions(texts)
287
- hoax = detect_hoax(texts)
288
-
289
- # CSV
290
- os.makedirs("static", exist_ok=True)
291
- pd.DataFrame(result).to_csv("static/result.csv", index=False)
 
 
 
 
 
 
 
292
 
293
  return jsonify({
294
- "data": result,
295
- "top_words": top_words,
296
- "topics": topics,
297
- "insight": insight,
298
- "clusters": clusters,
299
- "hoax": hoax
 
 
 
 
 
 
300
  })
301
 
302
  except Exception as e:
303
- print("ERROR:", e)
304
- return jsonify({"data": []})
305
 
306
 
307
- # =========================
308
- # 📥 DOWNLOAD
309
- # =========================
310
- @app.route('/download')
311
  def download():
312
- return send_file("static/result.csv", as_attachment=True)
313
 
314
 
315
  # =========================
316
- # ▶️ RUN
317
  # =========================
318
- if __name__ == "__main__":
319
- app.run(host="0.0.0.0", port=7860)
 
2
  from services.aggregator import collect_data
3
  from services.sentiment import predict
4
 
5
+ # =========================
6
+ # IMPORT TAMBAHAN
7
+ # =========================
8
  from collections import Counter
9
  import pandas as pd
10
  import os
11
  import re
12
+ import numpy as np
13
 
14
  # VISUAL
15
  from wordcloud import WordCloud
16
  import matplotlib.pyplot as plt
 
17
 
18
  # ML
 
19
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
20
+ from sklearn.decomposition import LatentDirichletAllocation
21
  from sklearn.cluster import KMeans
22
+ from sklearn.metrics.pairwise import cosine_similarity
23
+ from sklearn.linear_model import LinearRegression
24
+
25
+ # GRAPH
26
+ import networkx as nx
27
+ from itertools import combinations
28
+
29
# OPTIONAL ADVANCED
# Each advanced service is optional: when its module cannot be imported the
# feature degrades to a no-op stub so /analyze keeps working without it.
# NOTE: `except Exception` (not a bare `except:`) so SystemExit/KeyboardInterrupt
# are never swallowed while still tolerating broken optional modules.
try:
    from services.bot_bert import detect_bot_bert
except Exception:
    def detect_bot_bert(x): return []

try:
    from services.fake_news import detect_fake_news
except Exception:
    def detect_fake_news(x): return []

try:
    from services.gnn import run_gnn
except Exception:
    def run_gnn(n, e): return []
44
+
45
+ app = Flask(__name__)
46
 
47
# =========================
# 🔥 UTIL
# =========================
def clean_text(t):
    """Lowercase *t* and drop every character that is not an ASCII letter or whitespace."""
    lowered = t.lower()
    return re.sub(r'[^a-zA-Z\s]', '', lowered)


# =========================
# 🔥 TOP WORDS
# =========================
def get_top_words(texts):
    """Return the 10 most frequent cleaned words in *texts* as {"word", "count"} dicts."""
    counts = Counter()
    for text in texts:
        counts.update(clean_text(text).split())
    return [{"word": word, "count": count} for word, count in counts.most_common(10)]
 
62
 
63
 
64
# =========================
# 🔥 WORDCLOUD
# =========================
def generate_wordcloud(texts):
    """Render a word cloud of *texts* to static/wordcloud.png (best effort).

    Texts of 3 or fewer non-blank characters are skipped; nothing is written
    when no usable text remains. Failures are printed, never raised.
    """
    try:
        os.makedirs("static", exist_ok=True)
        usable = [t for t in texts if len(t.strip()) > 3]
        if not usable:
            return
        cloud = WordCloud(width=800, height=400)
        cloud.generate(" ".join(usable))
        cloud.to_file("static/wordcloud.png")
    except Exception as e:
        print("wordcloud error:", e)
76
 
77
 
78
# =========================
# 🔥 HEATMAP
# =========================
def generate_heatmap(data):
    """Save a source × sentiment count heatmap to static/heatmap.png (best effort).

    *data* is a list of dicts with "source" and "sentiment" keys. Failures
    (including unexpected sentiment labels) are printed, never raised.
    """
    try:
        if not data: return
        labels = ["Positive", "Neutral", "Negative"]
        # sorted() pins the row order — plain set() iteration order varies per
        # process, which previously shuffled rows between identical requests.
        # assumes sources are strings (sortable) — TODO confirm with collect_data
        sources = sorted(set(d["source"] for d in data))

        matrix = np.zeros((len(sources), len(labels)))

        for d in data:
            i = sources.index(d["source"])
            j = labels.index(d["sentiment"])
            matrix[i][j] += 1

        if matrix.sum() == 0: return

        plt.figure()
        plt.imshow(matrix)
        plt.xticks(range(len(labels)), labels)
        plt.yticks(range(len(sources)), sources)
        plt.colorbar()
        os.makedirs("static", exist_ok=True)
        plt.savefig("static/heatmap.png")
        plt.close()
    except Exception as e:
        print("heatmap error:", e)
106
 
107
 
108
  # =========================
 
110
  # =========================
111
def generate_timeline(data):
    """Plot one 0/1 indicator line per sentiment class to static/timeline.png.

    Best effort: does nothing for empty *data*; failures are printed, never raised.
    """
    try:
        if not data: return

        os.makedirs("static", exist_ok=True)

        series = {"Positive": [], "Negative": [], "Neutral": []}
        for item in data:
            for name, values in series.items():
                values.append(1 if item["sentiment"] == name else 0)

        plt.figure()
        # plot order matches the legend order of the original chart
        for name in ("Positive", "Negative", "Neutral"):
            plt.plot(series[name], label=name)

        plt.legend()
        plt.savefig("static/timeline.png")
        plt.close()
    except Exception as e:
        print("timeline error:", e)
131
 
132
 
133
# =========================
# 🔥 TOPIC MODELING
# =========================
def get_topics(texts):
    """Extract 3 LDA topics (top-5 words each) from *texts*.

    Returns a list of word lists; sentinel values [["data kurang"]],
    [["kosong"]] or [["error"]] when there is too little data, an empty
    vocabulary, or an unexpected failure.
    """
    try:
        usable = [t for t in texts if len(t) > 3]
        if len(usable) < 5: return [["data kurang"]]

        vec = CountVectorizer(min_df=2)
        X = vec.fit_transform(usable)

        if X.shape[1] == 0: return [["kosong"]]

        # random_state pins the decomposition so repeated requests on the
        # same data yield the same topics (the estimator is stochastic)
        lda = LatentDirichletAllocation(n_components=3, random_state=42)
        lda.fit(X)

        words = vec.get_feature_names_out()
        topics = []
        for comp in lda.components_:
            topics.append([words[i] for i in comp.argsort()[-5:]])

        return topics
    except Exception as e:
        # was a bare `except:` that silently swallowed every failure
        print("topic error:", e)
        return [["error"]]
 
 
156
 
157
 
158
# =========================
# 🔥 INSIGHT
# =========================
def generate_insight(data):
    """Summarise the sentiment counts in *data* as a one-line string."""
    counts = Counter(item["sentiment"] for item in data)
    return (
        f"Positive:{counts['Positive']} "
        f"Negative:{counts['Negative']} "
        f"Neutral:{counts['Neutral']}"
    )
 
 
 
 
 
164
 
 
 
165
 
166
# =========================
# 🔥 CLUSTER
# =========================
def cluster_opinions(texts):
    """Group *texts* into 3 TF-IDF / KMeans clusters.

    Returns a list of {"cluster": int, "samples": [up to 3 texts]} dicts,
    or [] when there are fewer than 5 texts or clustering fails.
    """
    try:
        if len(texts) < 5: return []
        X = TfidfVectorizer(max_features=300).fit_transform(texts)
        # random_state makes the assignment reproducible across requests
        model = KMeans(n_clusters=3, n_init=10, random_state=42)
        labels = model.fit_predict(X)
        clusters = {}
        for text, label in zip(texts, labels):
            # int(): KMeans labels are numpy integers, which Flask's jsonify
            # cannot serialize — leaving them raw broke the /analyze response
            clusters.setdefault(int(label), []).append(text)
        return [{"cluster": label, "samples": members[:3]}
                for label, members in clusters.items()]
    except Exception as e:
        # was a bare `except:` that silently swallowed every failure
        print("cluster error:", e)
        return []
180
 
 
181
 
182
# =========================
# 🔥 HOAX
# =========================
def detect_hoax(texts):
    """Keyword-flag the first 10 *texts* as "Hoax" or "Normal" (case-insensitive)."""
    keywords = ("hoax", "bohong", "fitnah", "propaganda")
    flagged = []
    for text in texts[:10]:
        lowered = text.lower()
        label = "Normal"
        for keyword in keywords:
            if keyword in lowered:
                label = "Hoax"
                break
        flagged.append({"text": text, "label": label})
    return flagged
188
 
 
 
189
 
190
# =========================
# 🔥 NETWORK
# =========================
def build_network(texts):
    """Build a word co-occurrence edge list from *texts*.

    For each text the first 5 distinct words are paired; every pair increments
    an undirected edge weight keyed by the sorted word pair. Only edges seen
    more than once are returned, as {"source", "target", "weight"} dicts.
    """
    edges = {}
    for t in texts:
        # dict.fromkeys dedups while preserving word order — list(set(...))
        # picked a nondeterministic 5-word subset, so the graph changed
        # between identical runs
        w = list(dict.fromkeys(t.split()))[:5]
        for a, b in combinations(w, 2):
            key = tuple(sorted([a, b]))
            edges[key] = edges.get(key, 0) + 1
    return [{"source": k[0], "target": k[1], "weight": v} for k, v in edges.items() if v > 1]
201
 
202
 
203
# =========================
# 🔥 BOT NETWORK
# =========================
def detect_bot_network(texts):
    """Flag likely bot accounts via near-duplicate text similarity.

    Texts whose pairwise TF-IDF cosine similarity exceeds 0.75 are linked in a
    graph; nodes with degree centrality above 0.3 are reported as suspected
    bots (coordinated, near-identical posting).

    Returns {"nodes": [...], "edges": [...], "bots": [...]} with at most 10
    bots; the empty structure on short input or failure.
    """
    empty = {"nodes": [], "edges": [], "bots": []}
    try:
        if len(texts) < 5: return empty

        X = TfidfVectorizer(max_features=300).fit_transform(texts)
        sim = cosine_similarity(X)

        G = nx.Graph()
        for i, text in enumerate(texts):
            G.add_node(i, text=text)

        # link every pair of near-duplicate texts
        for i in range(len(texts)):
            for j in range(i + 1, len(texts)):
                if sim[i][j] > 0.75:
                    G.add_edge(i, j)

        central = nx.degree_centrality(G)

        bots = [{"node": i, "score": round(s, 2), "text": texts[i]}
                for i, s in central.items() if s > 0.3]

        nodes = [{"id": i} for i in G.nodes()]
        edges = [{"source": u, "target": v} for u, v in G.edges()]

        return {"nodes": nodes, "edges": edges, "bots": bots[:10]}
    except Exception as e:
        # was a bare `except:` that silently swallowed every failure
        print("bot network error:", e)
        return empty
232
 
233
 
234
# =========================
# 🔥 TREND
# =========================
def predict_trend(data):
    """Classify the overall sentiment trend of *data* via a least-squares slope.

    Each item maps to +1 (Positive), -1 (Negative) or 0 (Neutral); the sign of
    the fitted line's slope over item index decides the label.

    Returns "Naik Positif" / "Naik Negatif", "kurang data" for fewer than 5
    items, or "error" on unexpected failure.
    """
    try:
        y = np.array([1 if d["sentiment"] == "Positive"
                      else -1 if d["sentiment"] == "Negative"
                      else 0
                      for d in data], dtype=float)
        if len(y) < 5: return "kurang data"
        x = np.arange(len(y))
        # The least-squares slope is cov(x, y) / var(x) and var(x) > 0 here,
        # so only the sign of the centered cross-product matters — identical
        # verdict to LinearRegression().fit(X, y).coef_[0] > 0, without the
        # estimator overhead.
        slope = float(((x - x.mean()) * (y - y.mean())).sum())
        return "Naik Positif" if slope > 0 else "Naik Negatif"
    except Exception as e:
        # was a bare `except:` that silently swallowed every failure
        print("trend error:", e)
        return "error"
 
 
 
 
246
 
247
 
248
# =========================
# 🔥 ROUTES
# =========================
@app.route("/")
def home():
    """Serve the dashboard page (templates/index.html)."""
    return render_template("index.html")
254
 
255
 
256
@app.route("/analyze", methods=["POST"])
def analyze():
    """Run the full analysis pipeline for a keyword and return the results as JSON.

    Expects a JSON body like {"keyword": "...", "source": "all"}; "source"
    defaults to "all". Side effects: writes the wordcloud/heatmap/timeline
    PNGs and result.csv under static/. Any failure is printed and answered
    with {"data": []}.
    """
    try:
        keyword=request.json.get("keyword")
        source=request.json.get("source","all")

        # collect_data yields (source, text) pairs; cap work at 100 items
        raw=collect_data(keyword,source)

        texts=[t for s,t in raw][:100]
        sources=[s for s,t in raw][:100]

        sentiments=predict(texts)

        result=[{"text":t,"sentiment":s,"source":src} for t,s,src in zip(texts,sentiments,sources)]

        # VISUAL — each writes a PNG under static/ (best effort, never raises)
        generate_wordcloud(texts)
        generate_heatmap(result)
        generate_timeline(result)

        # ANALYSIS
        top_words=get_top_words(texts)
        topics=get_topics(texts)
        insight=generate_insight(result)
        clusters=cluster_opinions(texts)
        hoax=detect_hoax(texts)
        network=build_network(texts)
        bot_network=detect_bot_network(texts)
        trend=predict_trend(result)

        # ADVANCED — no-op stubs when the optional service modules are missing
        bot_bert=detect_bot_bert(texts)
        fake_news=detect_fake_news(texts)
        gnn=run_gnn(bot_network["nodes"], bot_network["edges"])

        # SAVE CSV — consumed later by the /download route
        os.makedirs("static",exist_ok=True)
        pd.DataFrame(result).to_csv("static/result.csv",index=False)

        return jsonify({
            "data":result,
            "top_words":top_words,
            "topics":topics,
            "insight":insight,
            "clusters":clusters,
            "hoax":hoax,
            "network":network,
            "bot_network":bot_network,
            "trend":trend,
            "bot_bert":bot_bert,
            "fake_news":fake_news,
            "gnn":gnn
        })

    except Exception as e:
        # top-level route boundary: log and degrade to an empty payload
        print("ERROR:",e)
        return jsonify({"data":[]})
313
 
314
 
315
@app.route("/download")
def download():
    """Download the latest analysis results as a CSV attachment.

    NOTE(review): errors (500) if /analyze has not yet written
    static/result.csv — confirm whether a friendlier response is wanted.
    """
    return send_file("static/result.csv",as_attachment=True)
318
 
319
 
320
# =========================
# RUN
# =========================
if __name__=="__main__":
    # listen on all interfaces, port 7860 — presumably the Hugging Face
    # Spaces convention for this deployment; verify against the Space config
    app.run(host="0.0.0.0",port=7860)