noranisa committed on
Commit
45e75e6
Β·
verified Β·
1 Parent(s): 809e115

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +165 -136
main.py CHANGED
@@ -3,7 +3,7 @@ from services.aggregator import collect_data
3
  from services.sentiment import predict
4
 
5
  # =========================
6
- # IMPORT TAMBAHAN
7
  # =========================
8
  from collections import Counter
9
  import pandas as pd
@@ -11,22 +11,19 @@ import os
11
  import re
12
  import numpy as np
13
 
14
- # VISUAL
15
  from wordcloud import WordCloud
16
  import matplotlib.pyplot as plt
17
 
18
- # ML
19
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
20
  from sklearn.decomposition import LatentDirichletAllocation
21
  from sklearn.cluster import KMeans
22
  from sklearn.metrics.pairwise import cosine_similarity
23
  from sklearn.linear_model import LinearRegression
24
 
25
- # GRAPH
26
  import networkx as nx
27
  from itertools import combinations
28
 
29
- # OPTIONAL ADVANCED
30
  try:
31
  from services.bot_bert import detect_bot_bert
32
  except:
@@ -42,46 +39,56 @@ try:
42
  except:
43
  def run_gnn(n,e): return []
44
 
 
 
 
45
  app = Flask(__name__)
46
 
 
47
  # =========================
48
- # πŸ”₯ UTIL
49
  # =========================
50
  def clean_text(t):
51
- return re.sub(r'[^a-zA-Z\s]', '', t.lower())
52
 
53
 
54
  # =========================
55
- # πŸ”₯ TOP WORDS
56
  # =========================
57
  def get_top_words(texts):
58
  words = []
59
  for t in texts:
60
  words.extend(clean_text(t).split())
61
- return [{"word":w,"count":c} for w,c in Counter(words).most_common(10)]
62
 
63
 
64
  # =========================
65
- # πŸ”₯ WORDCLOUD
66
  # =========================
67
  def generate_wordcloud(texts):
68
  try:
69
  os.makedirs("static", exist_ok=True)
70
- texts = [t for t in texts if len(t.strip())>3]
71
- if not texts: return
72
- wc = WordCloud(width=800,height=400).generate(" ".join(texts))
 
 
 
73
  wc.to_file("static/wordcloud.png")
 
74
  except Exception as e:
75
- print("wordcloud error:",e)
76
 
77
 
78
  # =========================
79
- # πŸ”₯ HEATMAP
80
  # =========================
81
  def generate_heatmap(data):
82
  try:
83
- if not data: return
84
- labels = ["Positive","Neutral","Negative"]
 
 
85
  sources = list(set([d["source"] for d in data]))
86
 
87
  matrix = np.zeros((len(sources), len(labels)))
@@ -89,164 +96,197 @@ def generate_heatmap(data):
89
  for d in data:
90
  i = sources.index(d["source"])
91
  j = labels.index(d["sentiment"])
92
- matrix[i][j]+=1
93
 
94
- if matrix.sum()==0: return
 
95
 
96
  plt.figure()
97
  plt.imshow(matrix)
98
- plt.xticks(range(len(labels)),labels)
99
- plt.yticks(range(len(sources)),sources)
100
  plt.colorbar()
101
- os.makedirs("static",exist_ok=True)
102
  plt.savefig("static/heatmap.png")
103
  plt.close()
 
104
  except Exception as e:
105
- print("heatmap error:",e)
106
 
107
 
108
  # =========================
109
- # πŸ”₯ TIMELINE
110
  # =========================
111
  def generate_timeline(data):
112
  try:
113
- if not data: return
114
- os.makedirs("static", exist_ok=True)
 
 
115
 
116
- pos,neg,neu=[],[],[]
117
  for d in data:
118
- pos.append(1 if d["sentiment"]=="Positive" else 0)
119
- neg.append(1 if d["sentiment"]=="Negative" else 0)
120
- neu.append(1 if d["sentiment"]=="Neutral" else 0)
121
 
122
  plt.figure()
123
- plt.plot(pos,label="Positive")
124
- plt.plot(neg,label="Negative")
125
- plt.plot(neu,label="Neutral")
126
  plt.legend()
 
127
  plt.savefig("static/timeline.png")
128
  plt.close()
 
129
  except Exception as e:
130
- print("timeline error:",e)
131
 
132
 
133
  # =========================
134
- # πŸ”₯ TOPIC MODELING
135
  # =========================
136
  def get_topics(texts):
137
  try:
138
- texts = [t for t in texts if len(t)>3]
139
- if len(texts)<5: return [["data kurang"]]
 
 
140
 
141
  vec = CountVectorizer(min_df=2)
142
  X = vec.fit_transform(texts)
143
 
144
- if X.shape[1]==0: return [["kosong"]]
 
145
 
146
  lda = LatentDirichletAllocation(n_components=3)
147
  lda.fit(X)
148
 
149
  words = vec.get_feature_names_out()
150
- topics=[]
 
151
  for t in lda.components_:
152
  topics.append([words[i] for i in t.argsort()[-5:]])
153
- return topics
154
- except:
155
- return [["error"]]
156
 
 
157
 
158
- # =========================
159
- # πŸ”₯ INSIGHT
160
- # =========================
161
- def generate_insight(data):
162
- s=[d["sentiment"] for d in data]
163
- return f"Positive:{s.count('Positive')} Negative:{s.count('Negative')} Neutral:{s.count('Neutral')}"
164
 
165
 
166
  # =========================
167
- # πŸ”₯ CLUSTER
168
  # =========================
169
  def cluster_opinions(texts):
170
  try:
171
- if len(texts)<5: return []
172
- X=TfidfVectorizer(max_features=300).fit_transform(texts)
173
- k=KMeans(n_clusters=3,n_init=10).fit(X)
174
- clusters={}
175
- for i,l in enumerate(k.labels_):
176
- clusters.setdefault(l,[]).append(texts[i])
177
- return [{"cluster":k,"samples":v[:3]} for k,v in clusters.items()]
178
- except:
 
 
 
 
 
 
 
179
  return []
180
 
181
 
182
  # =========================
183
- # πŸ”₯ HOAX
184
  # =========================
185
  def detect_hoax(texts):
186
- kw=["hoax","bohong","fitnah","propaganda"]
187
- return [{"text":t,"label":"Hoax" if any(k in t.lower() for k in kw) else "Normal"} for t in texts[:10]]
 
 
 
188
 
189
 
190
  # =========================
191
- # πŸ”₯ NETWORK
192
  # =========================
193
  def build_network(texts):
194
- edges={}
 
195
  for t in texts:
196
- w=list(set(t.split()))[:5]
197
- for a,b in combinations(w,2):
198
- key=tuple(sorted([a,b]))
199
- edges[key]=edges.get(key,0)+1
200
- return [{"source":k[0],"target":k[1],"weight":v} for k,v in edges.items() if v>1]
 
201
 
202
 
203
  # =========================
204
- # πŸ”₯ BOT NETWORK
205
  # =========================
206
  def detect_bot_network(texts):
207
  try:
208
- if len(texts)<5: return {"nodes":[],"edges":[],"bots":[]}
 
 
 
 
209
 
210
- X=TfidfVectorizer(max_features=300).fit_transform(texts)
211
- sim=cosine_similarity(X)
212
 
213
- G=nx.Graph()
214
  for i in range(len(texts)):
215
- G.add_node(i,text=texts[i])
216
 
217
  for i in range(len(texts)):
218
- for j in range(i+1,len(texts)):
219
- if sim[i][j]>0.75:
220
- G.add_edge(i,j)
221
 
222
- central=nx.degree_centrality(G)
223
 
224
- bots=[{"node":i,"score":round(s,2),"text":texts[i]} for i,s in central.items() if s>0.3]
 
 
 
225
 
226
- nodes=[{"id":i} for i in G.nodes()]
227
- edges=[{"source":u,"target":v} for u,v in G.edges()]
228
 
229
- return {"nodes":nodes,"edges":edges,"bots":bots[:10]}
230
- except:
231
- return {"nodes":[],"edges":[],"bots":[]}
 
 
232
 
233
 
234
  # =========================
235
- # πŸ”₯ TREND
236
  # =========================
237
  def predict_trend(data):
238
  try:
239
- y=[1 if d["sentiment"]=="Positive" else -1 if d["sentiment"]=="Negative" else 0 for d in data]
240
- if len(y)<5: return "kurang data"
241
- X=np.arange(len(y)).reshape(-1,1)
242
- model=LinearRegression().fit(X,y)
243
- return "Naik Positif" if model.coef_[0]>0 else "Naik Negatif"
244
- except:
245
- return "error"
 
 
 
 
 
 
 
 
 
 
246
 
247
 
248
  # =========================
249
- # πŸ”₯ ROUTES
250
  # =========================
251
  @app.route("/")
252
  def home():
@@ -256,17 +296,20 @@ def home():
256
  @app.route("/analyze", methods=["POST"])
257
  def analyze():
258
  try:
259
- keyword=request.json.get("keyword")
260
- source=request.json.get("source","all")
261
 
262
- raw=collect_data(keyword,source)
263
 
264
- texts=[t for s,t in raw][:100]
265
- sources=[s for s,t in raw][:100]
266
 
267
- sentiments=predict(texts)
268
 
269
- result=[{"text":t,"sentiment":s,"source":src} for t,s,src in zip(texts,sentiments,sources)]
 
 
 
270
 
271
  # VISUAL
272
  generate_wordcloud(texts)
@@ -274,51 +317,37 @@ def analyze():
274
  generate_timeline(result)
275
 
276
  # ANALYSIS
277
- top_words=get_top_words(texts)
278
- topics=get_topics(texts)
279
- insight=generate_insight(result)
280
- clusters=cluster_opinions(texts)
281
- hoax=detect_hoax(texts)
282
- network=build_network(texts)
283
- bot_network=detect_bot_network(texts)
284
- trend=predict_trend(result)
285
-
286
- # ADVANCED
287
- bot_bert=detect_bot_bert(texts)
288
- fake_news=detect_fake_news(texts)
289
- gnn=run_gnn(bot_network["nodes"], bot_network["edges"])
290
-
291
- # SAVE CSV
292
- os.makedirs("static",exist_ok=True)
293
- pd.DataFrame(result).to_csv("static/result.csv",index=False)
294
-
295
- return jsonify({
296
- "data":result,
297
- "top_words":top_words,
298
- "topics":topics,
299
- "insight":insight,
300
- "clusters":clusters,
301
- "hoax":hoax,
302
- "network":network,
303
- "bot_network":bot_network,
304
- "trend":trend,
305
- "bot_bert":bot_bert,
306
- "fake_news":fake_news,
307
- "gnn":gnn
308
- })
309
 
310
  except Exception as e:
311
- print("ERROR:",e)
312
- return jsonify({"data":[]})
313
 
314
 
315
  @app.route("/download")
316
  def download():
317
- return send_file("static/result.csv",as_attachment=True)
318
 
319
 
320
  # =========================
321
  # RUN
322
  # =========================
323
- if __name__=="__main__":
324
- app.run(host="0.0.0.0",port=7860)
 
3
  from services.sentiment import predict
4
 
5
  # =========================
6
+ # IMPORT
7
  # =========================
8
  from collections import Counter
9
  import pandas as pd
 
11
  import re
12
  import numpy as np
13
 
 
14
  from wordcloud import WordCloud
15
  import matplotlib.pyplot as plt
16
 
 
17
  from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
18
  from sklearn.decomposition import LatentDirichletAllocation
19
  from sklearn.cluster import KMeans
20
  from sklearn.metrics.pairwise import cosine_similarity
21
  from sklearn.linear_model import LinearRegression
22
 
 
23
  import networkx as nx
24
  from itertools import combinations
25
 
26
+ # OPTIONAL (SAFE IMPORT)
27
  try:
28
  from services.bot_bert import detect_bot_bert
29
  except:
 
39
  except:
40
  def run_gnn(n,e): return []
41
 
42
+ # =========================
43
+ # INIT
44
+ # =========================
45
  app = Flask(__name__)
46
 
47
+
48
  # =========================
49
+ # CLEAN TEXT
50
  # =========================
51
def clean_text(t):
    """Lowercase *t* and drop every character that is not an ASCII letter or whitespace."""
    lowered = str(t).lower()
    return re.sub(r'[^a-zA-Z\s]', '', lowered)
53
 
54
 
55
  # =========================
56
+ # TOP WORDS
57
  # =========================
58
def get_top_words(texts):
    """Return the 10 most frequent cleaned words as [{"word": ..., "count": ...}]."""
    freq = Counter()
    for text in texts:
        freq.update(clean_text(text).split())
    return [{"word": w, "count": c} for w, c in freq.most_common(10)]
63
 
64
 
65
  # =========================
66
+ # WORDCLOUD
67
  # =========================
68
def generate_wordcloud(texts):
    """Render a word cloud of *texts* to static/wordcloud.png.

    Best-effort: texts shorter than 4 characters are ignored, and any
    failure is logged and swallowed so the caller keeps running.
    """
    try:
        os.makedirs("static", exist_ok=True)

        usable = [t for t in texts if len(t.strip()) > 3]
        if not usable:
            return

        joined = " ".join(usable)
        cloud = WordCloud(width=800, height=400).generate(joined)
        cloud.to_file("static/wordcloud.png")

    except Exception as e:
        print("❌ wordcloud error:", e)
81
 
82
 
83
  # =========================
84
+ # HEATMAP
85
  # =========================
86
def generate_heatmap(data):
    """Render a source-by-sentiment count heatmap to static/heatmap.png.

    *data* is a list of dicts with "source" and "sentiment" keys.
    Best-effort: any failure is logged and swallowed.
    """
    try:
        if not data:
            return

        labels = ["Positive", "Neutral", "Negative"]
        sources = list(set([d["source"] for d in data]))

        matrix = np.zeros((len(sources), len(labels)))

        for d in data:
            i = sources.index(d["source"])
            j = labels.index(d["sentiment"])
            matrix[i][j] += 1

        # All-zero matrix means no sentiment matched the known labels.
        if matrix.sum() == 0:
            return

        plt.figure()
        plt.imshow(matrix)
        plt.xticks(range(len(labels)), labels)
        plt.yticks(range(len(sources)), sources)
        plt.colorbar()

        # Fix: ensure the output directory exists before saving — this
        # function must not depend on another generator creating it first.
        os.makedirs("static", exist_ok=True)
        plt.savefig("static/heatmap.png")
        plt.close()

    except Exception as e:
        print("❌ heatmap error:", e)
115
 
116
 
117
  # =========================
118
+ # TIMELINE
119
  # =========================
120
def generate_timeline(data):
    """Plot per-item Positive/Negative/Neutral indicator series to static/timeline.png.

    The x-axis is simply item order (no timestamps in *data*).
    Best-effort: any failure is logged and swallowed.
    """
    try:
        if not data:
            return

        pos, neg, neu = [], [], []

        for d in data:
            pos.append(1 if d["sentiment"] == "Positive" else 0)
            neg.append(1 if d["sentiment"] == "Negative" else 0)
            neu.append(1 if d["sentiment"] == "Neutral" else 0)

        plt.figure()
        plt.plot(pos, label="Positive")
        plt.plot(neg, label="Negative")
        plt.plot(neu, label="Neutral")
        plt.legend()

        # Fix: ensure the output directory exists before saving — this
        # function must not depend on another generator creating it first.
        os.makedirs("static", exist_ok=True)
        plt.savefig("static/timeline.png")
        plt.close()

    except Exception as e:
        print("❌ timeline error:", e)
143
 
144
 
145
  # =========================
146
+ # TOPIC MODELING
147
  # =========================
148
def get_topics(texts, n_topics=3):
    """Extract *n_topics* LDA topics from *texts*; each topic is its top-5 words.

    Returns a list of word lists. Sentinel values: [["data kurang"]] when
    fewer than 5 usable texts, [["tidak ada kata"]] when the vocabulary is
    empty, [["error"]] on any unexpected failure.
    """
    try:
        texts = [t for t in texts if len(t.strip()) > 3]

        if len(texts) < 5:
            return [["data kurang"]]

        vec = CountVectorizer(min_df=2)
        X = vec.fit_transform(texts)

        if X.shape[1] == 0:
            return [["tidak ada kata"]]

        # random_state pins LDA initialization so repeated requests on the
        # same data yield the same topics.
        lda = LatentDirichletAllocation(n_components=n_topics, random_state=0)
        lda.fit(X)

        words = vec.get_feature_names_out()

        topics = []
        for component in lda.components_:
            # argsort ascending; the last 5 indices are the heaviest words.
            topics.append([words[i] for i in component.argsort()[-5:]])

        return topics

    except Exception as e:
        print("❌ topic error:", e)
        return [["error"]]
 
 
 
175
 
176
 
177
  # =========================
178
+ # CLUSTER
179
  # =========================
180
def cluster_opinions(texts):
    """Group *texts* into 3 TF-IDF/KMeans clusters.

    Returns [{"cluster": int, "samples": [up to 3 texts]}, ...]; empty list
    when there are fewer than 5 texts or clustering fails.
    """
    try:
        if len(texts) < 5:
            return []

        X = TfidfVectorizer(max_features=300).fit_transform(texts)
        # random_state makes cluster assignment reproducible per request.
        model = KMeans(n_clusters=3, n_init=10, random_state=0)
        labels = model.fit_predict(X)

        clusters = {}
        for i, label in enumerate(labels):
            # Fix: cast numpy integer labels to int — numpy ints are not
            # JSON serializable and would break jsonify downstream.
            clusters.setdefault(int(label), []).append(texts[i])

        return [{"cluster": k, "samples": v[:3]} for k, v in clusters.items()]

    except Exception as e:
        print("❌ cluster error:", e)
        return []
198
 
199
 
200
  # =========================
201
+ # HOAX
202
  # =========================
203
def detect_hoax(texts):
    """Keyword-flag the first 10 texts as "Hoax" or "Normal"."""
    kw = ["hoax", "bohong", "fitnah", "propaganda"]

    def label_of(text):
        lowered = text.lower()
        flagged = any(k in lowered for k in kw)
        return "Hoax" if flagged else "Normal"

    return [{"text": t, "label": label_of(t)} for t in texts[:10]]
209
 
210
 
211
  # =========================
212
+ # NETWORK
213
  # =========================
214
def build_network(texts):
    """Build a word co-occurrence edge list from *texts*.

    Takes up to 5 unique words per text, counts unordered pairs, and keeps
    only edges seen more than once.
    """
    weights = Counter()

    for text in texts:
        vocab = list(set(text.split()))[:5]
        for pair in combinations(vocab, 2):
            weights[tuple(sorted(pair))] += 1

    return [
        {"source": a, "target": b, "weight": w}
        for (a, b), w in weights.items()
        if w > 1
    ]
224
 
225
 
226
  # =========================
227
+ # BOT NETWORK
228
  # =========================
229
def detect_bot_network(texts):
    """Detect coordinated (bot-like) posts via high pairwise TF-IDF similarity.

    Builds a graph with one node per text and an edge wherever cosine
    similarity exceeds 0.75; nodes with degree centrality above 0.3 are
    flagged as bots. Returns {"nodes": [...], "edges": [...], "bots": [...]}.
    """
    try:
        if len(texts) < 5:
            return {"nodes": [], "edges": [], "bots": []}

        X = TfidfVectorizer(max_features=300).fit_transform(texts)
        sim = cosine_similarity(X)

        G = nx.Graph()
        for idx, text in enumerate(texts):
            G.add_node(idx, text=text)

        count = len(texts)
        for i in range(count):
            for j in range(i + 1, count):
                if sim[i][j] > 0.75:
                    G.add_edge(i, j)

        central = nx.degree_centrality(G)

        bots = [
            {"node": node, "score": round(score, 2), "text": texts[node]}
            for node, score in central.items()
            if score > 0.3
        ]

        nodes = [{"id": node} for node in G.nodes()]
        edges = [{"source": u, "target": v} for u, v in G.edges()]

        return {"nodes": nodes, "edges": edges, "bots": bots[:10]}

    except Exception as e:
        print("❌ bot network error:", e)
        return {"nodes": [], "edges": [], "bots": []}
262
 
263
 
264
  # =========================
265
+ # TREND
266
  # =========================
267
def predict_trend(data):
    """Fit a linear trend over the sentiment sequence and describe its direction.

    Sentiments map to +1 (Positive), -1 (Negative), 0 (otherwise); the sign
    of the fitted slope decides the label.
    """
    try:
        scores = []
        for d in data:
            if d["sentiment"] == "Positive":
                scores.append(1)
            elif d["sentiment"] == "Negative":
                scores.append(-1)
            else:
                scores.append(0)

        if len(scores) < 5:
            return "Data kurang"

        X = np.arange(len(scores)).reshape(-1, 1)
        model = LinearRegression().fit(X, scores)

        slope = model.coef_[0]
        return "πŸ“ˆ Positif" if slope > 0 else "πŸ“‰ Negatif"

    except Exception as e:
        print("❌ trend error:", e)
        return "Error"
286
 
287
 
288
  # =========================
289
+ # ROUTES
290
  # =========================
291
  @app.route("/")
292
  def home():
 
296
  @app.route("/analyze", methods=["POST"])
297
  def analyze():
298
  try:
299
+ keyword = request.json.get("keyword")
300
+ source = request.json.get("source", "all")
301
 
302
+ raw = collect_data(keyword, source)
303
 
304
+ texts = [t for s, t in raw][:100]
305
+ sources = [s for s, t in raw][:100]
306
 
307
+ sentiments = predict(texts)
308
 
309
+ result = [
310
+ {"text": t, "sentiment": s, "source": src}
311
+ for t, s, src in zip(texts, sentiments, sources)
312
+ ]
313
 
314
  # VISUAL
315
  generate_wordcloud(texts)
 
317
  generate_timeline(result)
318
 
319
  # ANALYSIS
320
+ response = {
321
+ "data": result,
322
+ "top_words": get_top_words(texts),
323
+ "topics": get_topics(texts),
324
+ "clusters": cluster_opinions(texts),
325
+ "hoax": detect_hoax(texts),
326
+ "network": build_network(texts),
327
+ "bot_network": detect_bot_network(texts),
328
+ "trend": predict_trend(result),
329
+ "bot_bert": detect_bot_bert(texts),
330
+ "fake_news": detect_fake_news(texts),
331
+ "gnn": [] # πŸ”₯ DISABLE TORCH SAFE
332
+ }
333
+
334
+ os.makedirs("static", exist_ok=True)
335
+ pd.DataFrame(result).to_csv("static/result.csv", index=False)
336
+
337
+ return jsonify(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
  except Exception as e:
340
+ print("❌ ERROR:", e)
341
+ return jsonify({"data": []})
342
 
343
 
344
@app.route("/download")
def download():
    """Serve the most recent analysis results CSV as a file download."""
    csv_path = "static/result.csv"
    return send_file(csv_path, as_attachment=True)
347
 
348
 
349
# =========================
# RUN
# =========================
if __name__ == "__main__":
    # Listen on all interfaces; port 7860 — presumably the Hugging Face
    # Spaces default port, confirm against the deployment config.
    app.run(host="0.0.0.0", port=7860)