Spaces:

Sami2000
/

Military_Topical_Sentiment_Analysis

Sleeping

App Files Community

Sami2000 committed on Jul 20, 2025

Commit

84677f9

verified ·

1 Parent(s): 536ee73

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -62

app.py CHANGED Viewed

@@ -32,6 +32,22 @@ subreddit = st.text_input("Specify a subreddit (optional, e.g., 'Military' or 'w
 query = st.text_input("Enter your topic or query:", value="US Army INDOPACOM")
 max_articles = st.slider("Number of news articles:", 5, 25, 12)
 if st.button("Search"):
     # --- Fancy progress bar ---
     progress = st.progress(0, text="Fetching news...")
@@ -43,70 +59,36 @@ if st.button("Search"):
     if articles:
         progress.progress(40, text="Extracting keywords...")
         keywords = extract_keywords(articles)
         progress.progress(60, text="Searching Reddit...")
         reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
         progress.progress(80, text="Analyzing sentiment...")
-        sentiments = analyze_sentiment(reddit_data)
-        progress.progress(100, text="Done!")
-        tab1, tab2, tab3, tab4 = st.tabs(["News", "Keywords", "Reddit", "Sentiment"])
-        with tab1:
-            st.subheader("News Articles")
-            st.dataframe([
-                {
-                    "Title": a.get("title", ""),
-                    "Source": a.get("source", ""),
-                    "Published": a.get("publishedAt", ""),
-                    "URL": a.get("url", "")
-                } for a in articles[:max_articles]
-            ])
-        with tab2:
-            st.subheader("Top Keywords")
-            st.write(", ".join(keywords))
-        with tab3:
-            st.subheader("Reddit Comments")
-            if reddit_data:
-                comments = []
-                if isinstance(reddit_data, dict):
-                    for v in reddit_data.values():
-                        comments.extend(v)
-                elif isinstance(reddit_data, list):
-                    comments = reddit_data
-                if comments:
-                    st.dataframe([
-                        {
-                            "Comment": c.get("body", "")[:140] + ("..." if len(c.get("body", "")) > 140 else ""),
-                            "Subreddit": c.get("subreddit", ""),
-                            "Upvotes": c.get("score", ""),
-                        }
-                        for c in comments[:30]
-                    ])
-                else:
-                    st.info("No Reddit comments found.")
-            else:
-                st.info("No Reddit data found.")
-        with tab4:
-            st.subheader("Sentiment Results")
-            if sentiments:
-                df = pd.DataFrame(sentiments)
-                st.dataframe(df[["body", "sentiment"]].rename(columns={"body": "Comment"}))
-                # Show pie chart of sentiment
-                sentiment_counts = df["sentiment"].value_counts().reset_index()
-                sentiment_counts.columns = ["Sentiment", "Count"]
-                fig = px.pie(sentiment_counts, names="Sentiment", values="Count",
-                             title="Sentiment Distribution")
-                st.plotly_chart(fig, use_container_width=True)
-            else:
-                st.info("No sentiment data found.")
     else:
-        progress.empty()
-        st.warning("No articles found for your search. Try a different query.")
-# --- END OF DASHBOARD CODE ---

 query = st.text_input("Enter your topic or query:", value="US Army INDOPACOM")
 max_articles = st.slider("Number of news articles:", 5, 25, 12)
+# --- CLEANING FUNCTION ---
+def clean_keywords(keywords):
+    """
+    Remove blanks, punctuation-only, and duplicates (case-insensitive).
+    Returns a cleaned list of keywords.
+    """
+    cleaned = []
+    seen = set()
+    for kw in keywords:
+        kw = kw.strip()
+        # Only keep if non-empty and contains at least one alphanumeric character
+        if kw and any(c.isalnum() for c in kw) and kw.lower() not in seen:
+            cleaned.append(kw)
+            seen.add(kw.lower())
+    return cleaned
 if st.button("Search"):
     # --- Fancy progress bar ---
     progress = st.progress(0, text="Fetching news...")
     if articles:
         progress.progress(40, text="Extracting keywords...")
         keywords = extract_keywords(articles)
+        # --- Clean up keywords ---
+        keywords = clean_keywords(keywords)
+        st.write("**Extracted Keywords for Reddit Search:**", keywords)
         progress.progress(60, text="Searching Reddit...")
         reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
         progress.progress(80, text="Analyzing sentiment...")
+        sentiment_results = analyze_sentiment([item["body"] for item in reddit_data])
+        # --- Display results ---
+        st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")
+        # Create DataFrame for results
+        results_df = pd.DataFrame(reddit_data)
+        results_df['sentiment'] = sentiment_results
+        # Optional: Show data table
+        st.dataframe(results_df)
+        # Optional: Show a sentiment plot
+        sentiment_counts = results_df['sentiment'].value_counts()
+        fig = px.bar(
+            x=sentiment_counts.index,
+            y=sentiment_counts.values,
+            labels={'x': 'Sentiment', 'y': 'Count'},
+            title='Sentiment Distribution'
+        )
+        st.plotly_chart(fig, use_container_width=True)
     else:
+        st.warning("No news articles found for that query. Try a different topic or broaden the date range.")