Update app.py
Browse files
app.py
CHANGED
|
@@ -5,64 +5,106 @@ from sentiment_analyzer import analyze_sentiment
|
|
| 5 |
from reddit_search import search_reddit
|
| 6 |
import pandas as pd
|
| 7 |
import plotly.express as px
|
|
|
|
| 8 |
|
| 9 |
-
st.set_page_config(
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
"""
|
| 14 |
-
**About:** This dashboard finds the latest news about a topic, extracts trending keywords,
|
| 15 |
-
and analyzes public sentiment from Reddit using state-of-the-art AI.
|
| 16 |
-
\n
|
| 17 |
-
_Educational demonstration only. Does not represent any official views._
|
| 18 |
-
"""
|
| 19 |
)
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
options=[("Last 24 hours", 1), ("Last 7 days", 7)],
|
| 25 |
-
format_func=lambda x: x[0]
|
| 26 |
)
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
query = st.text_input("Enter your topic or query:", value="Enter Value Here")
|
| 33 |
max_articles = st.slider("Number of news articles:", 5, 25, 12)
|
| 34 |
|
| 35 |
-
# --- CLEANING FUNCTION ---
|
| 36 |
def clean_keywords(keywords):
    """Normalize a keyword list.

    Strips surrounding whitespace, drops entries that are empty or
    punctuation-only, and removes case-insensitive duplicates while
    preserving first-seen order and original casing.
    """
    kept = []
    seen_lower = set()
    for raw in keywords:
        candidate = raw.strip()
        if not candidate:
            continue  # blank after stripping
        if not any(ch.isalnum() for ch in candidate):
            continue  # punctuation-only token
        folded = candidate.lower()
        if folded in seen_lower:
            continue  # case-insensitive duplicate
        seen_lower.add(folded)
        kept.append(candidate)
    return kept
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
progress = st.progress(0, text="Fetching news...")
|
| 54 |
|
| 55 |
-
#
|
| 56 |
progress.progress(10, text="Fetching news articles...")
|
| 57 |
articles = fetch_news(query=query, days=selected_days, max_results=max_articles)
|
| 58 |
|
| 59 |
if articles:
|
| 60 |
progress.progress(40, text="Extracting keywords...")
|
| 61 |
keywords = extract_keywords(articles)
|
| 62 |
-
|
| 63 |
-
# --- Clean up keywords ---
|
| 64 |
keywords = clean_keywords(keywords)
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
progress.progress(60, text="Searching Reddit...")
|
| 68 |
reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
|
|
@@ -70,17 +112,20 @@ if st.button("Search"):
|
|
| 70 |
progress.progress(80, text="Analyzing sentiment...")
|
| 71 |
sentiment_results = analyze_sentiment(reddit_data)
|
| 72 |
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")
|
| 75 |
|
| 76 |
-
#
|
| 77 |
results_df = pd.DataFrame(reddit_data)
|
| 78 |
results_df['sentiment'] = sentiment_results
|
| 79 |
|
| 80 |
-
|
| 81 |
-
st.dataframe(results_df)
|
| 82 |
-
|
| 83 |
-
# Robust and crash-proof sentiment plot!
|
| 84 |
sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
|
| 85 |
sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
|
| 86 |
sentiment_counts = sentiment_counts.rename(str)
|
|
@@ -91,16 +136,47 @@ if st.button("Search"):
|
|
| 91 |
'Count': sentiment_counts.values
|
| 92 |
})
|
| 93 |
|
|
|
|
|
|
|
|
|
|
| 94 |
if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
|
| 95 |
fig = px.bar(
|
| 96 |
sentiment_df,
|
| 97 |
x='Sentiment',
|
| 98 |
y='Count',
|
|
|
|
|
|
|
|
|
|
| 99 |
labels={'Sentiment': 'Sentiment', 'Count': 'Count'},
|
| 100 |
title='Sentiment Distribution'
|
| 101 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
st.plotly_chart(fig, use_container_width=True)
|
| 103 |
else:
|
| 104 |
-
st.info("No valid sentiment data for plotting.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
else:
|
| 106 |
-
st.warning("No news articles found for that query. Try a different topic or broaden the date range.")
|
|
|
|
| 5 |
from reddit_search import search_reddit
|
| 6 |
import pandas as pd
|
| 7 |
import plotly.express as px
|
| 8 |
+
import requests
|
| 9 |
|
| 10 |
+
st.set_page_config(
|
| 11 |
+
page_title="INDOPACOM Sentiment Dashboard",
|
| 12 |
+
layout="wide",
|
| 13 |
+
initial_sidebar_state="expanded"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
)
|
| 15 |
|
| 16 |
+
st.markdown(
|
| 17 |
+
"<h1 style='text-align:center; color:#183153; font-weight:900;'>π Military Sentiment Dashboard</h1>",
|
| 18 |
+
unsafe_allow_html=True
|
|
|
|
|
|
|
| 19 |
)
|
| 20 |
+
st.markdown(
|
| 21 |
+
"<h4 style='text-align:center; color:#375a7f;'>AI-powered OSINT: See what the news & social web really think</h4>",
|
| 22 |
+
unsafe_allow_html=True
|
| 23 |
+
)
|
| 24 |
+
st.markdown("---")
|
| 25 |
+
|
| 26 |
+
# --- Date range, subreddit, and topic input row ---
|
| 27 |
+
col1, col2, col3 = st.columns([1, 1, 2])
|
| 28 |
+
with col1:
|
| 29 |
+
date_range = st.selectbox(
|
| 30 |
+
"Search news from:",
|
| 31 |
+
options=[("Last 24 hours", 1), ("Last 7 days", 7)],
|
| 32 |
+
format_func=lambda x: x[0]
|
| 33 |
+
)
|
| 34 |
+
selected_days = date_range[1]
|
| 35 |
+
with col2:
|
| 36 |
+
subreddit = st.text_input(
|
| 37 |
+
"Subreddit (optional)",
|
| 38 |
+
value="",
|
| 39 |
+
help="e.g. 'Military', 'worldnews', or leave blank for all"
|
| 40 |
+
)
|
| 41 |
+
with col3:
|
| 42 |
+
query = st.text_input("Enter your topic or query:", value="US Army INDOPACOM")
|
| 43 |
|
|
|
|
| 44 |
max_articles = st.slider("Number of news articles:", 5, 25, 12)
|
| 45 |
|
|
|
|
| 46 |
def clean_keywords(keywords):
    """Return keywords stripped, filtered, and de-duplicated.

    An entry survives only if, after stripping whitespace, it is
    non-empty, contains at least one alphanumeric character, and its
    lowercase form has not appeared earlier in the list. First-seen
    order and original casing are preserved.
    """
    # Keyed by lowercase form; dict insertion order keeps first-seen order.
    unique = {}
    for entry in keywords:
        term = entry.strip()
        has_content = bool(term) and any(c.isalnum() for c in term)
        if has_content and term.lower() not in unique:
            unique[term.lower()] = term
    return list(unique.values())
|
| 55 |
|
| 56 |
# --- AI SUMMARY FUNCTION ---
def get_summary_with_hf_llm(keywords, sentiment_counts, top_subreddits, top_posts, user_query):
    """Ask a hosted Hugging Face LLM to summarize the OSINT findings.

    Args:
        keywords: extracted news keywords (only the first 8 are sent).
        sentiment_counts: mapping of sentiment label -> count.
        top_subreddits: most active subreddit names.
        top_posts: example Reddit post titles/snippets.
        user_query: the user's original search query.

    Returns:
        The generated summary string, or a human-readable fallback
        message when the inference API call fails or returns an
        unexpected payload. Never raises.
    """
    prompt = (
        f"Summarize these OSINT findings in 3-4 sentences for a non-technical military audience.\n"
        f"Query: {user_query}\n"
        f"Keywords found: {', '.join(keywords[:8])}...\n"
        f"Sentiment counts: {dict(sentiment_counts)}\n"
        f"Most active subreddits: {', '.join(top_subreddits)}\n"
        f"Example Reddit post titles: {', '.join(top_posts)}\n"
        "Then, suggest 3-5 additional related search terms that could improve situational awareness."
    )

    # NOTE(review): anonymous calls to the HF Inference API are heavily
    # rate-limited; an Authorization bearer-token header is normally
    # required for reliable use -- TODO confirm deployment supplies one.
    url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
    try:
        resp = requests.post(url, json={"inputs": prompt}, timeout=60)
        resp.raise_for_status()
        output = resp.json()
        # The API returns either [{"generated_text": ...}] or
        # {"generated_text": ...} depending on model/pipeline. Guard the
        # shapes explicitly (non-empty list, dict element) so an empty or
        # odd payload yields the "unexpected output" message instead of a
        # misleading IndexError/TypeError caught by the handler below.
        if isinstance(output, list) and output and isinstance(output[0], dict) \
                and "generated_text" in output[0]:
            return output[0]["generated_text"]
        if isinstance(output, dict) and "generated_text" in output:
            return output["generated_text"]
        return "Summary unavailable (unexpected API output)."
    except Exception as e:
        # Network failures, HTTP errors, and malformed JSON all degrade to
        # a readable message so the dashboard keeps rendering.
        return f"Summary unavailable (LLM error: {e})"
|
| 82 |
+
|
| 83 |
+
st.markdown("---")
|
| 84 |
+
|
| 85 |
+
if st.button("π Analyze!"):
|
| 86 |
progress = st.progress(0, text="Fetching news...")
|
| 87 |
|
| 88 |
+
# Fetch news
|
| 89 |
progress.progress(10, text="Fetching news articles...")
|
| 90 |
articles = fetch_news(query=query, days=selected_days, max_results=max_articles)
|
| 91 |
|
| 92 |
if articles:
|
| 93 |
progress.progress(40, text="Extracting keywords...")
|
| 94 |
keywords = extract_keywords(articles)
|
|
|
|
|
|
|
| 95 |
keywords = clean_keywords(keywords)
|
| 96 |
+
|
| 97 |
+
st.markdown("#### π° News Stories")
|
| 98 |
+
with st.expander("View fetched news stories", expanded=False):
|
| 99 |
+
for art in articles:
|
| 100 |
+
st.markdown(f"<div style='padding:8px 0;'><b>{art.get('title','')}</b><br>"
|
| 101 |
+
f"<span style='color:#4a4a4a;font-size:0.9em'>{art.get('content','')[:180]}...</span></div>",
|
| 102 |
+
unsafe_allow_html=True)
|
| 103 |
+
st.markdown("---")
|
| 104 |
+
|
| 105 |
+
st.markdown("#### π·οΈ Extracted Keywords")
|
| 106 |
+
st.info(", ".join(keywords) if keywords else "No keywords found.", icon="π")
|
| 107 |
+
st.markdown("---")
|
| 108 |
|
| 109 |
progress.progress(60, text="Searching Reddit...")
|
| 110 |
reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
|
|
|
|
| 112 |
progress.progress(80, text="Analyzing sentiment...")
|
| 113 |
sentiment_results = analyze_sentiment(reddit_data)
|
| 114 |
|
| 115 |
+
st.markdown("#### πΎ Reddit Posts")
|
| 116 |
+
if reddit_data:
|
| 117 |
+
st.dataframe(pd.DataFrame(reddit_data)[["body", "subreddit", "score"]], height=180)
|
| 118 |
+
else:
|
| 119 |
+
st.warning("No Reddit posts found for these keywords.", icon="β οΈ")
|
| 120 |
+
st.markdown("---")
|
| 121 |
+
|
| 122 |
st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")
|
| 123 |
|
| 124 |
+
# --- Sentiment Results Table & Plot ---
|
| 125 |
results_df = pd.DataFrame(reddit_data)
|
| 126 |
results_df['sentiment'] = sentiment_results
|
| 127 |
|
| 128 |
+
st.markdown("#### π§ Sentiment Analysis")
|
|
|
|
|
|
|
|
|
|
| 129 |
sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
|
| 130 |
sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
|
| 131 |
sentiment_counts = sentiment_counts.rename(str)
|
|
|
|
| 136 |
'Count': sentiment_counts.values
|
| 137 |
})
|
| 138 |
|
| 139 |
+
# Nice color palette for bars
|
| 140 |
+
palette = ['#183153', '#3277b3', '#375a7f', '#3bb273', '#ffb347', '#e05a47']
|
| 141 |
+
|
| 142 |
if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
|
| 143 |
fig = px.bar(
|
| 144 |
sentiment_df,
|
| 145 |
x='Sentiment',
|
| 146 |
y='Count',
|
| 147 |
+
color='Sentiment',
|
| 148 |
+
color_discrete_sequence=palette,
|
| 149 |
+
text='Count',
|
| 150 |
labels={'Sentiment': 'Sentiment', 'Count': 'Count'},
|
| 151 |
title='Sentiment Distribution'
|
| 152 |
)
|
| 153 |
+
fig.update_traces(marker_line_width=1, textposition="outside")
|
| 154 |
+
fig.update_layout(
|
| 155 |
+
yaxis=dict(title='Count'),
|
| 156 |
+
xaxis=dict(title='Sentiment'),
|
| 157 |
+
showlegend=False,
|
| 158 |
+
plot_bgcolor="#f8fafc",
|
| 159 |
+
paper_bgcolor="#f8fafc",
|
| 160 |
+
font=dict(size=15),
|
| 161 |
+
margin=dict(t=60, b=60, r=40, l=40)
|
| 162 |
+
)
|
| 163 |
st.plotly_chart(fig, use_container_width=True)
|
| 164 |
else:
|
| 165 |
+
st.info("No valid sentiment data for plotting.", icon="πΆ")
|
| 166 |
+
|
| 167 |
+
# --- AI SUMMARY SECTION ---
|
| 168 |
+
# Get top 3 subreddits and top 3 post titles for summary
|
| 169 |
+
top_subreddits = results_df['subreddit'].value_counts().index[:3].tolist() if 'subreddit' in results_df else []
|
| 170 |
+
top_posts = results_df['body'].dropna().astype(str).str[:50].tolist()[:3] if 'body' in results_df else []
|
| 171 |
+
|
| 172 |
+
st.markdown("### π AI-Generated Summary & Suggestions")
|
| 173 |
+
summary = get_summary_with_hf_llm(
|
| 174 |
+
keywords=keywords,
|
| 175 |
+
sentiment_counts=sentiment_counts,
|
| 176 |
+
top_subreddits=top_subreddits,
|
| 177 |
+
top_posts=top_posts,
|
| 178 |
+
user_query=query
|
| 179 |
+
)
|
| 180 |
+
st.info(summary)
|
| 181 |
else:
|
| 182 |
+
st.warning("No news articles found for that query. Try a different topic or broaden the date range.", icon="π°")
|