Sami2000 committed on
Commit
12d8780
·
verified ·
1 Parent(s): 375f97c

Upload 7 files

Browse files
Files changed (7) hide show
  1. app.py +112 -0
  2. keyword_extractor.py +30 -0
  3. main.py +27 -0
  4. news_fetcher.py +53 -0
  5. reddit_search.py +59 -0
  6. results_compiler.py +29 -0
  7. sentiment_analyzer.py +37 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit dashboard: fetch news, extract keywords, search Reddit, analyze sentiment."""
import streamlit as st
from news_fetcher import fetch_news
from keyword_extractor import extract_keywords
from sentiment_analyzer import analyze_sentiment
from reddit_search import search_reddit
import pandas as pd
import plotly.express as px

# Page chrome must be configured before any other Streamlit call.
st.set_page_config(page_title="INDOPACOM Sentiment Dashboard", layout="wide")

st.title("Military Sentiment Dashboard")
st.write(
    """
🔎 **About:** This dashboard finds the latest news about a topic, extracts trending keywords,
and analyzes public sentiment from Reddit using state-of-the-art AI.
\n
_Educational demonstration only. Does not represent any official views._
"""
)

# --- NEW: Date range selection ---
# Options are (label, days-back) pairs; format_func shows only the label.
date_range = st.selectbox(
    "Search news from:",
    options=[("Last 24 hours", 1), ("Last 7 days", 7)],
    format_func=lambda x: x[0]
)
selected_days = date_range[1]  # integer day count passed to fetch_news

# --- NEW: Subreddit input ---
subreddit = st.text_input("Specify a subreddit (optional, e.g., 'Military' or 'worldnews'). Leave blank for all.", value="")

query = st.text_input("Enter your topic or query:", value="US Army INDOPACOM")
max_articles = st.slider("Number of news articles:", 5, 25, 12)

if st.button("Search"):
    # --- Fancy progress bar ---
    progress = st.progress(0, text="Fetching news...")

    # Step 1: Fetch news
    progress.progress(10, text="Fetching news articles...")
    articles = fetch_news(query=query, days=selected_days, max_results=max_articles)

    if articles:
        progress.progress(40, text="Extracting keywords...")
        keywords = extract_keywords(articles)

        progress.progress(60, text="Searching Reddit...")
        # Blank subreddit input is normalized to None -> search all of Reddit.
        reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)

        progress.progress(80, text="Analyzing sentiment...")
        sentiments = analyze_sentiment(reddit_data)
        progress.progress(100, text="Done!")

        tab1, tab2, tab3, tab4 = st.tabs(["News", "Keywords", "Reddit", "Sentiment"])

        with tab1:
            st.subheader("News Articles")
            st.dataframe([
                {
                    "Title": a.get("title", ""),
                    "Source": a.get("source", ""),
                    "Published": a.get("publishedAt", ""),
                    "URL": a.get("url", "")
                } for a in articles[:max_articles]
            ])

        with tab2:
            st.subheader("Top Keywords")
            st.write(", ".join(keywords))

        with tab3:
            st.subheader("Reddit Comments")
            if reddit_data:
                comments = []
                # search_reddit may return either {keyword: [comments]} or a
                # flat list of comment dicts; normalize to a flat list here.
                if isinstance(reddit_data, dict):
                    for v in reddit_data.values():
                        comments.extend(v)
                elif isinstance(reddit_data, list):
                    comments = reddit_data

                if comments:
                    st.dataframe([
                        {
                            # Truncate long comment bodies to 140 chars for display.
                            "Comment": c.get("body", "")[:140] + ("..." if len(c.get("body", "")) > 140 else ""),
                            "Subreddit": c.get("subreddit", ""),
                            "Upvotes": c.get("score", ""),
                        }
                        for c in comments[:30]
                    ])
                else:
                    st.info("No Reddit comments found.")
            else:
                st.info("No Reddit data found.")

        with tab4:
            st.subheader("Sentiment Results")
            if sentiments:
                # sentiments is a list of {"body", "sentiment"} dicts.
                df = pd.DataFrame(sentiments)
                st.dataframe(df[["body", "sentiment"]].rename(columns={"body": "Comment"}))
                # Show pie chart of sentiment
                sentiment_counts = df["sentiment"].value_counts().reset_index()
                sentiment_counts.columns = ["Sentiment", "Count"]
                fig = px.pie(sentiment_counts, names="Sentiment", values="Count",
                             title="Sentiment Distribution")
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("No sentiment data found.")

    else:
        # No articles: clear the progress bar instead of leaving it stuck.
        progress.empty()
        st.warning("No articles found for your search. Try a different query.")
# --- END OF DASHBOARD CODE ---
keyword_extractor.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# keyword_extractor.py (using KeyBERT, no OpenAI required)

from keybert import KeyBERT

# Initialize KeyBERT with a small, efficient model (loaded once at import time)
kw_model = KeyBERT('all-MiniLM-L6-v2')

def extract_keywords(articles, num_keywords=10):
    """
    Extract the top keywords/phrases from a batch of news articles.

    articles: list of dicts, each with 'title' and 'content'
    num_keywords: maximum number of keywords/phrases to return
    returns: list of unique keywords/phrases (strings)
    """
    # NewsAPI frequently returns "content": None. .get("content", "") only
    # covers a *missing* key, so concatenating the result could raise
    # TypeError on None; 'or ""' handles present-but-null values too.
    all_text = " ".join(
        (art.get("title") or "") + " " + (art.get("content") or "")
        for art in articles if art
    )
    if not all_text.strip():
        # Nothing to analyze (empty article list or all-empty texts).
        return []
    # Extract top keywords and phrases (1-2 word ngrams, English stop words)
    keywords = kw_model.extract_keywords(
        all_text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=num_keywords
    )
    # keywords is a list of tuples: [(keyword, score), ...] — keep strings only
    return [kw for kw, score in keywords]
main.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""FastAPI backend exposing the news/Reddit sentiment pipeline."""
import logging

from fastapi import FastAPI, Query

from news_fetcher import fetch_news
from keyword_extractor import extract_keywords
from reddit_search import search_reddit
from sentiment_analyzer import analyze_sentiment
from results_compiler import compile_results

# Module-level logger; preferred over print() in a web service, and avoids
# dumping entire Reddit payloads to stdout on every request.
logger = logging.getLogger(__name__)

app = FastAPI()

@app.get("/")
def read_root():
    """Health-check / landing endpoint."""
    return {"message": "Hello, this is your INDOPACOM Sentiment App backend!"}

@app.get("/run_workflow")
def run_workflow(query: str = Query("US Army INDOPACOM")):
    """
    Run the full pipeline for *query*:
    fetch news -> extract keywords -> search Reddit -> analyze sentiment -> compile.

    Returns the dict produced by compile_results.
    """
    articles = fetch_news(query=query)  # pass user query to fetch_news
    logger.info("Fetched %d articles for query: %s", len(articles), query)
    keywords = extract_keywords(articles)
    logger.info("Extracted keywords: %s", keywords)
    reddit_data = search_reddit(keywords)
    logger.info("Collected %d Reddit posts", len(reddit_data))
    sentiment = analyze_sentiment(reddit_data)
    logger.info("Analyzed sentiment for %d comments", len(sentiment))
    results = compile_results(articles, keywords, reddit_data, sentiment)
    return results
news_fetcher.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dotenv import load_dotenv

# On Hugging Face, just call load_dotenv() (or omit if you want)
load_dotenv()

import requests
from datetime import datetime, timedelta, timezone

GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
NEWSAPI_API_KEY = os.getenv("NEWSAPI_API_KEY")

# Network timeout (seconds) so a stalled API cannot hang the whole app.
REQUEST_TIMEOUT = 10


def fetch_news(query="US Army INDOPACOM", days=1, max_results=10):
    """
    Fetch recent news about *query* from GNews and NewsAPI (best effort).

    query: free-text search string (URL-encoded via `params`, so spaces and
        special characters are safe)
    days: how many days back to search
    max_results: maximum articles requested from each API
    returns: de-duplicated (by title) list of article dicts; [] when keys
        are missing or both requests fail
    """
    if not GNEWS_API_KEY or not NEWSAPI_API_KEY:
        print("Missing API keys! Check your Hugging Face secrets.")
        return []

    # Timezone-aware "now" — datetime.utcnow() is deprecated.
    today = datetime.now(timezone.utc)
    from_date = (today - timedelta(days=days)).strftime("%Y-%m-%d")
    to_date = today.strftime("%Y-%m-%d")

    articles = []

    # GNews API — pass params= so requests builds and encodes the query
    # string (the old f-string URL broke on queries with spaces/&/#).
    try:
        gnews_response = requests.get(
            "https://gnews.io/api/v4/search",
            params={
                "q": query,
                "from": from_date,
                "to": to_date,
                "lang": "en",
                "max": max_results,
                "apikey": GNEWS_API_KEY,
            },
            timeout=REQUEST_TIMEOUT,
        )
        if gnews_response.status_code == 200:
            articles.extend(gnews_response.json().get("articles", []))
        else:
            print(f"GNews API error: {gnews_response.status_code}")
    except requests.RequestException as e:
        # Network failure should degrade to "no results", not crash the app.
        print(f"GNews request failed: {e}")

    # NewsAPI
    try:
        newsapi_response = requests.get(
            "https://newsapi.org/v2/everything",
            params={
                "q": query,
                "from": from_date,
                "to": to_date,
                "sortBy": "publishedAt",
                "language": "en",
                "pageSize": max_results,
                "apiKey": NEWSAPI_API_KEY,
            },
            timeout=REQUEST_TIMEOUT,
        )
        if newsapi_response.status_code == 200:
            articles.extend(newsapi_response.json().get("articles", []))
        else:
            print(f"NewsAPI error: {newsapi_response.status_code}")
    except requests.RequestException as e:
        print(f"NewsAPI request failed: {e}")

    # Combine and deduplicate by title (later duplicates overwrite earlier).
    combined_articles = {article.get('title', f"no-title-{i}"): article for i, article in enumerate(articles)}
    return list(combined_articles.values())
reddit_search.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dotenv import load_dotenv
import praw

# Load Reddit API credentials from your .env file
load_dotenv()
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT", "SentimentApp/0.1 by YourUsername")

# Initialize Reddit client (created once at import time)
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent=REDDIT_USER_AGENT,
)

def search_reddit(keywords, subreddit=None, limit=20):
    """
    Search Reddit posts for each keyword in one or more subreddits.

    keywords: iterable of search strings
    subreddit: optional comma-separated subreddit names; blank/None searches
        all of Reddit
    limit: max posts per (keyword, subreddit) search
    Returns a list of dicts: {body, subreddit, score}
    """
    results = []

    # Parse and clean subreddit input (comma separated). An empty selection
    # degenerates to the special "all" subreddit, which searches site-wide —
    # this removes the verbatim-duplicated search loop the two branches had.
    subreddit_list = []
    if subreddit and subreddit.strip():
        subreddit_list = [sr.strip() for sr in subreddit.split(",") if sr.strip()]
    if not subreddit_list:
        subreddit_list = ["all"]

    for keyword in keywords:
        for sr in subreddit_list:
            try:
                for post in reddit.subreddit(sr).search(keyword, limit=limit):
                    results.append({
                        # Title plus self-text; selftext can be None for link posts.
                        "body": post.title + "\n" + (post.selftext or ""),
                        "subreddit": post.subreddit.display_name,
                        "score": post.score,
                    })
            except Exception as e:
                # Best effort: a bad subreddit name or API hiccup should not
                # abort the remaining searches.
                print(f"Error searching subreddit '{sr}': {e}")

    return results
results_compiler.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def compile_results(articles, keywords, reddit_data, sentiment, max_articles=5, max_keywords=10, max_comments=30):
    """
    Assemble the final API payload from the pipeline stages.

    articles: list of article dicts (title/description/url keys are used)
    keywords: list of keyword strings (non-list values are passed through)
    reddit_data: flat list of comment dicts {body, subreddit, score}
    sentiment: either a list of {"body": ..., "sentiment": ...} dicts
        (what analyze_sentiment returns) or a {body: label} mapping
    returns: {"articles": [...], "top_keywords": ..., "reddit_comments": [...]}
    """
    # Clean and simplify articles
    simplified_articles = [
        {
            "title": article.get("title", ""),
            "description": article.get("description", ""),
            "url": article.get("url", "")
        }
        for article in articles[:max_articles]
    ]

    # Limit keywords for display
    clean_keywords = keywords[:max_keywords] if isinstance(keywords, list) else keywords

    # Build a body -> sentiment-label lookup. analyze_sentiment returns a
    # *list* of {"body", "sentiment"} dicts; the previous code called
    # sentiment.get(...) directly, which raised AttributeError on a list.
    # Accept both the list form and a plain mapping for compatibility.
    if isinstance(sentiment, dict):
        sentiment_by_body = sentiment
    else:
        sentiment_by_body = {
            item.get("body", ""): item.get("sentiment", "N/A")
            for item in (sentiment or [])
        }

    # Clean up reddit data (flat list of comments, not grouped by keyword)
    reddit_output = [
        {
            "comment": comment.get("body", ""),
            "subreddit": comment.get("subreddit", ""),
            "score": comment.get("score", ""),
            "sentiment": sentiment_by_body.get(comment.get("body", ""), "N/A"),
        }
        for comment in reddit_data[:max_comments]
    ]

    return {
        "articles": simplified_articles,
        "top_keywords": clean_keywords,
        "reddit_comments": reddit_output
    }
sentiment_analyzer.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import pipeline

# Sentiment-analysis pipeline, built once when the module is imported.
sentiment_model = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
    device=-1  # Always CPU, for Hugging Face Spaces
)

def analyze_sentiment(reddit_data):
    """
    Classify the sentiment of Reddit comments.

    Accepts:
    - dict: {keyword: [list of comment dicts]}
    - list: [comment dicts]
    Returns list of dicts: [{"body": ..., "sentiment": ...}, ...];
    any other input type yields an empty list.
    """
    # Normalize both accepted shapes to one flat list of comment dicts.
    if isinstance(reddit_data, dict):
        flat_comments = [c for group in reddit_data.values() for c in group]
    elif isinstance(reddit_data, list):
        flat_comments = reddit_data
    else:
        return []  # Unexpected input

    sentiments = []
    for comment in flat_comments:
        body = comment.get("body", "")
        if not body:
            continue  # skip empty bodies
        try:
            # Truncate to 512 characters to stay within the model's limit.
            label = sentiment_model(body[:512])[0]["label"].lower()
        except Exception:
            label = "error"
        sentiments.append({"body": body, "sentiment": label})
    return sentiments