Sami2000 committed on
Commit
12d8780
·
verified ·
1 Parent(s): 375f97c

Upload 7 files

Browse files
Files changed (7) hide show
  1. app.py +112 -0
  2. keyword_extractor.py +30 -0
  3. main.py +27 -0
  4. news_fetcher.py +53 -0
  5. reddit_search.py +59 -0
  6. results_compiler.py +29 -0
  7. sentiment_analyzer.py +37 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit dashboard: fetch news, extract keywords, search Reddit, analyze sentiment."""
import streamlit as st
from news_fetcher import fetch_news
from keyword_extractor import extract_keywords
from sentiment_analyzer import analyze_sentiment
from reddit_search import search_reddit
import pandas as pd
import plotly.express as px

# Page chrome must be configured before any other Streamlit call.
st.set_page_config(page_title="INDOPACOM Sentiment Dashboard", layout="wide")

st.title("Military Sentiment Dashboard")
st.write(
    """
🔎 **About:** This dashboard finds the latest news about a topic, extracts trending keywords,
and analyzes public sentiment from Reddit using state-of-the-art AI.
\n
_Educational demonstration only. Does not represent any official views._
"""
)

# --- NEW: Date range selection ---
# Options are (label, days-back) pairs; format_func shows only the label.
date_range = st.selectbox(
    "Search news from:",
    options=[("Last 24 hours", 1), ("Last 7 days", 7)],
    format_func=lambda x: x[0]
)
selected_days = date_range[1]  # integer day count passed to fetch_news

# --- NEW: Subreddit input ---
subreddit = st.text_input("Specify a subreddit (optional, e.g., 'Military' or 'worldnews'). Leave blank for all.", value="")

query = st.text_input("Enter your topic or query:", value="US Army INDOPACOM")
max_articles = st.slider("Number of news articles:", 5, 25, 12)

if st.button("Search"):
    # --- Fancy progress bar ---
    progress = st.progress(0, text="Fetching news...")

    # Step 1: Fetch news
    progress.progress(10, text="Fetching news articles...")
    articles = fetch_news(query=query, days=selected_days, max_results=max_articles)

    if articles:
        progress.progress(40, text="Extracting keywords...")
        keywords = extract_keywords(articles)

        progress.progress(60, text="Searching Reddit...")
        # Blank subreddit input is normalized to None -> search all of Reddit.
        reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)

        progress.progress(80, text="Analyzing sentiment...")
        sentiments = analyze_sentiment(reddit_data)
        progress.progress(100, text="Done!")

        tab1, tab2, tab3, tab4 = st.tabs(["News", "Keywords", "Reddit", "Sentiment"])

        with tab1:
            st.subheader("News Articles")
            st.dataframe([
                {
                    "Title": a.get("title", ""),
                    "Source": a.get("source", ""),
                    "Published": a.get("publishedAt", ""),
                    "URL": a.get("url", "")
                } for a in articles[:max_articles]
            ])

        with tab2:
            st.subheader("Top Keywords")
            st.write(", ".join(keywords))

        with tab3:
            st.subheader("Reddit Comments")
            if reddit_data:
                comments = []
                # search_reddit may return either {keyword: [comments]} or a
                # flat list of comment dicts; normalize to a flat list here.
                if isinstance(reddit_data, dict):
                    for v in reddit_data.values():
                        comments.extend(v)
                elif isinstance(reddit_data, list):
                    comments = reddit_data

                if comments:
                    st.dataframe([
                        {
                            # Truncate long comment bodies to 140 chars for display.
                            "Comment": c.get("body", "")[:140] + ("..." if len(c.get("body", "")) > 140 else ""),
                            "Subreddit": c.get("subreddit", ""),
                            "Upvotes": c.get("score", ""),
                        }
                        for c in comments[:30]
                    ])
                else:
                    st.info("No Reddit comments found.")
            else:
                st.info("No Reddit data found.")

        with tab4:
            st.subheader("Sentiment Results")
            if sentiments:
                # sentiments is a list of {"body", "sentiment"} dicts.
                df = pd.DataFrame(sentiments)
                st.dataframe(df[["body", "sentiment"]].rename(columns={"body": "Comment"}))
                # Show pie chart of sentiment
                sentiment_counts = df["sentiment"].value_counts().reset_index()
                sentiment_counts.columns = ["Sentiment", "Count"]
                fig = px.pie(sentiment_counts, names="Sentiment", values="Count",
                             title="Sentiment Distribution")
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("No sentiment data found.")

    else:
        # No articles: clear the progress bar instead of leaving it stuck.
        progress.empty()
        st.warning("No articles found for your search. Try a different query.")
# --- END OF DASHBOARD CODE ---
keyword_extractor.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# keyword_extractor.py (using KeyBERT, no OpenAI required)

from keybert import KeyBERT

# Initialize KeyBERT with a small, efficient model (loaded once at import time)
kw_model = KeyBERT('all-MiniLM-L6-v2')

def extract_keywords(articles, num_keywords=10):
    """
    Extract the top keywords/phrases from a batch of news articles.

    articles: list of dicts, each with 'title' and 'content'
    num_keywords: maximum number of keywords/phrases to return
    returns: list of unique keywords/phrases (strings)
    """
    # NewsAPI frequently returns "content": None. .get("content", "") only
    # covers a *missing* key, so concatenating the result could raise
    # TypeError on None; 'or ""' handles present-but-null values too.
    all_text = " ".join(
        (art.get("title") or "") + " " + (art.get("content") or "")
        for art in articles if art
    )
    if not all_text.strip():
        # Nothing to analyze (empty article list or all-empty texts).
        return []
    # Extract top keywords and phrases (1-2 word ngrams, English stop words)
    keywords = kw_model.extract_keywords(
        all_text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=num_keywords
    )
    # keywords is a list of tuples: [(keyword, score), ...] — keep strings only
    return [kw for kw, score in keywords]
main.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""FastAPI backend exposing the news/Reddit sentiment pipeline."""
import logging

from fastapi import FastAPI, Query

from news_fetcher import fetch_news
from keyword_extractor import extract_keywords
from reddit_search import search_reddit
from sentiment_analyzer import analyze_sentiment
from results_compiler import compile_results

# Module-level logger; preferred over print() in a web service, and avoids
# dumping entire Reddit payloads to stdout on every request.
logger = logging.getLogger(__name__)

app = FastAPI()

@app.get("/")
def read_root():
    """Health-check / landing endpoint."""
    return {"message": "Hello, this is your INDOPACOM Sentiment App backend!"}

@app.get("/run_workflow")
def run_workflow(query: str = Query("US Army INDOPACOM")):
    """
    Run the full pipeline for *query*:
    fetch news -> extract keywords -> search Reddit -> analyze sentiment -> compile.

    Returns the dict produced by compile_results.
    """
    articles = fetch_news(query=query)  # pass user query to fetch_news
    logger.info("Fetched %d articles for query: %s", len(articles), query)
    keywords = extract_keywords(articles)
    logger.info("Extracted keywords: %s", keywords)
    reddit_data = search_reddit(keywords)
    logger.info("Collected %d Reddit posts", len(reddit_data))
    sentiment = analyze_sentiment(reddit_data)
    logger.info("Analyzed sentiment for %d comments", len(sentiment))
    results = compile_results(articles, keywords, reddit_data, sentiment)
    return results
news_fetcher.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dotenv import load_dotenv

# On Hugging Face, just call load_dotenv() (or omit if you want)
load_dotenv()

import requests
from datetime import datetime, timedelta, timezone

GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")
NEWSAPI_API_KEY = os.getenv("NEWSAPI_API_KEY")

# Network timeout (seconds) so a stalled API cannot hang the whole app.
REQUEST_TIMEOUT = 10


def fetch_news(query="US Army INDOPACOM", days=1, max_results=10):
    """
    Fetch recent news about *query* from GNews and NewsAPI (best effort).

    query: free-text search string (URL-encoded via `params`, so spaces and
        special characters are safe)
    days: how many days back to search
    max_results: maximum articles requested from each API
    returns: de-duplicated (by title) list of article dicts; [] when keys
        are missing or both requests fail
    """
    if not GNEWS_API_KEY or not NEWSAPI_API_KEY:
        print("Missing API keys! Check your Hugging Face secrets.")
        return []

    # Timezone-aware "now" — datetime.utcnow() is deprecated.
    today = datetime.now(timezone.utc)
    from_date = (today - timedelta(days=days)).strftime("%Y-%m-%d")
    to_date = today.strftime("%Y-%m-%d")

    articles = []

    # GNews API — pass params= so requests builds and encodes the query
    # string (the old f-string URL broke on queries with spaces/&/#).
    try:
        gnews_response = requests.get(
            "https://gnews.io/api/v4/search",
            params={
                "q": query,
                "from": from_date,
                "to": to_date,
                "lang": "en",
                "max": max_results,
                "apikey": GNEWS_API_KEY,
            },
            timeout=REQUEST_TIMEOUT,
        )
        if gnews_response.status_code == 200:
            articles.extend(gnews_response.json().get("articles", []))
        else:
            print(f"GNews API error: {gnews_response.status_code}")
    except requests.RequestException as e:
        # Network failure should degrade to "no results", not crash the app.
        print(f"GNews request failed: {e}")

    # NewsAPI
    try:
        newsapi_response = requests.get(
            "https://newsapi.org/v2/everything",
            params={
                "q": query,
                "from": from_date,
                "to": to_date,
                "sortBy": "publishedAt",
                "language": "en",
                "pageSize": max_results,
                "apiKey": NEWSAPI_API_KEY,
            },
            timeout=REQUEST_TIMEOUT,
        )
        if newsapi_response.status_code == 200:
            articles.extend(newsapi_response.json().get("articles", []))
        else:
            print(f"NewsAPI error: {newsapi_response.status_code}")
    except requests.RequestException as e:
        print(f"NewsAPI request failed: {e}")

    # Combine and deduplicate by title (later duplicates overwrite earlier).
    combined_articles = {article.get('title', f"no-title-{i}"): article for i, article in enumerate(articles)}
    return list(combined_articles.values())
reddit_search.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dotenv import load_dotenv
import praw

# Load Reddit API credentials from your .env file
load_dotenv()
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT", "SentimentApp/0.1 by YourUsername")

# Initialize Reddit client (created once at import time)
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent=REDDIT_USER_AGENT,
)

def search_reddit(keywords, subreddit=None, limit=20):
    """
    Search Reddit posts for each keyword in one or more subreddits.

    keywords: iterable of search strings
    subreddit: optional comma-separated subreddit names; blank/None searches
        all of Reddit
    limit: max posts per (keyword, subreddit) search
    Returns a list of dicts: {body, subreddit, score}
    """
    results = []

    # Parse and clean subreddit input (comma separated). An empty selection
    # degenerates to the special "all" subreddit, which searches site-wide —
    # this removes the verbatim-duplicated search loop the two branches had.
    subreddit_list = []
    if subreddit and subreddit.strip():
        subreddit_list = [sr.strip() for sr in subreddit.split(",") if sr.strip()]
    if not subreddit_list:
        subreddit_list = ["all"]

    for keyword in keywords:
        for sr in subreddit_list:
            try:
                for post in reddit.subreddit(sr).search(keyword, limit=limit):
                    results.append({
                        # Title plus self-text; selftext can be None for link posts.
                        "body": post.title + "\n" + (post.selftext or ""),
                        "subreddit": post.subreddit.display_name,
                        "score": post.score,
                    })
            except Exception as e:
                # Best effort: a bad subreddit name or API hiccup should not
                # abort the remaining searches.
                print(f"Error searching subreddit '{sr}': {e}")

    return results
results_compiler.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def compile_results(articles, keywords, reddit_data, sentiment, max_articles=5, max_keywords=10, max_comments=30):
    """
    Assemble the final API payload from the pipeline stages.

    articles: list of article dicts (title/description/url keys are used)
    keywords: list of keyword strings (non-list values are passed through)
    reddit_data: flat list of comment dicts {body, subreddit, score}
    sentiment: either a list of {"body": ..., "sentiment": ...} dicts
        (what analyze_sentiment returns) or a {body: label} mapping
    returns: {"articles": [...], "top_keywords": ..., "reddit_comments": [...]}
    """
    # Clean and simplify articles
    simplified_articles = [
        {
            "title": article.get("title", ""),
            "description": article.get("description", ""),
            "url": article.get("url", "")
        }
        for article in articles[:max_articles]
    ]

    # Limit keywords for display
    clean_keywords = keywords[:max_keywords] if isinstance(keywords, list) else keywords

    # Build a body -> sentiment-label lookup. analyze_sentiment returns a
    # *list* of {"body", "sentiment"} dicts; the previous code called
    # sentiment.get(...) directly, which raised AttributeError on a list.
    # Accept both the list form and a plain mapping for compatibility.
    if isinstance(sentiment, dict):
        sentiment_by_body = sentiment
    else:
        sentiment_by_body = {
            item.get("body", ""): item.get("sentiment", "N/A")
            for item in (sentiment or [])
        }

    # Clean up reddit data (flat list of comments, not grouped by keyword)
    reddit_output = [
        {
            "comment": comment.get("body", ""),
            "subreddit": comment.get("subreddit", ""),
            "score": comment.get("score", ""),
            "sentiment": sentiment_by_body.get(comment.get("body", ""), "N/A"),
        }
        for comment in reddit_data[:max_comments]
    ]

    return {
        "articles": simplified_articles,
        "top_keywords": clean_keywords,
        "reddit_comments": reddit_output
    }
sentiment_analyzer.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from transformers import pipeline

# Sentiment-analysis pipeline, built once when the module is imported.
sentiment_model = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment-latest",
    device=-1  # Always CPU, for Hugging Face Spaces
)

def analyze_sentiment(reddit_data):
    """
    Classify the sentiment of Reddit comments.

    Accepts:
    - dict: {keyword: [list of comment dicts]}
    - list: [comment dicts]
    Returns list of dicts: [{"body": ..., "sentiment": ...}, ...];
    any other input type yields an empty list.
    """
    # Normalize both accepted shapes to one flat list of comment dicts.
    if isinstance(reddit_data, dict):
        flat_comments = [c for group in reddit_data.values() for c in group]
    elif isinstance(reddit_data, list):
        flat_comments = reddit_data
    else:
        return []  # Unexpected input

    sentiments = []
    for comment in flat_comments:
        body = comment.get("body", "")
        if not body:
            continue  # skip empty bodies
        try:
            # Truncate to 512 characters to stay within the model's limit.
            label = sentiment_model(body[:512])[0]["label"].lower()
        except Exception:
            label = "error"
        sentiments.append({"body": body, "sentiment": label})
    return sentiments