Update app.py
Browse files
app.py
CHANGED
|
@@ -5,64 +5,106 @@ from sentiment_analyzer import analyze_sentiment
|
|
| 5 |
from reddit_search import search_reddit
|
| 6 |
import pandas as pd
|
| 7 |
import plotly.express as px
|
|
|
|
| 8 |
|
| 9 |
-
st.set_page_config(
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
"""
|
| 14 |
-
**About:** This dashboard finds the latest news about a topic, extracts trending keywords,
|
| 15 |
-
and analyzes public sentiment from Reddit using state-of-the-art AI.
|
| 16 |
-
\n
|
| 17 |
-
_Educational demonstration only. Does not represent any official views._
|
| 18 |
-
"""
|
| 19 |
)
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
options=[("Last 24 hours", 1), ("Last 7 days", 7)],
|
| 25 |
-
format_func=lambda x: x[0]
|
| 26 |
)
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
query = st.text_input("Enter your topic or query:", value="Enter Value Here")
|
| 33 |
max_articles = st.slider("Number of news articles:", 5, 25, 12)
|
| 34 |
|
| 35 |
-
# --- CLEANING FUNCTION ---
|
| 36 |
def clean_keywords(keywords):
    """Normalize a keyword list.

    Strips surrounding whitespace, drops entries that are empty or
    punctuation-only, and removes case-insensitive duplicates while
    preserving first-seen order and original casing.
    """
    kept = []
    seen_lower = set()
    for raw in keywords:
        candidate = raw.strip()
        if not candidate:
            continue  # blank after stripping
        if not any(ch.isalnum() for ch in candidate):
            continue  # punctuation-only token
        folded = candidate.lower()
        if folded in seen_lower:
            continue  # case-insensitive duplicate
        seen_lower.add(folded)
        kept.append(candidate)
    return kept
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
progress = st.progress(0, text="Fetching news...")
|
| 54 |
|
| 55 |
-
#
|
| 56 |
progress.progress(10, text="Fetching news articles...")
|
| 57 |
articles = fetch_news(query=query, days=selected_days, max_results=max_articles)
|
| 58 |
|
| 59 |
if articles:
|
| 60 |
progress.progress(40, text="Extracting keywords...")
|
| 61 |
keywords = extract_keywords(articles)
|
| 62 |
-
|
| 63 |
-
# --- Clean up keywords ---
|
| 64 |
keywords = clean_keywords(keywords)
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
progress.progress(60, text="Searching Reddit...")
|
| 68 |
reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
|
|
@@ -70,17 +112,20 @@ if st.button("Search"):
|
|
| 70 |
progress.progress(80, text="Analyzing sentiment...")
|
| 71 |
sentiment_results = analyze_sentiment(reddit_data)
|
| 72 |
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")
|
| 75 |
|
| 76 |
-
#
|
| 77 |
results_df = pd.DataFrame(reddit_data)
|
| 78 |
results_df['sentiment'] = sentiment_results
|
| 79 |
|
| 80 |
-
|
| 81 |
-
st.dataframe(results_df)
|
| 82 |
-
|
| 83 |
-
# Robust and crash-proof sentiment plot!
|
| 84 |
sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
|
| 85 |
sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
|
| 86 |
sentiment_counts = sentiment_counts.rename(str)
|
|
@@ -91,16 +136,47 @@ if st.button("Search"):
|
|
| 91 |
'Count': sentiment_counts.values
|
| 92 |
})
|
| 93 |
|
|
|
|
|
|
|
|
|
|
| 94 |
if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
|
| 95 |
fig = px.bar(
|
| 96 |
sentiment_df,
|
| 97 |
x='Sentiment',
|
| 98 |
y='Count',
|
|
|
|
|
|
|
|
|
|
| 99 |
labels={'Sentiment': 'Sentiment', 'Count': 'Count'},
|
| 100 |
title='Sentiment Distribution'
|
| 101 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
st.plotly_chart(fig, use_container_width=True)
|
| 103 |
else:
|
| 104 |
-
st.info("No valid sentiment data for plotting.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
else:
|
| 106 |
-
st.warning("No news articles found for that query. Try a different topic or broaden the date range.")
|
|
|
|
| 5 |
from reddit_search import search_reddit
|
| 6 |
import pandas as pd
|
| 7 |
import plotly.express as px
|
| 8 |
+
import requests
|
| 9 |
|
| 10 |
+
st.set_page_config(
|
| 11 |
+
page_title="INDOPACOM Sentiment Dashboard",
|
| 12 |
+
layout="wide",
|
| 13 |
+
initial_sidebar_state="expanded"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
)
|
| 15 |
|
| 16 |
+
st.markdown(
|
| 17 |
+
"<h1 style='text-align:center; color:#183153; font-weight:900;'>π Military Sentiment Dashboard</h1>",
|
| 18 |
+
unsafe_allow_html=True
|
|
|
|
|
|
|
| 19 |
)
|
| 20 |
+
st.markdown(
|
| 21 |
+
"<h4 style='text-align:center; color:#375a7f;'>AI-powered OSINT: See what the news & social web really think</h4>",
|
| 22 |
+
unsafe_allow_html=True
|
| 23 |
+
)
|
| 24 |
+
st.markdown("---")
|
| 25 |
+
|
| 26 |
+
# --- Date range, subreddit, and topic input row ---
|
| 27 |
+
col1, col2, col3 = st.columns([1, 1, 2])
|
| 28 |
+
with col1:
|
| 29 |
+
date_range = st.selectbox(
|
| 30 |
+
"Search news from:",
|
| 31 |
+
options=[("Last 24 hours", 1), ("Last 7 days", 7)],
|
| 32 |
+
format_func=lambda x: x[0]
|
| 33 |
+
)
|
| 34 |
+
selected_days = date_range[1]
|
| 35 |
+
with col2:
|
| 36 |
+
subreddit = st.text_input(
|
| 37 |
+
"Subreddit (optional)",
|
| 38 |
+
value="",
|
| 39 |
+
help="e.g. 'Military', 'worldnews', or leave blank for all"
|
| 40 |
+
)
|
| 41 |
+
with col3:
|
| 42 |
+
query = st.text_input("Enter your topic or query:", value="US Army INDOPACOM")
|
| 43 |
|
|
|
|
| 44 |
max_articles = st.slider("Number of news articles:", 5, 25, 12)
|
| 45 |
|
|
|
|
| 46 |
def clean_keywords(keywords):
    """Return keywords stripped, filtered, and de-duplicated.

    An entry survives only if, after stripping whitespace, it is
    non-empty, contains at least one alphanumeric character, and its
    lowercase form has not appeared earlier in the list. First-seen
    order and original casing are preserved.
    """
    # Keyed by lowercase form; dict insertion order keeps first-seen order.
    unique = {}
    for entry in keywords:
        term = entry.strip()
        has_content = bool(term) and any(c.isalnum() for c in term)
        if has_content and term.lower() not in unique:
            unique[term.lower()] = term
    return list(unique.values())
|
| 55 |
|
| 56 |
# --- AI SUMMARY FUNCTION ---
def get_summary_with_hf_llm(keywords, sentiment_counts, top_subreddits, top_posts, user_query):
    """Ask a hosted Hugging Face LLM to summarize the OSINT findings.

    Args:
        keywords: extracted news keywords (only the first 8 are sent).
        sentiment_counts: mapping of sentiment label -> count.
        top_subreddits: most active subreddit names.
        top_posts: example Reddit post titles/snippets.
        user_query: the user's original search query.

    Returns:
        The generated summary string, or a human-readable fallback
        message when the inference API call fails or returns an
        unexpected payload. Never raises.
    """
    prompt = (
        f"Summarize these OSINT findings in 3-4 sentences for a non-technical military audience.\n"
        f"Query: {user_query}\n"
        f"Keywords found: {', '.join(keywords[:8])}...\n"
        f"Sentiment counts: {dict(sentiment_counts)}\n"
        f"Most active subreddits: {', '.join(top_subreddits)}\n"
        f"Example Reddit post titles: {', '.join(top_posts)}\n"
        "Then, suggest 3-5 additional related search terms that could improve situational awareness."
    )

    # NOTE(review): anonymous calls to the HF Inference API are heavily
    # rate-limited; an Authorization bearer-token header is normally
    # required for reliable use -- TODO confirm deployment supplies one.
    url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
    try:
        resp = requests.post(url, json={"inputs": prompt}, timeout=60)
        resp.raise_for_status()
        output = resp.json()
        # The API returns either [{"generated_text": ...}] or
        # {"generated_text": ...} depending on model/pipeline. Guard the
        # shapes explicitly (non-empty list, dict element) so an empty or
        # odd payload yields the "unexpected output" message instead of a
        # misleading IndexError/TypeError caught by the handler below.
        if isinstance(output, list) and output and isinstance(output[0], dict) \
                and "generated_text" in output[0]:
            return output[0]["generated_text"]
        if isinstance(output, dict) and "generated_text" in output:
            return output["generated_text"]
        return "Summary unavailable (unexpected API output)."
    except Exception as e:
        # Network failures, HTTP errors, and malformed JSON all degrade to
        # a readable message so the dashboard keeps rendering.
        return f"Summary unavailable (LLM error: {e})"
|
| 82 |
+
|
| 83 |
+
st.markdown("---")
|
| 84 |
+
|
| 85 |
+
if st.button("π Analyze!"):
|
| 86 |
progress = st.progress(0, text="Fetching news...")
|
| 87 |
|
| 88 |
+
# Fetch news
|
| 89 |
progress.progress(10, text="Fetching news articles...")
|
| 90 |
articles = fetch_news(query=query, days=selected_days, max_results=max_articles)
|
| 91 |
|
| 92 |
if articles:
|
| 93 |
progress.progress(40, text="Extracting keywords...")
|
| 94 |
keywords = extract_keywords(articles)
|
|
|
|
|
|
|
| 95 |
keywords = clean_keywords(keywords)
|
| 96 |
+
|
| 97 |
+
st.markdown("#### π° News Stories")
|
| 98 |
+
with st.expander("View fetched news stories", expanded=False):
|
| 99 |
+
for art in articles:
|
| 100 |
+
st.markdown(f"<div style='padding:8px 0;'><b>{art.get('title','')}</b><br>"
|
| 101 |
+
f"<span style='color:#4a4a4a;font-size:0.9em'>{art.get('content','')[:180]}...</span></div>",
|
| 102 |
+
unsafe_allow_html=True)
|
| 103 |
+
st.markdown("---")
|
| 104 |
+
|
| 105 |
+
st.markdown("#### π·οΈ Extracted Keywords")
|
| 106 |
+
st.info(", ".join(keywords) if keywords else "No keywords found.", icon="π")
|
| 107 |
+
st.markdown("---")
|
| 108 |
|
| 109 |
progress.progress(60, text="Searching Reddit...")
|
| 110 |
reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
|
|
|
|
| 112 |
progress.progress(80, text="Analyzing sentiment...")
|
| 113 |
sentiment_results = analyze_sentiment(reddit_data)
|
| 114 |
|
| 115 |
+
st.markdown("#### πΎ Reddit Posts")
|
| 116 |
+
if reddit_data:
|
| 117 |
+
st.dataframe(pd.DataFrame(reddit_data)[["body", "subreddit", "score"]], height=180)
|
| 118 |
+
else:
|
| 119 |
+
st.warning("No Reddit posts found for these keywords.", icon="β οΈ")
|
| 120 |
+
st.markdown("---")
|
| 121 |
+
|
| 122 |
st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")
|
| 123 |
|
| 124 |
+
# --- Sentiment Results Table & Plot ---
|
| 125 |
results_df = pd.DataFrame(reddit_data)
|
| 126 |
results_df['sentiment'] = sentiment_results
|
| 127 |
|
| 128 |
+
st.markdown("#### π§ Sentiment Analysis")
|
|
|
|
|
|
|
|
|
|
| 129 |
sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
|
| 130 |
sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
|
| 131 |
sentiment_counts = sentiment_counts.rename(str)
|
|
|
|
| 136 |
'Count': sentiment_counts.values
|
| 137 |
})
|
| 138 |
|
| 139 |
+
# Nice color palette for bars
|
| 140 |
+
palette = ['#183153', '#3277b3', '#375a7f', '#3bb273', '#ffb347', '#e05a47']
|
| 141 |
+
|
| 142 |
if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
|
| 143 |
fig = px.bar(
|
| 144 |
sentiment_df,
|
| 145 |
x='Sentiment',
|
| 146 |
y='Count',
|
| 147 |
+
color='Sentiment',
|
| 148 |
+
color_discrete_sequence=palette,
|
| 149 |
+
text='Count',
|
| 150 |
labels={'Sentiment': 'Sentiment', 'Count': 'Count'},
|
| 151 |
title='Sentiment Distribution'
|
| 152 |
)
|
| 153 |
+
fig.update_traces(marker_line_width=1, textposition="outside")
|
| 154 |
+
fig.update_layout(
|
| 155 |
+
yaxis=dict(title='Count'),
|
| 156 |
+
xaxis=dict(title='Sentiment'),
|
| 157 |
+
showlegend=False,
|
| 158 |
+
plot_bgcolor="#f8fafc",
|
| 159 |
+
paper_bgcolor="#f8fafc",
|
| 160 |
+
font=dict(size=15),
|
| 161 |
+
margin=dict(t=60, b=60, r=40, l=40)
|
| 162 |
+
)
|
| 163 |
st.plotly_chart(fig, use_container_width=True)
|
| 164 |
else:
|
| 165 |
+
st.info("No valid sentiment data for plotting.", icon="πΆ")
|
| 166 |
+
|
| 167 |
+
# --- AI SUMMARY SECTION ---
|
| 168 |
+
# Get top 3 subreddits and top 3 post titles for summary
|
| 169 |
+
top_subreddits = results_df['subreddit'].value_counts().index[:3].tolist() if 'subreddit' in results_df else []
|
| 170 |
+
top_posts = results_df['body'].dropna().astype(str).str[:50].tolist()[:3] if 'body' in results_df else []
|
| 171 |
+
|
| 172 |
+
st.markdown("### π AI-Generated Summary & Suggestions")
|
| 173 |
+
summary = get_summary_with_hf_llm(
|
| 174 |
+
keywords=keywords,
|
| 175 |
+
sentiment_counts=sentiment_counts,
|
| 176 |
+
top_subreddits=top_subreddits,
|
| 177 |
+
top_posts=top_posts,
|
| 178 |
+
user_query=query
|
| 179 |
+
)
|
| 180 |
+
st.info(summary)
|
| 181 |
else:
|
| 182 |
+
st.warning("No news articles found for that query. Try a different topic or broaden the date range.", icon="π°")
|