# app.py — INDOPACOM sentiment dashboard (Streamlit)
import streamlit as st
from news_fetcher import fetch_news
from keyword_extractor import extract_keywords
from sentiment_analyzer import analyze_sentiment
from reddit_search import search_reddit
import pandas as pd
import plotly.express as px
import requests
# --- Page chrome -----------------------------------------------------------
st.set_page_config(
    page_title="INDOPACOM Sentiment Dashboard",
    layout="wide",
    initial_sidebar_state="expanded"
)
st.markdown(
    "<h1 style='text-align:center; color:#183153; font-weight:900;'>🌏 Pseudo Military Sentiment Dashboard</h1>",
    unsafe_allow_html=True
)
st.markdown(
    "<h4 style='text-align:center; color:#375a7f;'>AI-powered Open Source Research and Analysis: For Educational Purposes Only</h4>",
    unsafe_allow_html=True
)
st.markdown("---")

# --- Date range, subreddit, and topic input row ---
col1, col2, col3 = st.columns([1, 1, 2])
with col1:
    # Options are (label, days) pairs; format_func shows only the label,
    # while the second element carries the look-back window in days.
    date_range = st.selectbox(
        "Search news from:",
        options=[("Last 24 hours", 1), ("Last 7 days", 7)],
        format_func=lambda x: x[0]
    )
    selected_days = date_range[1]
with col2:
    subreddit = st.text_input(
        "Subreddit (optional)",
        value="",
        help="e.g. 'Military', 'worldnews', or leave blank for all"
    )
with col3:
    # BUG FIX: the original used value="Enter Value Here", so that literal
    # string was submitted verbatim as the news-search query whenever the
    # user did not edit the field. Show it as a non-submitting placeholder
    # instead and default the query to empty.
    query = st.text_input(
        "Enter your topic or query:",
        value="",
        placeholder="Enter Value Here"
    )
max_articles = st.slider("Number of news articles:", 5, 25, 12)
def clean_keywords(keywords):
    """Normalize a keyword list: strip whitespace, discard empty or
    punctuation-only entries, and de-duplicate case-insensitively while
    preserving first-seen order and original casing."""
    # dict keys give O(1) case-insensitive dedup; insertion order is kept.
    unique = {}
    for raw in keywords:
        term = raw.strip()
        # Guard clauses: skip blanks and terms with no alphanumeric char.
        if not term:
            continue
        if not any(ch.isalnum() for ch in term):
            continue
        # First occurrence wins; later case-variants are ignored.
        unique.setdefault(term.lower(), term)
    return list(unique.values())
# --- AI SUMMARY FUNCTION ---
def get_summary_with_hf_llm(keywords, sentiment_counts, top_subreddits, top_posts, user_query):
    """Request a short OSINT summary from the hosted HF zephyr-7b-beta model.

    Builds a plain-text prompt from the pipeline's findings, POSTs it to the
    Hugging Face Inference API, and returns the generated text. On any
    network/HTTP/parsing failure, returns a human-readable fallback string
    instead of raising, so the Streamlit UI never crashes.

    Parameters:
        keywords: list[str] — extracted keywords (first 8 are embedded).
        sentiment_counts: mapping/Series of sentiment label -> count.
        top_subreddits: list[str] — most active subreddit names.
        top_posts: list[str] — example Reddit post title snippets.
        user_query: str — the original user search query.
    """
    prompt = (
        f"Summarize these OSINT findings in 3-4 sentences for a non-technical military audience.\n"
        f"Query: {user_query}\n"
        f"Keywords found: {', '.join(keywords[:8])}...\n"
        f"Sentiment counts: {dict(sentiment_counts)}\n"
        f"Most active subreddits: {', '.join(top_subreddits)}\n"
        f"Example Reddit post titles: {', '.join(top_posts)}\n"
        "Then, suggest 3-5 additional related search terms that could improve situational awareness."
    )
    url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
    try:
        # NOTE(review): no Authorization header — relies on anonymous access,
        # which the HF API rate-limits aggressively; confirm a token isn't needed.
        resp = requests.post(url, json={"inputs": prompt}, timeout=60)
        resp.raise_for_status()
        output = resp.json()
        # BUG FIX: the original indexed output[0] without checking the list was
        # non-empty or that its first element was a dict; an empty list raised
        # IndexError (and a string element did substring search), which the
        # broad except below silently reported as an "LLM error".
        if isinstance(output, list) and output and isinstance(output[0], dict) \
                and "generated_text" in output[0]:
            return output[0]["generated_text"]
        if isinstance(output, dict) and "generated_text" in output:
            return output["generated_text"]
        return "Summary unavailable (unexpected API output)."
    except Exception as e:
        # Boundary handler: surface the failure as UI text, never crash the app.
        return f"Summary unavailable (LLM error: {e})"
st.markdown("---")
# Main analysis pipeline: news -> keywords -> Reddit -> sentiment -> LLM summary.
# Runs only when the user clicks the button; each stage bumps the progress bar.
if st.button("πŸš€ Analyze!"):
    progress = st.progress(0, text="Fetching news...")
    # Fetch news
    progress.progress(10, text="Fetching news articles...")
    articles = fetch_news(query=query, days=selected_days, max_results=max_articles)
    if articles:
        progress.progress(40, text="Extracting keywords...")
        keywords = extract_keywords(articles)
        keywords = clean_keywords(keywords)
        st.markdown("#### πŸ“° News Stories")
        with st.expander("View fetched news stories", expanded=False):
            for art in articles:
                # Title plus a 180-char teaser of the article body, rendered as HTML.
                # NOTE(review): assumes each article dict has 'title'/'content' keys
                # — confirm against news_fetcher.
                st.markdown(f"<div style='padding:8px 0;'><b>{art.get('title','')}</b><br>"
                            f"<span style='color:#4a4a4a;font-size:0.9em'>{art.get('content','')[:180]}...</span></div>",
                            unsafe_allow_html=True)
        st.markdown("---")
        st.markdown("#### 🏷️ Extracted Keywords")
        st.info(", ".join(keywords) if keywords else "No keywords found.", icon="πŸ”‘")
        st.markdown("---")
        progress.progress(60, text="Searching Reddit...")
        # Blank subreddit input means "search all of Reddit" (pass None through).
        reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
        progress.progress(80, text="Analyzing sentiment...")
        sentiment_results = analyze_sentiment(reddit_data)
        st.markdown("#### πŸ‘Ύ Reddit Posts")
        if reddit_data:
            st.dataframe(pd.DataFrame(reddit_data)[["body", "subreddit", "score"]], height=180)
        else:
            st.warning("No Reddit posts found for these keywords.", icon="⚠️")
        st.markdown("---")
        st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")
        # --- Sentiment Results Table & Pie/Donut Chart ---
        # NOTE(review): assumes analyze_sentiment returns one label per post,
        # aligned with reddit_data — confirm in sentiment_analyzer.
        results_df = pd.DataFrame(reddit_data)
        results_df['sentiment'] = sentiment_results
        st.markdown("#### 🧠 Sentiment Analysis")
        sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
        # Defensive cleanup: drop NaN and blank labels, then coerce the index
        # to str so .str accessors below are safe on mixed-type labels.
        sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
        sentiment_counts = sentiment_counts.rename(str)
        sentiment_counts = sentiment_counts[sentiment_counts.index.str.strip() != '']
        sentiment_df = pd.DataFrame({
            'Sentiment': sentiment_counts.index,
            'Count': sentiment_counts.values
        })
        # Color palette
        palette = ['#183153', '#3277b3', '#375a7f', '#3bb273', '#ffb347', '#e05a47']
        if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
            fig = px.pie(
                sentiment_df,
                names='Sentiment',
                values='Count',
                color='Sentiment',
                color_discrete_sequence=palette,
                title='Sentiment Breakdown',
                hole=0.45  # Donut chart!
            )
            fig.update_traces(
                textposition='inside',
                textinfo='percent+label+value'
            )
            fig.update_layout(
                showlegend=True,
                plot_bgcolor="#f8fafc",
                paper_bgcolor="#f8fafc",
                font=dict(size=15),
                margin=dict(t=60, b=60, r=40, l=40)
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No valid sentiment data for plotting.", icon="😢")
        # --- AI SUMMARY SECTION ---
        # Feed the top-3 subreddits and first-50-char post snippets to the LLM;
        # column checks guard against an empty/missing-column DataFrame.
        top_subreddits = results_df['subreddit'].value_counts().index[:3].tolist() if 'subreddit' in results_df else []
        top_posts = results_df['body'].dropna().astype(str).str[:50].tolist()[:3] if 'body' in results_df else []
        st.markdown("### πŸ“ AI-Generated Summary & Suggestions")
        summary = get_summary_with_hf_llm(
            keywords=keywords,
            sentiment_counts=sentiment_counts,
            top_subreddits=top_subreddits,
            top_posts=top_posts,
            user_query=query
        )
        st.info(summary)
    else:
        # No articles at all: nothing downstream can run.
        st.warning("No news articles found for that query. Try a different topic or broaden the date range.", icon="πŸ“°")