|
|
import os

import pandas as pd
import plotly.express as px
import requests
import streamlit as st

from keyword_extractor import extract_keywords
from news_fetcher import fetch_news
from reddit_search import search_reddit
from sentiment_analyzer import analyze_sentiment
|
|
|
|
|
# --- Page configuration and header -------------------------------------------
st.set_page_config(
    page_title="INDOPACOM Sentiment Dashboard",
    layout="wide",
    initial_sidebar_state="expanded",
)

_TITLE_HTML = (
    "<h1 style='text-align:center; color:#183153; font-weight:900;'>"
    "π Pseudo Military Sentiment Dashboard</h1>"
)
st.markdown(_TITLE_HTML, unsafe_allow_html=True)

_SUBTITLE_HTML = (
    "<h4 style='text-align:center; color:#375a7f;'>"
    "AI-powered Open Source Research and Analysis: For Educational Purposes Only</h4>"
)
st.markdown(_SUBTITLE_HTML, unsafe_allow_html=True)

st.markdown("---")

# --- Search controls ----------------------------------------------------------
col1, col2, col3 = st.columns([1, 1, 2])

with col1:
    # Each option is a (label, days) pair; format_func shows only the label.
    range_choice = st.selectbox(
        "Search news from:",
        options=[("Last 24 hours", 1), ("Last 7 days", 7)],
        format_func=lambda opt: opt[0],
    )
    selected_days = range_choice[1]

with col2:
    subreddit = st.text_input(
        "Subreddit (optional)",
        value="",
        help="e.g. 'Military', 'worldnews', or leave blank for all",
    )

with col3:
    query = st.text_input("Enter your topic or query:", value="Enter Value Here")

max_articles = st.slider("Number of news articles:", 5, 25, 12)
|
|
|
|
|
def clean_keywords(keywords):
    """Normalize a keyword list: strip whitespace, drop junk, dedupe.

    Keeps the first occurrence (original casing) of each keyword,
    comparing case-insensitively, and discards entries that are empty
    or contain no alphanumeric characters. Input order is preserved.
    """
    seen_lower = set()
    result = []
    for raw in keywords:
        term = raw.strip()
        if not term:
            continue
        # Pure punctuation/symbol tokens carry no signal -- skip them.
        if not any(ch.isalnum() for ch in term):
            continue
        key = term.lower()
        if key in seen_lower:
            continue
        seen_lower.add(key)
        result.append(term)
    return result
|
|
|
|
|
|
|
|
def get_summary_with_hf_llm(keywords, sentiment_counts, top_subreddits, top_posts, user_query):
    """Summarize the OSINT findings via the HuggingFace hosted inference API.

    Builds a prompt from the collected evidence and posts it to the
    zephyr-7b-beta text-generation endpoint.

    Args:
        keywords: extracted keywords (only the first 8 are included).
        sentiment_counts: mapping of sentiment label -> count
            (e.g. a pandas Series; rendered via dict()).
        top_subreddits: most active subreddit names.
        top_posts: example Reddit post snippets.
        user_query: the user's original search query.

    Returns:
        The generated summary text, or a human-readable
        "Summary unavailable (...)" message on any failure.
        This function never raises.
    """
    prompt = (
        f"Summarize these OSINT findings in 3-4 sentences for a non-technical military audience.\n"
        f"Query: {user_query}\n"
        f"Keywords found: {', '.join(keywords[:8])}...\n"
        f"Sentiment counts: {dict(sentiment_counts)}\n"
        f"Most active subreddits: {', '.join(top_subreddits)}\n"
        f"Example Reddit post titles: {', '.join(top_posts)}\n"
        "Then, suggest 3-5 additional related search terms that could improve situational awareness."
    )

    url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"

    # Optional auth: unauthenticated calls are heavily rate-limited by HF.
    # Backward compatible -- if HF_API_TOKEN is unset, behave as before.
    headers = {}
    token = os.environ.get("HF_API_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        resp = requests.post(url, json={"inputs": prompt}, headers=headers, timeout=60)
        resp.raise_for_status()
        output = resp.json()
        # The inference API returns either [{"generated_text": ...}] or
        # {"generated_text": ...}; failures often come back as {"error": ...}.
        # Guard against an empty list so we don't raise (and then report a
        # misleading "list index out of range" as an LLM error).
        if (isinstance(output, list) and output
                and isinstance(output[0], dict) and "generated_text" in output[0]):
            return output[0]["generated_text"]
        if isinstance(output, dict):
            if "generated_text" in output:
                return output["generated_text"]
            if "error" in output:
                # Surface the API's own error (e.g. "model is loading").
                return f"Summary unavailable (API error: {output['error']})"
        return "Summary unavailable (unexpected API output)."
    except Exception as e:
        # Deliberate best-effort: the dashboard should render even if the
        # LLM call fails (timeout, HTTP error, bad JSON).
        return f"Summary unavailable (LLM error: {e})"
|
|
|
|
|
st.markdown("---")

# Main analysis pipeline, triggered by the button:
# fetch news -> extract keywords -> search Reddit -> score sentiment
# -> visualize -> LLM summary.
if st.button("π Analyze!"):
    progress = st.progress(0, text="Fetching news...")

    progress.progress(10, text="Fetching news articles...")
    articles = fetch_news(query=query, days=selected_days, max_results=max_articles)

    if articles:
        progress.progress(40, text="Extracting keywords...")
        keywords = extract_keywords(articles)
        keywords = clean_keywords(keywords)

        st.markdown("#### π° News Stories")
        with st.expander("View fetched news stories", expanded=False):
            for art in articles:
                # Title plus a 180-character preview of the article body.
                # NOTE(review): assumes each article dict has 'title' and
                # 'content' keys -- confirm against fetch_news's output.
                st.markdown(f"<div style='padding:8px 0;'><b>{art.get('title','')}</b><br>"
                            f"<span style='color:#4a4a4a;font-size:0.9em'>{art.get('content','')[:180]}...</span></div>",
                            unsafe_allow_html=True)
        st.markdown("---")

        st.markdown("#### π·οΈ Extracted Keywords")
        st.info(", ".join(keywords) if keywords else "No keywords found.", icon="π")
        st.markdown("---")

        progress.progress(60, text="Searching Reddit...")
        # An empty subreddit input means "search all of Reddit".
        reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)

        progress.progress(80, text="Analyzing sentiment...")
        # NOTE(review): progress stops at 80 -- it is never advanced to 100
        # once the pipeline completes.
        sentiment_results = analyze_sentiment(reddit_data)

        st.markdown("#### πΎ Reddit Posts")
        if reddit_data:
            # NOTE(review): assumes each post dict carries 'body',
            # 'subreddit' and 'score' keys -- verify against search_reddit.
            st.dataframe(pd.DataFrame(reddit_data)[["body", "subreddit", "score"]], height=180)
        else:
            st.warning("No Reddit posts found for these keywords.", icon="β οΈ")
        st.markdown("---")

        st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")

        # Attach one sentiment label per post (parallel lists assumed --
        # analyze_sentiment must return one result per input row).
        results_df = pd.DataFrame(reddit_data)
        results_df['sentiment'] = sentiment_results

        st.markdown("#### π§ Sentiment Analysis")
        # Scrub the label counts before plotting: drop NaN and empty/blank
        # labels, and coerce index labels to str so .str ops are valid.
        # NOTE(review): the isna() filter is redundant right after
        # value_counts(dropna=True), but it is harmless.
        sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
        sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
        sentiment_counts = sentiment_counts.rename(str)
        sentiment_counts = sentiment_counts[sentiment_counts.index.str.strip() != '']

        # Reshape counts into the tidy two-column frame plotly expects.
        sentiment_df = pd.DataFrame({
            'Sentiment': sentiment_counts.index,
            'Count': sentiment_counts.values
        })

        # Fixed brand palette for the pie slices.
        palette = ['#183153', '#3277b3', '#375a7f', '#3bb273', '#ffb347', '#e05a47']

        if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
            fig = px.pie(
                sentiment_df,
                names='Sentiment',
                values='Count',
                color='Sentiment',
                color_discrete_sequence=palette,
                title='Sentiment Breakdown',
                hole=0.45  # donut-style chart
            )
            fig.update_traces(
                textposition='inside',
                textinfo='percent+label+value'
            )
            fig.update_layout(
                showlegend=True,
                plot_bgcolor="#f8fafc",
                paper_bgcolor="#f8fafc",
                font=dict(size=15),
                margin=dict(t=60, b=60, r=40, l=40)
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No valid sentiment data for plotting.", icon="πΆ")

        # Context for the LLM prompt: the 3 most active subreddits, and the
        # first 50 characters of up to 3 post bodies. Guard against the
        # columns being absent (e.g. no Reddit results at all).
        top_subreddits = results_df['subreddit'].value_counts().index[:3].tolist() if 'subreddit' in results_df else []
        top_posts = results_df['body'].dropna().astype(str).str[:50].tolist()[:3] if 'body' in results_df else []

        st.markdown("### π AI-Generated Summary & Suggestions")
        summary = get_summary_with_hf_llm(
            keywords=keywords,
            sentiment_counts=sentiment_counts,
            top_subreddits=top_subreddits,
            top_posts=top_posts,
            user_query=query
        )
        st.info(summary)
    else:
        st.warning("No news articles found for that query. Try a different topic or broaden the date range.", icon="π°")
|
|
|