import streamlit as st
from news_fetcher import fetch_news
from keyword_extractor import extract_keywords
from sentiment_analyzer import analyze_sentiment
from reddit_search import search_reddit
import pandas as pd
import plotly.express as px
import requests
# --- Page configuration: wide layout with the sidebar open by default ---
st.set_page_config(
    page_title="INDOPACOM Sentiment Dashboard",
    layout="wide",
    initial_sidebar_state="expanded"
)
# Title and subtitle rendered as raw HTML for custom centering/colors.
st.markdown(
    "<h1 style='text-align:center; color:#183153; font-weight:900;'>π Pseudo Military Sentiment Dashboard</h1>",
    unsafe_allow_html=True
)
st.markdown(
    "<h4 style='text-align:center; color:#375a7f;'>AI-powered Open Source Research and Analysis: For Educational Purposes Only</h4>",
    unsafe_allow_html=True
)
st.markdown("---")
# --- Date range, subreddit, and topic input row ---
col1, col2, col3 = st.columns([1, 1, 2])
with col1:
    # Each option pairs a display label with a day count; format_func
    # shows only the label, while the tuple's second element is the value
    # actually passed to fetch_news below.
    date_range = st.selectbox(
        "Search news from:",
        options=[("Last 24 hours", 1), ("Last 7 days", 7)],
        format_func=lambda x: x[0]
    )
    selected_days = date_range[1]  # look-back window in days
with col2:
    # Blank input means "no subreddit filter" (handled at the search call).
    subreddit = st.text_input(
        "Subreddit (optional)",
        value="",
        help="e.g. 'Military', 'worldnews', or leave blank for all"
    )
with col3:
    query = st.text_input("Enter your topic or query:", value="Enter Value Here")
# Cap on fetched articles: min 5, max 25, default 12.
max_articles = st.slider("Number of news articles:", 5, 25, 12)
def clean_keywords(keywords):
    """Normalize a keyword list: strip whitespace, discard entries with no
    alphanumeric character, and deduplicate case-insensitively.

    The first occurrence of each keyword (by lowercased form) wins, and the
    original input order is preserved.
    """
    # Maps lowercased keyword -> first-seen original form; dict insertion
    # order gives us the order-preserving dedupe for free.
    first_seen = {}
    for raw in keywords:
        term = raw.strip()
        if not term:
            continue
        if not any(ch.isalnum() for ch in term):
            continue
        first_seen.setdefault(term.lower(), term)
    return list(first_seen.values())
# --- AI SUMMARY FUNCTION ---
def get_summary_with_hf_llm(keywords, sentiment_counts, top_subreddits, top_posts, user_query):
    """Ask a hosted Hugging Face model to summarize the OSINT findings.

    Builds a single text prompt from the collected data and POSTs it to the
    public HF Inference API for the zephyr-7b-beta model.

    Args:
        keywords: list of extracted keyword strings (only the first 8 are sent).
        sentiment_counts: mapping-like of sentiment label -> count.
        top_subreddits: list of the most active subreddit names.
        top_posts: list of example Reddit post title/body snippets.
        user_query: the original user search query.

    Returns:
        The generated summary text, or a "Summary unavailable ..." message on
        any API/parsing failure (this function never raises).
    """
    prompt = (
        f"Summarize these OSINT findings in 3-4 sentences for a non-technical military audience.\n"
        f"Query: {user_query}\n"
        f"Keywords found: {', '.join(keywords[:8])}...\n"
        f"Sentiment counts: {dict(sentiment_counts)}\n"
        f"Most active subreddits: {', '.join(top_subreddits)}\n"
        f"Example Reddit post titles: {', '.join(top_posts)}\n"
        "Then, suggest 3-5 additional related search terms that could improve situational awareness."
    )
    url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
    # NOTE(review): no Authorization header is sent — this relies on anonymous
    # HF inference access and may be rate-limited or rejected; confirm whether
    # an API token should be supplied via secrets.
    try:
        resp = requests.post(url, json={"inputs": prompt}, timeout=60)
        resp.raise_for_status()
        output = resp.json()
        # The API returns either a list of {"generated_text": ...} dicts or a
        # single dict. Guard against an empty list / non-dict elements so a
        # malformed payload is reported as "unexpected API output" rather than
        # being misclassified as an LLM error by the except below.
        if (isinstance(output, list) and output
                and isinstance(output[0], dict) and "generated_text" in output[0]):
            return output[0]["generated_text"]
        elif isinstance(output, dict) and "generated_text" in output:
            return output["generated_text"]
        else:
            return "Summary unavailable (unexpected API output)."
    except Exception as e:
        # Deliberate catch-all: the dashboard should degrade gracefully, never
        # crash, if the summary service is down or returns garbage.
        return f"Summary unavailable (LLM error: {e})"
st.markdown("---")
# --- Main analysis pipeline: runs only when the user clicks the button ---
# Stages: fetch news -> extract keywords -> search Reddit -> sentiment ->
# charts -> AI summary. The progress bar is advanced at each stage.
if st.button("π Analyze!"):
    progress = st.progress(0, text="Fetching news...")
    # Fetch news
    progress.progress(10, text="Fetching news articles...")
    articles = fetch_news(query=query, days=selected_days, max_results=max_articles)
    if articles:
        progress.progress(40, text="Extracting keywords...")
        keywords = extract_keywords(articles)
        keywords = clean_keywords(keywords)
        st.markdown("#### π° News Stories")
        with st.expander("View fetched news stories", expanded=False):
            for art in articles:
                # Title plus a 180-char content preview, rendered as HTML.
                # assumes each article dict has 'title'/'content' keys -- TODO confirm fetch_news contract
                st.markdown(f"<div style='padding:8px 0;'><b>{art.get('title','')}</b><br>"
                            f"<span style='color:#4a4a4a;font-size:0.9em'>{art.get('content','')[:180]}...</span></div>",
                            unsafe_allow_html=True)
        st.markdown("---")
        st.markdown("#### π·οΈ Extracted Keywords")
        st.info(", ".join(keywords) if keywords else "No keywords found.", icon="π")
        st.markdown("---")
        progress.progress(60, text="Searching Reddit...")
        # Blank subreddit input means "search all" (pass None).
        reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
        progress.progress(80, text="Analyzing sentiment...")
        # presumably returns one sentiment label per Reddit post; verify
        # length matches reddit_data before the column assignment below.
        sentiment_results = analyze_sentiment(reddit_data)
        st.markdown("#### πΎ Reddit Posts")
        if reddit_data:
            st.dataframe(pd.DataFrame(reddit_data)[["body", "subreddit", "score"]], height=180)
        else:
            st.warning("No Reddit posts found for these keywords.", icon="β οΈ")
        st.markdown("---")
        st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")
        # --- Sentiment Results Table & Pie/Donut Chart ---
        results_df = pd.DataFrame(reddit_data)
        results_df['sentiment'] = sentiment_results
        st.markdown("#### π§ Sentiment Analysis")
        # Tally sentiment labels, then scrub the index: drop NaN and empty
        # labels, coerce labels to str, and drop whitespace-only labels.
        sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
        sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
        sentiment_counts = sentiment_counts.rename(str)
        sentiment_counts = sentiment_counts[sentiment_counts.index.str.strip() != '']
        sentiment_df = pd.DataFrame({
            'Sentiment': sentiment_counts.index,
            'Count': sentiment_counts.values
        })
        # Color palette
        palette = ['#183153', '#3277b3', '#375a7f', '#3bb273', '#ffb347', '#e05a47']
        if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
            fig = px.pie(
                sentiment_df,
                names='Sentiment',
                values='Count',
                color='Sentiment',
                color_discrete_sequence=palette,
                title='Sentiment Breakdown',
                hole=0.45  # Donut chart!
            )
            fig.update_traces(
                textposition='inside',
                textinfo='percent+label+value'
            )
            fig.update_layout(
                showlegend=True,
                plot_bgcolor="#f8fafc",
                paper_bgcolor="#f8fafc",
                font=dict(size=15),
                margin=dict(t=60, b=60, r=40, l=40)
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No valid sentiment data for plotting.", icon="πΆ")
        # --- AI SUMMARY SECTION ---
        # Guard with `in results_df` since these columns are absent when
        # reddit_data is empty.
        top_subreddits = results_df['subreddit'].value_counts().index[:3].tolist() if 'subreddit' in results_df else []
        top_posts = results_df['body'].dropna().astype(str).str[:50].tolist()[:3] if 'body' in results_df else []
        st.markdown("### π AI-Generated Summary & Suggestions")
        summary = get_summary_with_hf_llm(
            keywords=keywords,
            sentiment_counts=sentiment_counts,
            top_subreddits=top_subreddits,
            top_posts=top_posts,
            user_query=query
        )
        st.info(summary)
    else:
        st.warning("No news articles found for that query. Try a different topic or broaden the date range.", icon="π°")