|
|
import os

import pandas as pd
import plotly.express as px
import requests
import streamlit as st

from keyword_extractor import extract_keywords
from news_fetcher import fetch_news
from reddit_search import search_reddit
from sentiment_analyzer import analyze_sentiment
|
|
|
|
|
# --- Page configuration and header -------------------------------------------
st.set_page_config(
    page_title="INDOPACOM Sentiment Dashboard",
    layout="wide",
    initial_sidebar_state="expanded",
)

_TITLE_HTML = (
    "<h1 style='text-align:center; color:#183153; font-weight:900;'>"
    "π Pseudo Military Sentiment Dashboard</h1>"
)
st.markdown(_TITLE_HTML, unsafe_allow_html=True)

_SUBTITLE_HTML = (
    "<h4 style='text-align:center; color:#375a7f;'>"
    "AI-powered Open Source Research and Analysis: For Educational Purposes Only</h4>"
)
st.markdown(_SUBTITLE_HTML, unsafe_allow_html=True)

st.markdown("---")

# --- Search controls ----------------------------------------------------------
col1, col2, col3 = st.columns([1, 1, 2])

with col1:
    # Each option is a (label, days) pair; format_func shows only the label.
    range_choice = st.selectbox(
        "Search news from:",
        options=[("Last 24 hours", 1), ("Last 7 days", 7)],
        format_func=lambda opt: opt[0],
    )
    selected_days = range_choice[1]

with col2:
    subreddit = st.text_input(
        "Subreddit (optional)",
        value="",
        help="e.g. 'Military', 'worldnews', or leave blank for all",
    )

with col3:
    query = st.text_input("Enter your topic or query:", value="Enter Value Here")

max_articles = st.slider("Number of news articles:", 5, 25, 12)
|
|
|
|
|
def clean_keywords(keywords):
    """Normalize a keyword list: strip whitespace, drop junk, dedupe.

    Keeps the first occurrence (original casing) of each keyword,
    comparing case-insensitively, and discards entries that are empty
    or contain no alphanumeric characters. Input order is preserved.
    """
    seen_lower = set()
    result = []
    for raw in keywords:
        term = raw.strip()
        if not term:
            continue
        # Pure punctuation/symbol tokens carry no signal -- skip them.
        if not any(ch.isalnum() for ch in term):
            continue
        key = term.lower()
        if key in seen_lower:
            continue
        seen_lower.add(key)
        result.append(term)
    return result
|
|
|
|
|
|
|
|
def get_summary_with_hf_llm(keywords, sentiment_counts, top_subreddits, top_posts, user_query):
    """Summarize the OSINT findings via the HuggingFace hosted inference API.

    Builds a prompt from the collected evidence and posts it to the
    zephyr-7b-beta text-generation endpoint.

    Args:
        keywords: extracted keywords (only the first 8 are included).
        sentiment_counts: mapping of sentiment label -> count
            (e.g. a pandas Series; rendered via dict()).
        top_subreddits: most active subreddit names.
        top_posts: example Reddit post snippets.
        user_query: the user's original search query.

    Returns:
        The generated summary text, or a human-readable
        "Summary unavailable (...)" message on any failure.
        This function never raises.
    """
    prompt = (
        f"Summarize these OSINT findings in 3-4 sentences for a non-technical military audience.\n"
        f"Query: {user_query}\n"
        f"Keywords found: {', '.join(keywords[:8])}...\n"
        f"Sentiment counts: {dict(sentiment_counts)}\n"
        f"Most active subreddits: {', '.join(top_subreddits)}\n"
        f"Example Reddit post titles: {', '.join(top_posts)}\n"
        "Then, suggest 3-5 additional related search terms that could improve situational awareness."
    )

    url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"

    # Optional auth: unauthenticated calls are heavily rate-limited by HF.
    # Backward compatible -- if HF_API_TOKEN is unset, behave as before.
    headers = {}
    token = os.environ.get("HF_API_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        resp = requests.post(url, json={"inputs": prompt}, headers=headers, timeout=60)
        resp.raise_for_status()
        output = resp.json()
        # The inference API returns either [{"generated_text": ...}] or
        # {"generated_text": ...}; failures often come back as {"error": ...}.
        # Guard against an empty list so we don't raise (and then report a
        # misleading "list index out of range" as an LLM error).
        if (isinstance(output, list) and output
                and isinstance(output[0], dict) and "generated_text" in output[0]):
            return output[0]["generated_text"]
        if isinstance(output, dict):
            if "generated_text" in output:
                return output["generated_text"]
            if "error" in output:
                # Surface the API's own error (e.g. "model is loading").
                return f"Summary unavailable (API error: {output['error']})"
        return "Summary unavailable (unexpected API output)."
    except Exception as e:
        # Deliberate best-effort: the dashboard should render even if the
        # LLM call fails (timeout, HTTP error, bad JSON).
        return f"Summary unavailable (LLM error: {e})"
|
|
|
|
|
st.markdown("---")

# Main analysis pipeline, triggered by the button:
# fetch news -> extract keywords -> search Reddit -> score sentiment
# -> visualize -> LLM summary.
if st.button("π Analyze!"):
    progress = st.progress(0, text="Fetching news...")

    progress.progress(10, text="Fetching news articles...")
    articles = fetch_news(query=query, days=selected_days, max_results=max_articles)

    if articles:
        progress.progress(40, text="Extracting keywords...")
        keywords = extract_keywords(articles)
        keywords = clean_keywords(keywords)

        st.markdown("#### π° News Stories")
        with st.expander("View fetched news stories", expanded=False):
            for art in articles:
                # Title plus a 180-character preview of the article body.
                # NOTE(review): assumes each article dict has 'title' and
                # 'content' keys -- confirm against fetch_news's output.
                st.markdown(f"<div style='padding:8px 0;'><b>{art.get('title','')}</b><br>"
                            f"<span style='color:#4a4a4a;font-size:0.9em'>{art.get('content','')[:180]}...</span></div>",
                            unsafe_allow_html=True)
        st.markdown("---")

        st.markdown("#### π·οΈ Extracted Keywords")
        st.info(", ".join(keywords) if keywords else "No keywords found.", icon="π")
        st.markdown("---")

        progress.progress(60, text="Searching Reddit...")
        # An empty subreddit input means "search all of Reddit".
        reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)

        progress.progress(80, text="Analyzing sentiment...")
        # NOTE(review): progress stops at 80 -- it is never advanced to 100
        # once the pipeline completes.
        sentiment_results = analyze_sentiment(reddit_data)

        st.markdown("#### πΎ Reddit Posts")
        if reddit_data:
            # NOTE(review): assumes each post dict carries 'body',
            # 'subreddit' and 'score' keys -- verify against search_reddit.
            st.dataframe(pd.DataFrame(reddit_data)[["body", "subreddit", "score"]], height=180)
        else:
            st.warning("No Reddit posts found for these keywords.", icon="β οΈ")
        st.markdown("---")

        st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")

        # Attach one sentiment label per post (parallel lists assumed --
        # analyze_sentiment must return one result per input row).
        results_df = pd.DataFrame(reddit_data)
        results_df['sentiment'] = sentiment_results

        st.markdown("#### π§ Sentiment Analysis")
        # Scrub the label counts before plotting: drop NaN and empty/blank
        # labels, and coerce index labels to str so .str ops are valid.
        # NOTE(review): the isna() filter is redundant right after
        # value_counts(dropna=True), but it is harmless.
        sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
        sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
        sentiment_counts = sentiment_counts.rename(str)
        sentiment_counts = sentiment_counts[sentiment_counts.index.str.strip() != '']

        # Reshape counts into the tidy two-column frame plotly expects.
        sentiment_df = pd.DataFrame({
            'Sentiment': sentiment_counts.index,
            'Count': sentiment_counts.values
        })

        # Fixed brand palette for the pie slices.
        palette = ['#183153', '#3277b3', '#375a7f', '#3bb273', '#ffb347', '#e05a47']

        if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
            fig = px.pie(
                sentiment_df,
                names='Sentiment',
                values='Count',
                color='Sentiment',
                color_discrete_sequence=palette,
                title='Sentiment Breakdown',
                hole=0.45  # donut-style chart
            )
            fig.update_traces(
                textposition='inside',
                textinfo='percent+label+value'
            )
            fig.update_layout(
                showlegend=True,
                plot_bgcolor="#f8fafc",
                paper_bgcolor="#f8fafc",
                font=dict(size=15),
                margin=dict(t=60, b=60, r=40, l=40)
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No valid sentiment data for plotting.", icon="πΆ")

        # Context for the LLM prompt: the 3 most active subreddits, and the
        # first 50 characters of up to 3 post bodies. Guard against the
        # columns being absent (e.g. no Reddit results at all).
        top_subreddits = results_df['subreddit'].value_counts().index[:3].tolist() if 'subreddit' in results_df else []
        top_posts = results_df['body'].dropna().astype(str).str[:50].tolist()[:3] if 'body' in results_df else []

        st.markdown("### π AI-Generated Summary & Suggestions")
        summary = get_summary_with_hf_llm(
            keywords=keywords,
            sentiment_counts=sentiment_counts,
            top_subreddits=top_subreddits,
            top_posts=top_posts,
            user_query=query
        )
        st.info(summary)
    else:
        st.warning("No news articles found for that query. Try a different topic or broaden the date range.", icon="π°")
|
|
|