"""Pseudo-military OSINT sentiment dashboard (Streamlit).

Pipeline, driven by the "Analyze!" button:
    1. fetch news articles for a user query (``fetch_news``),
    2. extract and clean keywords (``extract_keywords``),
    3. search Reddit for those keywords (``search_reddit``),
    4. run sentiment analysis on the posts (``analyze_sentiment``),
    5. plot a sentiment donut chart and ask a hosted Hugging Face LLM
       for a short natural-language summary.

For educational purposes only.
"""

import streamlit as st
from news_fetcher import fetch_news
from keyword_extractor import extract_keywords
from sentiment_analyzer import analyze_sentiment
from reddit_search import search_reddit
import pandas as pd
import plotly.express as px
import requests

st.set_page_config(
    page_title="INDOPACOM Sentiment Dashboard",
    layout="wide",
    initial_sidebar_state="expanded",
)

# NOTE(review): these markdown strings are rendered with unsafe_allow_html=True,
# which suggests they originally carried HTML markup that was lost in a
# formatting pass; the text content is preserved verbatim — confirm against VCS.
st.markdown(
    "\n\n🌏 Pseudo Military Sentiment Dashboard\n\n",
    unsafe_allow_html=True,
)
st.markdown(
    "\n\nAI-powered Open Source Research and Analysis: For Educational Purposes Only\n\n",
    unsafe_allow_html=True,
)
st.markdown("---")

# --- Date range, subreddit, and topic input row ---
col1, col2, col3 = st.columns([1, 1, 2])
with col1:
    # Options are (label, days) pairs; format_func shows only the label
    # while the selected value keeps the numeric day count.
    date_range = st.selectbox(
        "Search news from:",
        options=[("Last 24 hours", 1), ("Last 7 days", 7)],
        format_func=lambda x: x[0],
    )
    selected_days = date_range[1]
with col2:
    subreddit = st.text_input(
        "Subreddit (optional)",
        value="",
        help="e.g. 'Military', 'worldnews', or leave blank for all",
    )
with col3:
    query = st.text_input("Enter your topic or query:", value="Enter Value Here")

max_articles = st.slider("Number of news articles:", 5, 25, 12)


def clean_keywords(keywords):
    """Normalize a keyword list.

    Strips whitespace, drops entries with no alphanumeric character, and
    de-duplicates case-insensitively while preserving first-seen order
    and original casing.

    Args:
        keywords: iterable of raw keyword strings.

    Returns:
        list[str]: cleaned, order-preserving, unique keywords.
    """
    cleaned = []
    seen = set()
    for kw in keywords:
        kw = kw.strip()
        if kw and any(c.isalnum() for c in kw) and kw.lower() not in seen:
            cleaned.append(kw)
            seen.add(kw.lower())
    return cleaned


# --- AI SUMMARY FUNCTION ---
def get_summary_with_hf_llm(keywords, sentiment_counts, top_subreddits, top_posts, user_query):
    """Ask a hosted Hugging Face model for a short OSINT summary.

    Builds a prompt from the analysis artifacts and POSTs it to the public
    inference endpoint for ``HuggingFaceH4/zephyr-7b-beta``.

    Args:
        keywords: cleaned keyword list (only the first 8 are sent).
        sentiment_counts: mapping-like object of sentiment label -> count.
        top_subreddits: most active subreddit names.
        top_posts: example Reddit post title/body snippets.
        user_query: the user's original search query.

    Returns:
        str: the generated summary, or a human-readable
        "Summary unavailable ..." message on any failure — this function
        is best-effort by design and never raises.
    """
    prompt = (
        f"Summarize these OSINT findings in 3-4 sentences for a non-technical military audience.\n"
        f"Query: {user_query}\n"
        f"Keywords found: {', '.join(keywords[:8])}...\n"
        f"Sentiment counts: {dict(sentiment_counts)}\n"
        f"Most active subreddits: {', '.join(top_subreddits)}\n"
        f"Example Reddit post titles: {', '.join(top_posts)}\n"
        "Then, suggest 3-5 additional related search terms that could improve situational awareness."
    )
    url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
    try:
        # NOTE(review): no Authorization header is sent; the anonymous
        # endpoint may rate-limit or reject requests — confirm whether an
        # HF API token should be supplied.
        resp = requests.post(url, json={"inputs": prompt}, timeout=60)
        resp.raise_for_status()
        output = resp.json()
        # FIX: guard against an empty list or non-dict first element before
        # indexing — the original `"generated_text" in output[0]` raised
        # IndexError/TypeError on unexpected-but-valid JSON responses
        # instead of returning the friendly fallback message.
        if (
            isinstance(output, list)
            and output
            and isinstance(output[0], dict)
            and "generated_text" in output[0]
        ):
            return output[0]["generated_text"]
        elif isinstance(output, dict) and "generated_text" in output:
            return output["generated_text"]
        else:
            return "Summary unavailable (unexpected API output)."
    except Exception as e:
        # Broad catch is deliberate: the summary must never crash the app.
        return f"Summary unavailable (LLM error: {e})"


st.markdown("---")

if st.button("🚀 Analyze!"):
    progress = st.progress(0, text="Fetching news...")

    # Fetch news
    progress.progress(10, text="Fetching news articles...")
    articles = fetch_news(query=query, days=selected_days, max_results=max_articles)

    if articles:
        progress.progress(40, text="Extracting keywords...")
        keywords = extract_keywords(articles)
        keywords = clean_keywords(keywords)

        st.markdown("#### 📰 News Stories")
        with st.expander("View fetched news stories", expanded=False):
            # NOTE(review): HTML tags in this f-string were apparently lost in
            # the same formatting pass noted above; content kept verbatim.
            for art in articles:
                st.markdown(
                    f"\n{art.get('title','')}\n"
                    f"{art.get('content','')[:180]}...\n",
                    unsafe_allow_html=True,
                )
        st.markdown("---")

        st.markdown("#### 🏷️ Extracted Keywords")
        st.info(", ".join(keywords) if keywords else "No keywords found.", icon="🔑")
        st.markdown("---")

        progress.progress(60, text="Searching Reddit...")
        reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)

        progress.progress(80, text="Analyzing sentiment...")
        sentiment_results = analyze_sentiment(reddit_data)

        st.markdown("#### 👾 Reddit Posts")
        if reddit_data:
            st.dataframe(pd.DataFrame(reddit_data)[["body", "subreddit", "score"]], height=180)
        else:
            st.warning("No Reddit posts found for these keywords.", icon="⚠️")
        st.markdown("---")

        st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")

        # --- Sentiment Results Table & Pie/Donut Chart ---
        results_df = pd.DataFrame(reddit_data)
        # assumes analyze_sentiment returns one label per post, aligned with
        # reddit_data — pandas raises on a length mismatch; TODO confirm.
        results_df['sentiment'] = sentiment_results

        st.markdown("#### 🧠 Sentiment Analysis")
        # Tally labels, then scrub NaN/empty/whitespace-only index entries so
        # junk labels never reach the chart.
        sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
        sentiment_counts = sentiment_counts[
            ~sentiment_counts.index.isna() & (sentiment_counts.index != '')
        ]
        sentiment_counts = sentiment_counts.rename(str)
        sentiment_counts = sentiment_counts[sentiment_counts.index.str.strip() != '']

        sentiment_df = pd.DataFrame({
            'Sentiment': sentiment_counts.index,
            'Count': sentiment_counts.values,
        })

        # Color palette
        palette = ['#183153', '#3277b3', '#375a7f', '#3bb273', '#ffb347', '#e05a47']

        if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
            fig = px.pie(
                sentiment_df,
                names='Sentiment',
                values='Count',
                color='Sentiment',
                color_discrete_sequence=palette,
                title='Sentiment Breakdown',
                hole=0.45,  # Donut chart!
            )
            fig.update_traces(
                textposition='inside',
                textinfo='percent+label+value',
            )
            fig.update_layout(
                showlegend=True,
                plot_bgcolor="#f8fafc",
                paper_bgcolor="#f8fafc",
                font=dict(size=15),
                margin=dict(t=60, b=60, r=40, l=40),
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No valid sentiment data for plotting.", icon="😶")

        # --- AI SUMMARY SECTION ---
        # `in results_df` tests column membership, so missing columns fall
        # back to empty lists instead of raising.
        top_subreddits = (
            results_df['subreddit'].value_counts().index[:3].tolist()
            if 'subreddit' in results_df else []
        )
        top_posts = (
            results_df['body'].dropna().astype(str).str[:50].tolist()[:3]
            if 'body' in results_df else []
        )

        st.markdown("### 📝 AI-Generated Summary & Suggestions")
        summary = get_summary_with_hf_llm(
            keywords=keywords,
            sentiment_counts=sentiment_counts,
            top_subreddits=top_subreddits,
            top_posts=top_posts,
            user_query=query,
        )
        st.info(summary)
    else:
        st.warning(
            "No news articles found for that query. Try a different topic or broaden the date range.",
            icon="📰",
        )