File size: 7,035 Bytes
12d8780
 
 
 
 
 
 
bc8676f
12d8780
bc8676f
 
 
 
12d8780
 
bc8676f
4ad7c91
bc8676f
12d8780
bc8676f
4ad7c91
bc8676f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d66ea09
12d8780
 
 
84677f9
 
 
 
 
 
 
 
 
 
bc8676f
 
 
 
 
 
 
 
 
38051e1
bc8676f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12d8780
 
bc8676f
12d8780
 
 
 
 
 
84677f9
bc8676f
 
 
 
 
 
 
 
 
 
 
 
84677f9
12d8780
 
84677f9
12d8780
c05c2e5
84677f9
bc8676f
 
 
 
 
 
 
84677f9
 
73d65c3
84677f9
 
12d8780
bc8676f
0814da7
 
5242888
 
0814da7
5242888
38051e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import streamlit as st
from news_fetcher import fetch_news
from keyword_extractor import extract_keywords
from sentiment_analyzer import analyze_sentiment
from reddit_search import search_reddit
import pandas as pd
import plotly.express as px
import requests

# --- Page chrome: wide layout, branded header -----------------------------
st.set_page_config(
    page_title="INDOPACOM Sentiment Dashboard",
    layout="wide",
    initial_sidebar_state="expanded"
)

st.markdown(
    "<h1 style='text-align:center; color:#183153; font-weight:900;'>🌏 Pseudo Military Sentiment Dashboard</h1>",
    unsafe_allow_html=True
)
st.markdown(
    "<h4 style='text-align:center; color:#375a7f;'>AI-powered Open Source Research and Analysis: For Educational Purposes Only</h4>",
    unsafe_allow_html=True
)
st.markdown("---")

# --- Date range, subreddit, and topic input row ---
col1, col2, col3 = st.columns([1, 1, 2])
with col1:
    # Options are (label, days) pairs; format_func displays only the label,
    # while the selected tuple's second element carries the numeric window.
    date_range = st.selectbox(
        "Search news from:",
        options=[("Last 24 hours", 1), ("Last 7 days", 7)],
        format_func=lambda x: x[0]
    )
    selected_days = date_range[1]
with col2:
    subreddit = st.text_input(
        "Subreddit (optional)",
        value="",
        help="e.g. 'Military', 'worldnews', or leave blank for all"
    )
with col3:
    # BUG FIX: the widget previously used value="Enter Value Here", so the
    # literal placeholder text was submitted as the search query whenever the
    # user forgot to edit it. A placeholder is shown as a hint in the empty
    # field but never becomes the query value.
    query = st.text_input(
        "Enter your topic or query:",
        value="",
        placeholder="Enter Value Here"
    )

# Cap on how many articles fetch_news() is asked for (min 5, max 25, default 12).
max_articles = st.slider("Number of news articles:", 5, 25, 12)

def clean_keywords(keywords):
    """Tidy a keyword list: trim whitespace, drop empties and punctuation-only
    terms, and remove case-insensitive duplicates (first occurrence wins,
    original casing preserved). Order of surviving keywords is unchanged.
    """
    unique = []
    seen_lower = set()
    for raw in keywords:
        term = raw.strip()
        if not term:
            continue
        # Keep only terms containing at least one letter or digit.
        if not any(ch.isalnum() for ch in term):
            continue
        folded = term.lower()
        if folded in seen_lower:
            continue
        seen_lower.add(folded)
        unique.append(term)
    return unique

# --- AI SUMMARY FUNCTION ---
def get_summary_with_hf_llm(keywords, sentiment_counts, top_subreddits, top_posts, user_query):
    """Summarize the OSINT findings via the hosted Hugging Face Inference API.

    Builds a single prompt from the extracted keywords, sentiment tallies,
    most active subreddits, and sample post titles, then posts it to the
    zephyr-7b-beta model endpoint.

    Returns the generated text on success, or a human-readable
    "Summary unavailable (...)" string on any failure — this function
    never raises, so the Streamlit page always renders something.
    """
    import os  # local import: only needed for the optional API token lookup

    prompt = (
        f"Summarize these OSINT findings in 3-4 sentences for a non-technical military audience.\n"
        f"Query: {user_query}\n"
        f"Keywords found: {', '.join(keywords[:8])}...\n"
        f"Sentiment counts: {dict(sentiment_counts)}\n"
        f"Most active subreddits: {', '.join(top_subreddits)}\n"
        f"Example Reddit post titles: {', '.join(top_posts)}\n"
        "Then, suggest 3-5 additional related search terms that could improve situational awareness."
    )

    url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
    # The public Inference API typically rejects anonymous calls; send a
    # Bearer token when one is configured. Without a token behavior is
    # unchanged from before (the request is simply sent unauthenticated).
    headers = {}
    token = os.environ.get("HF_API_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        resp = requests.post(url, json={"inputs": prompt}, headers=headers, timeout=60)
        resp.raise_for_status()
        output = resp.json()
        # Successful responses are either [{"generated_text": ...}] or
        # {"generated_text": ...}. Guard each shape explicitly so malformed
        # payloads (empty list, list of strings, error dicts) report
        # "unexpected API output" instead of a misleading "LLM error".
        if isinstance(output, list) and output and isinstance(output[0], dict) and "generated_text" in output[0]:
            return output[0]["generated_text"]
        if isinstance(output, dict) and "generated_text" in output:
            return output["generated_text"]
        return "Summary unavailable (unexpected API output)."
    except Exception as e:
        # Deliberate broad catch: network errors, HTTP errors, bad JSON —
        # all degrade to a readable message rather than crashing the page.
        return f"Summary unavailable (LLM error: {e})"

st.markdown("---")

# --- Main pipeline: runs top-to-bottom each time the user clicks Analyze ---
# Stages: fetch news -> extract keywords -> search Reddit -> sentiment ->
# charts -> LLM summary. Progress bar is updated between stages.
if st.button("πŸš€ Analyze!"):
    progress = st.progress(0, text="Fetching news...")

    # Fetch news for the user's query within the selected date window.
    progress.progress(10, text="Fetching news articles...")
    articles = fetch_news(query=query, days=selected_days, max_results=max_articles)

    if articles:
        progress.progress(40, text="Extracting keywords...")
        keywords = extract_keywords(articles)
        # Drop blanks, punctuation-only terms, and case-insensitive duplicates.
        keywords = clean_keywords(keywords)

        st.markdown("#### πŸ“° News Stories")
        with st.expander("View fetched news stories", expanded=False):
            # Each article renders as a bold title plus a 180-char content teaser.
            for art in articles:
                st.markdown(f"<div style='padding:8px 0;'><b>{art.get('title','')}</b><br>"
                            f"<span style='color:#4a4a4a;font-size:0.9em'>{art.get('content','')[:180]}...</span></div>",
                            unsafe_allow_html=True)
        st.markdown("---")

        st.markdown("#### 🏷️ Extracted Keywords")
        st.info(", ".join(keywords) if keywords else "No keywords found.", icon="πŸ”‘")
        st.markdown("---")

        # Reddit search uses the cleaned keywords; empty subreddit box means
        # search across all subreddits (None is passed through).
        progress.progress(60, text="Searching Reddit...")
        reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)

        # sentiment_results is expected to be one label per Reddit post,
        # aligned with reddit_data's order (assigned as a column below).
        progress.progress(80, text="Analyzing sentiment...")
        sentiment_results = analyze_sentiment(reddit_data)

        st.markdown("#### πŸ‘Ύ Reddit Posts")
        if reddit_data:
            # NOTE(review): assumes each post dict has 'body', 'subreddit',
            # and 'score' keys — confirm against search_reddit's contract.
            st.dataframe(pd.DataFrame(reddit_data)[["body", "subreddit", "score"]], height=180)
        else:
            st.warning("No Reddit posts found for these keywords.", icon="⚠️")
        st.markdown("---")

        st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")

        # --- Sentiment Results Table & Pie/Donut Chart ---
        results_df = pd.DataFrame(reddit_data)
        results_df['sentiment'] = sentiment_results

        st.markdown("#### 🧠 Sentiment Analysis")
        # Clean the label counts: drop NaN labels, drop empty-string labels,
        # coerce remaining labels to str, then drop whitespace-only labels.
        sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
        sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
        sentiment_counts = sentiment_counts.rename(str)
        sentiment_counts = sentiment_counts[sentiment_counts.index.str.strip() != '']

        sentiment_df = pd.DataFrame({
            'Sentiment': sentiment_counts.index,
            'Count': sentiment_counts.values
        })

        # Color palette
        palette = ['#183153', '#3277b3', '#375a7f', '#3bb273', '#ffb347', '#e05a47']

        if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
            fig = px.pie(
                sentiment_df,
                names='Sentiment',
                values='Count',
                color='Sentiment',
                color_discrete_sequence=palette,
                title='Sentiment Breakdown',
                hole=0.45  # Donut chart!
            )
            fig.update_traces(
                textposition='inside',
                textinfo='percent+label+value'
            )
            fig.update_layout(
                showlegend=True,
                plot_bgcolor="#f8fafc",
                paper_bgcolor="#f8fafc",
                font=dict(size=15),
                margin=dict(t=60, b=60, r=40, l=40)
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No valid sentiment data for plotting.", icon="😢")

        # --- AI SUMMARY SECTION ---
        # Guard on column presence: reddit_data may be empty, leaving
        # results_df without 'subreddit'/'body' columns at all.
        top_subreddits = results_df['subreddit'].value_counts().index[:3].tolist() if 'subreddit' in results_df else []
        top_posts = results_df['body'].dropna().astype(str).str[:50].tolist()[:3] if 'body' in results_df else []

        st.markdown("### πŸ“ AI-Generated Summary & Suggestions")
        summary = get_summary_with_hf_llm(
            keywords=keywords,
            sentiment_counts=sentiment_counts,
            top_subreddits=top_subreddits,
            top_posts=top_posts,
            user_query=query
        )
        st.info(summary)
    else:
        st.warning("No news articles found for that query. Try a different topic or broaden the date range.", icon="πŸ“°")