File size: 7,035 Bytes
12d8780
 
 
 
 
 
 
bc8676f
12d8780
bc8676f
 
 
 
12d8780
 
bc8676f
4ad7c91
bc8676f
12d8780
bc8676f
4ad7c91
bc8676f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d66ea09
12d8780
 
 
84677f9
 
 
 
 
 
 
 
 
 
bc8676f
 
 
 
 
 
 
 
 
38051e1
bc8676f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12d8780
 
bc8676f
12d8780
 
 
 
 
 
84677f9
bc8676f
 
 
 
 
 
 
 
 
 
 
 
84677f9
12d8780
 
84677f9
12d8780
c05c2e5
84677f9
bc8676f
 
 
 
 
 
 
84677f9
 
73d65c3
84677f9
 
12d8780
bc8676f
0814da7
 
5242888
 
0814da7
5242888
38051e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import streamlit as st
from news_fetcher import fetch_news
from keyword_extractor import extract_keywords
from sentiment_analyzer import analyze_sentiment
from reddit_search import search_reddit
import pandas as pd
import plotly.express as px
import requests

# --- Page chrome: wide layout, branded header -----------------------------
st.set_page_config(
    page_title="INDOPACOM Sentiment Dashboard",
    layout="wide",
    initial_sidebar_state="expanded"
)

st.markdown(
    "<h1 style='text-align:center; color:#183153; font-weight:900;'>🌏 Pseudo Military Sentiment Dashboard</h1>",
    unsafe_allow_html=True
)
st.markdown(
    "<h4 style='text-align:center; color:#375a7f;'>AI-powered Open Source Research and Analysis: For Educational Purposes Only</h4>",
    unsafe_allow_html=True
)
st.markdown("---")

# --- Date range, subreddit, and topic input row ---
col1, col2, col3 = st.columns([1, 1, 2])
with col1:
    # Options are (label, days) pairs; format_func displays only the label,
    # while the selected tuple's second element carries the numeric window.
    date_range = st.selectbox(
        "Search news from:",
        options=[("Last 24 hours", 1), ("Last 7 days", 7)],
        format_func=lambda x: x[0]
    )
    selected_days = date_range[1]
with col2:
    subreddit = st.text_input(
        "Subreddit (optional)",
        value="",
        help="e.g. 'Military', 'worldnews', or leave blank for all"
    )
with col3:
    # BUG FIX: the widget previously used value="Enter Value Here", so the
    # literal placeholder text was submitted as the search query whenever the
    # user forgot to edit it. A placeholder is shown as a hint in the empty
    # field but never becomes the query value.
    query = st.text_input(
        "Enter your topic or query:",
        value="",
        placeholder="Enter Value Here"
    )

# Cap on how many articles fetch_news() is asked for (min 5, max 25, default 12).
max_articles = st.slider("Number of news articles:", 5, 25, 12)

def clean_keywords(keywords):
    """Tidy a keyword list: trim whitespace, drop empties and punctuation-only
    terms, and remove case-insensitive duplicates (first occurrence wins,
    original casing preserved). Order of surviving keywords is unchanged.
    """
    unique = []
    seen_lower = set()
    for raw in keywords:
        term = raw.strip()
        if not term:
            continue
        # Keep only terms containing at least one letter or digit.
        if not any(ch.isalnum() for ch in term):
            continue
        folded = term.lower()
        if folded in seen_lower:
            continue
        seen_lower.add(folded)
        unique.append(term)
    return unique

# --- AI SUMMARY FUNCTION ---
def get_summary_with_hf_llm(keywords, sentiment_counts, top_subreddits, top_posts, user_query):
    """Summarize the OSINT findings via the hosted Hugging Face Inference API.

    Builds a single prompt from the extracted keywords, sentiment tallies,
    most active subreddits, and sample post titles, then posts it to the
    zephyr-7b-beta model endpoint.

    Returns the generated text on success, or a human-readable
    "Summary unavailable (...)" string on any failure — this function
    never raises, so the Streamlit page always renders something.
    """
    import os  # local import: only needed for the optional API token lookup

    prompt = (
        f"Summarize these OSINT findings in 3-4 sentences for a non-technical military audience.\n"
        f"Query: {user_query}\n"
        f"Keywords found: {', '.join(keywords[:8])}...\n"
        f"Sentiment counts: {dict(sentiment_counts)}\n"
        f"Most active subreddits: {', '.join(top_subreddits)}\n"
        f"Example Reddit post titles: {', '.join(top_posts)}\n"
        "Then, suggest 3-5 additional related search terms that could improve situational awareness."
    )

    url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
    # The public Inference API typically rejects anonymous calls; send a
    # Bearer token when one is configured. Without a token behavior is
    # unchanged from before (the request is simply sent unauthenticated).
    headers = {}
    token = os.environ.get("HF_API_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        resp = requests.post(url, json={"inputs": prompt}, headers=headers, timeout=60)
        resp.raise_for_status()
        output = resp.json()
        # Successful responses are either [{"generated_text": ...}] or
        # {"generated_text": ...}. Guard each shape explicitly so malformed
        # payloads (empty list, list of strings, error dicts) report
        # "unexpected API output" instead of a misleading "LLM error".
        if isinstance(output, list) and output and isinstance(output[0], dict) and "generated_text" in output[0]:
            return output[0]["generated_text"]
        if isinstance(output, dict) and "generated_text" in output:
            return output["generated_text"]
        return "Summary unavailable (unexpected API output)."
    except Exception as e:
        # Deliberate broad catch: network errors, HTTP errors, bad JSON —
        # all degrade to a readable message rather than crashing the page.
        return f"Summary unavailable (LLM error: {e})"

st.markdown("---")

# --- Main pipeline: runs top-to-bottom each time the user clicks Analyze ---
# Stages: fetch news -> extract keywords -> search Reddit -> sentiment ->
# charts -> LLM summary. Progress bar is updated between stages.
if st.button("πŸš€ Analyze!"):
    progress = st.progress(0, text="Fetching news...")

    # Fetch news for the user's query within the selected date window.
    progress.progress(10, text="Fetching news articles...")
    articles = fetch_news(query=query, days=selected_days, max_results=max_articles)

    if articles:
        progress.progress(40, text="Extracting keywords...")
        keywords = extract_keywords(articles)
        # Drop blanks, punctuation-only terms, and case-insensitive duplicates.
        keywords = clean_keywords(keywords)

        st.markdown("#### πŸ“° News Stories")
        with st.expander("View fetched news stories", expanded=False):
            # Each article renders as a bold title plus a 180-char content teaser.
            for art in articles:
                st.markdown(f"<div style='padding:8px 0;'><b>{art.get('title','')}</b><br>"
                            f"<span style='color:#4a4a4a;font-size:0.9em'>{art.get('content','')[:180]}...</span></div>",
                            unsafe_allow_html=True)
        st.markdown("---")

        st.markdown("#### 🏷️ Extracted Keywords")
        st.info(", ".join(keywords) if keywords else "No keywords found.", icon="πŸ”‘")
        st.markdown("---")

        # Reddit search uses the cleaned keywords; empty subreddit box means
        # search across all subreddits (None is passed through).
        progress.progress(60, text="Searching Reddit...")
        reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)

        # sentiment_results is expected to be one label per Reddit post,
        # aligned with reddit_data's order (assigned as a column below).
        progress.progress(80, text="Analyzing sentiment...")
        sentiment_results = analyze_sentiment(reddit_data)

        st.markdown("#### πŸ‘Ύ Reddit Posts")
        if reddit_data:
            # NOTE(review): assumes each post dict has 'body', 'subreddit',
            # and 'score' keys — confirm against search_reddit's contract.
            st.dataframe(pd.DataFrame(reddit_data)[["body", "subreddit", "score"]], height=180)
        else:
            st.warning("No Reddit posts found for these keywords.", icon="⚠️")
        st.markdown("---")

        st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")

        # --- Sentiment Results Table & Pie/Donut Chart ---
        results_df = pd.DataFrame(reddit_data)
        results_df['sentiment'] = sentiment_results

        st.markdown("#### 🧠 Sentiment Analysis")
        # Clean the label counts: drop NaN labels, drop empty-string labels,
        # coerce remaining labels to str, then drop whitespace-only labels.
        sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
        sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
        sentiment_counts = sentiment_counts.rename(str)
        sentiment_counts = sentiment_counts[sentiment_counts.index.str.strip() != '']

        sentiment_df = pd.DataFrame({
            'Sentiment': sentiment_counts.index,
            'Count': sentiment_counts.values
        })

        # Color palette
        palette = ['#183153', '#3277b3', '#375a7f', '#3bb273', '#ffb347', '#e05a47']

        if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
            fig = px.pie(
                sentiment_df,
                names='Sentiment',
                values='Count',
                color='Sentiment',
                color_discrete_sequence=palette,
                title='Sentiment Breakdown',
                hole=0.45  # Donut chart!
            )
            fig.update_traces(
                textposition='inside',
                textinfo='percent+label+value'
            )
            fig.update_layout(
                showlegend=True,
                plot_bgcolor="#f8fafc",
                paper_bgcolor="#f8fafc",
                font=dict(size=15),
                margin=dict(t=60, b=60, r=40, l=40)
            )
            st.plotly_chart(fig, use_container_width=True)
        else:
            st.info("No valid sentiment data for plotting.", icon="😢")

        # --- AI SUMMARY SECTION ---
        # Guard on column presence: reddit_data may be empty, leaving
        # results_df without 'subreddit'/'body' columns at all.
        top_subreddits = results_df['subreddit'].value_counts().index[:3].tolist() if 'subreddit' in results_df else []
        top_posts = results_df['body'].dropna().astype(str).str[:50].tolist()[:3] if 'body' in results_df else []

        st.markdown("### πŸ“ AI-Generated Summary & Suggestions")
        summary = get_summary_with_hf_llm(
            keywords=keywords,
            sentiment_counts=sentiment_counts,
            top_subreddits=top_subreddits,
            top_posts=top_posts,
            user_query=query
        )
        st.info(summary)
    else:
        st.warning("No news articles found for that query. Try a different topic or broaden the date range.", icon="πŸ“°")