Sami2000 committed on
Commit
bc8676f
Β·
verified Β·
1 Parent(s): d03cc6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -40
app.py CHANGED
@@ -5,64 +5,106 @@ from sentiment_analyzer import analyze_sentiment
5
  from reddit_search import search_reddit
6
  import pandas as pd
7
  import plotly.express as px
 
8
 
9
- st.set_page_config(page_title="INDOPACOM Sentiment Dashboard", layout="wide")
10
-
11
- st.title("Military Sentiment Dashboard")
12
- st.write(
13
- """
14
- πŸ”Ž **About:** This dashboard finds the latest news about a topic, extracts trending keywords,
15
- and analyzes public sentiment from Reddit using state-of-the-art AI.
16
- \n
17
- _Educational demonstration only. Does not represent any official views._
18
- """
19
  )
20
 
21
- # --- NEW: Date range selection ---
22
- date_range = st.selectbox(
23
- "Search news from:",
24
- options=[("Last 24 hours", 1), ("Last 7 days", 7)],
25
- format_func=lambda x: x[0]
26
  )
27
- selected_days = date_range[1]
28
-
29
- # --- NEW: Subreddit input ---
30
- subreddit = st.text_input("Specify a subreddit (optional, e.g., 'Military' or 'worldnews'). Leave blank for all.", value="")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
- query = st.text_input("Enter your topic or query:", value="Enter Value Here")
33
  max_articles = st.slider("Number of news articles:", 5, 25, 12)
34
 
35
- # --- CLEANING FUNCTION ---
36
  def clean_keywords(keywords):
37
- """
38
- Remove blanks, punctuation-only, and duplicates (case-insensitive).
39
- Returns a cleaned list of keywords.
40
- """
41
  cleaned = []
42
  seen = set()
43
  for kw in keywords:
44
  kw = kw.strip()
45
- # Only keep if non-empty and contains at least one alphanumeric character
46
  if kw and any(c.isalnum() for c in kw) and kw.lower() not in seen:
47
  cleaned.append(kw)
48
  seen.add(kw.lower())
49
  return cleaned
50
 
51
- if st.button("Search"):
52
- # --- Fancy progress bar ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  progress = st.progress(0, text="Fetching news...")
54
 
55
- # Step 1: Fetch news
56
  progress.progress(10, text="Fetching news articles...")
57
  articles = fetch_news(query=query, days=selected_days, max_results=max_articles)
58
 
59
  if articles:
60
  progress.progress(40, text="Extracting keywords...")
61
  keywords = extract_keywords(articles)
62
-
63
- # --- Clean up keywords ---
64
  keywords = clean_keywords(keywords)
65
- st.write("**Extracted Keywords for Reddit Search:**", keywords)
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  progress.progress(60, text="Searching Reddit...")
68
  reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
@@ -70,17 +112,20 @@ if st.button("Search"):
70
  progress.progress(80, text="Analyzing sentiment...")
71
  sentiment_results = analyze_sentiment(reddit_data)
72
 
73
- # --- Display results ---
 
 
 
 
 
 
74
  st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")
75
 
76
- # Create DataFrame for results
77
  results_df = pd.DataFrame(reddit_data)
78
  results_df['sentiment'] = sentiment_results
79
 
80
- # Optional: Show data table
81
- st.dataframe(results_df)
82
-
83
- # Robust and crash-proof sentiment plot!
84
  sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
85
  sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
86
  sentiment_counts = sentiment_counts.rename(str)
@@ -91,16 +136,47 @@ if st.button("Search"):
91
  'Count': sentiment_counts.values
92
  })
93
 
 
 
 
94
  if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
95
  fig = px.bar(
96
  sentiment_df,
97
  x='Sentiment',
98
  y='Count',
 
 
 
99
  labels={'Sentiment': 'Sentiment', 'Count': 'Count'},
100
  title='Sentiment Distribution'
101
  )
 
 
 
 
 
 
 
 
 
 
102
  st.plotly_chart(fig, use_container_width=True)
103
  else:
104
- st.info("No valid sentiment data for plotting.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  else:
106
- st.warning("No news articles found for that query. Try a different topic or broaden the date range.")
 
5
  from reddit_search import search_reddit
6
  import pandas as pd
7
  import plotly.express as px
8
+ import requests
9
 
10
+ st.set_page_config(
11
+ page_title="INDOPACOM Sentiment Dashboard",
12
+ layout="wide",
13
+ initial_sidebar_state="expanded"
 
 
 
 
 
 
14
  )
15
 
16
+ st.markdown(
17
+ "<h1 style='text-align:center; color:#183153; font-weight:900;'>🌏 Military Sentiment Dashboard</h1>",
18
+ unsafe_allow_html=True
 
 
19
  )
20
+ st.markdown(
21
+ "<h4 style='text-align:center; color:#375a7f;'>AI-powered OSINT: See what the news & social web really think</h4>",
22
+ unsafe_allow_html=True
23
+ )
24
+ st.markdown("---")
25
+
26
+ # --- Date range, subreddit, and topic input row ---
27
+ col1, col2, col3 = st.columns([1, 1, 2])
28
+ with col1:
29
+ date_range = st.selectbox(
30
+ "Search news from:",
31
+ options=[("Last 24 hours", 1), ("Last 7 days", 7)],
32
+ format_func=lambda x: x[0]
33
+ )
34
+ selected_days = date_range[1]
35
+ with col2:
36
+ subreddit = st.text_input(
37
+ "Subreddit (optional)",
38
+ value="",
39
+ help="e.g. 'Military', 'worldnews', or leave blank for all"
40
+ )
41
+ with col3:
42
+ query = st.text_input("Enter your topic or query:", value="US Army INDOPACOM")
43
 
 
44
  max_articles = st.slider("Number of news articles:", 5, 25, 12)
45
 
 
46
  def clean_keywords(keywords):
 
 
 
 
47
  cleaned = []
48
  seen = set()
49
  for kw in keywords:
50
  kw = kw.strip()
 
51
  if kw and any(c.isalnum() for c in kw) and kw.lower() not in seen:
52
  cleaned.append(kw)
53
  seen.add(kw.lower())
54
  return cleaned
55
 
56
# --- AI SUMMARY FUNCTION ---
def get_summary_with_hf_llm(keywords, sentiment_counts, top_subreddits, top_posts, user_query):
    """Ask a hosted Hugging Face LLM (zephyr-7b-beta) to summarize the OSINT
    findings and suggest follow-up search terms.

    Parameters:
        keywords: cleaned keyword list extracted from the news articles.
        sentiment_counts: mapping of sentiment label -> count
            (e.g. a pandas value_counts result; converted via dict()).
        top_subreddits: most active subreddit names.
        top_posts: example Reddit post title/body snippets.
        user_query: the topic the user searched for.

    Returns:
        The generated summary string, or a human-readable
        "Summary unavailable ..." message on any API failure.
    """
    import os  # local import: only needed for the optional API token

    prompt = (
        f"Summarize these OSINT findings in 3-4 sentences for a non-technical military audience.\n"
        f"Query: {user_query}\n"
        f"Keywords found: {', '.join(keywords[:8])}...\n"
        f"Sentiment counts: {dict(sentiment_counts)}\n"
        f"Most active subreddits: {', '.join(top_subreddits)}\n"
        f"Example Reddit post titles: {', '.join(top_posts)}\n"
        "Then, suggest 3-5 additional related search terms that could improve situational awareness."
    )

    url = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
    # The free Inference API often rejects anonymous calls; send a bearer
    # token when one is configured, but keep working without one so the
    # behavior is backward compatible.
    headers = {}
    token = os.environ.get("HF_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    try:
        resp = requests.post(url, json={"inputs": prompt}, headers=headers, timeout=60)
        resp.raise_for_status()
        output = resp.json()
        # Output format can vary; support both possible shapes. Guard the
        # list case against an empty list or a non-dict element: the
        # original indexed output[0] unconditionally, so a malformed
        # response raised and was mis-reported as an "LLM error".
        if (isinstance(output, list) and output
                and isinstance(output[0], dict) and "generated_text" in output[0]):
            return output[0]["generated_text"]
        if isinstance(output, dict) and "generated_text" in output:
            return output["generated_text"]
        return "Summary unavailable (unexpected API output)."
    except Exception as e:  # network errors, bad JSON, HTTP status errors
        return f"Summary unavailable (LLM error: {e})"
82
+
83
+ st.markdown("---")
84
+
85
+ if st.button("πŸš€ Analyze!"):
86
  progress = st.progress(0, text="Fetching news...")
87
 
88
+ # Fetch news
89
  progress.progress(10, text="Fetching news articles...")
90
  articles = fetch_news(query=query, days=selected_days, max_results=max_articles)
91
 
92
  if articles:
93
  progress.progress(40, text="Extracting keywords...")
94
  keywords = extract_keywords(articles)
 
 
95
  keywords = clean_keywords(keywords)
96
+
97
+ st.markdown("#### πŸ“° News Stories")
98
+ with st.expander("View fetched news stories", expanded=False):
99
+ for art in articles:
100
+ st.markdown(f"<div style='padding:8px 0;'><b>{art.get('title','')}</b><br>"
101
+ f"<span style='color:#4a4a4a;font-size:0.9em'>{art.get('content','')[:180]}...</span></div>",
102
+ unsafe_allow_html=True)
103
+ st.markdown("---")
104
+
105
+ st.markdown("#### 🏷️ Extracted Keywords")
106
+ st.info(", ".join(keywords) if keywords else "No keywords found.", icon="πŸ”‘")
107
+ st.markdown("---")
108
 
109
  progress.progress(60, text="Searching Reddit...")
110
  reddit_data = search_reddit(keywords, subreddit=subreddit if subreddit else None)
 
112
  progress.progress(80, text="Analyzing sentiment...")
113
  sentiment_results = analyze_sentiment(reddit_data)
114
 
115
+ st.markdown("#### πŸ‘Ύ Reddit Posts")
116
+ if reddit_data:
117
+ st.dataframe(pd.DataFrame(reddit_data)[["body", "subreddit", "score"]], height=180)
118
+ else:
119
+ st.warning("No Reddit posts found for these keywords.", icon="⚠️")
120
+ st.markdown("---")
121
+
122
  st.success(f"Found {len(reddit_data)} Reddit posts. Sentiment analysis complete.")
123
 
124
+ # --- Sentiment Results Table & Plot ---
125
  results_df = pd.DataFrame(reddit_data)
126
  results_df['sentiment'] = sentiment_results
127
 
128
+ st.markdown("#### 🧠 Sentiment Analysis")
 
 
 
129
  sentiment_counts = results_df['sentiment'].value_counts(dropna=True)
130
  sentiment_counts = sentiment_counts[~sentiment_counts.index.isna() & (sentiment_counts.index != '')]
131
  sentiment_counts = sentiment_counts.rename(str)
 
136
  'Count': sentiment_counts.values
137
  })
138
 
139
+ # Nice color palette for bars
140
+ palette = ['#183153', '#3277b3', '#375a7f', '#3bb273', '#ffb347', '#e05a47']
141
+
142
  if not sentiment_df.empty and sentiment_df['Sentiment'].nunique() > 0:
143
  fig = px.bar(
144
  sentiment_df,
145
  x='Sentiment',
146
  y='Count',
147
+ color='Sentiment',
148
+ color_discrete_sequence=palette,
149
+ text='Count',
150
  labels={'Sentiment': 'Sentiment', 'Count': 'Count'},
151
  title='Sentiment Distribution'
152
  )
153
+ fig.update_traces(marker_line_width=1, textposition="outside")
154
+ fig.update_layout(
155
+ yaxis=dict(title='Count'),
156
+ xaxis=dict(title='Sentiment'),
157
+ showlegend=False,
158
+ plot_bgcolor="#f8fafc",
159
+ paper_bgcolor="#f8fafc",
160
+ font=dict(size=15),
161
+ margin=dict(t=60, b=60, r=40, l=40)
162
+ )
163
  st.plotly_chart(fig, use_container_width=True)
164
  else:
165
+ st.info("No valid sentiment data for plotting.", icon="😢")
166
+
167
+ # --- AI SUMMARY SECTION ---
168
+ # Get top 3 subreddits and top 3 post titles for summary
169
+ top_subreddits = results_df['subreddit'].value_counts().index[:3].tolist() if 'subreddit' in results_df else []
170
+ top_posts = results_df['body'].dropna().astype(str).str[:50].tolist()[:3] if 'body' in results_df else []
171
+
172
+ st.markdown("### πŸ“ AI-Generated Summary & Suggestions")
173
+ summary = get_summary_with_hf_llm(
174
+ keywords=keywords,
175
+ sentiment_counts=sentiment_counts,
176
+ top_subreddits=top_subreddits,
177
+ top_posts=top_posts,
178
+ user_query=query
179
+ )
180
+ st.info(summary)
181
  else:
182
+ st.warning("No news articles found for that query. Try a different topic or broaden the date range.", icon="πŸ“°")