NKessler committed on
Commit
ba64d84
·
verified ·
1 Parent(s): a9bc45d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +126 -265
app.py CHANGED
@@ -1,13 +1,12 @@
1
  # imports
2
- import re
3
- import typing
 
 
4
  import plotly.graph_objects as go
5
  import streamlit as st
 
6
  from textblob import TextBlob
7
- import json
8
- import google.generativeai as genai
9
- import os
10
- import concurrent.futures
11
  import textstat
12
  import trafilatura
13
  import requests
@@ -17,17 +16,35 @@ import nltk
17
  # constants
18
  MAX_WORDS = 400
19
 
20
- ARTICLE_A = """In a watershed moment for global tech governance, international regulatory bodies have introduced the comprehensive Artificial Intelligence Safeguard Act. For too long, Silicon Valley titans have operated in a wild west environment, prioritizing unchecked corporate greed and rapid deployment over public safety. This landmark legislation aims to establish rigorous ethical boundaries and mandatory safety audits before any advanced generative models can be released to the public. Proponents argue that without these essential guardrails, society faces catastrophic risks ranging from massive, unmitigated job displacement to the proliferation of deepfake-fueled misinformation that threatens the very fabric of our democratic institutions. "We cannot allow a handful of unelected tech billionaires to play roulette with humanity's future," stated the coalition's lead ethicist. By prioritizing human welfare over blind technological acceleration, the Act serves as a vital moral firewall, ensuring that the development of artificial general intelligence benefits society as a whole rather than just enriching the elite few."""
21
- ARTICLE_B = """Tech industry leaders and economists are sounding the alarm over the newly proposed Artificial Intelligence Safeguard Act, warning that the draconian legislation will severely cripple the nation’s economic engine. Critics argue that the bill is a masterclass in bureaucratic overreach, drowning agile tech startups in layers of punitive red tape and effectively stifling the very innovation that drives modern prosperity. By mandating arbitrary algorithmic audits and imposing heavy-handed restrictions on model training, the government is poised to surrender our global competitive edge to foreign adversaries who are not bound by such paralyzing regulations. "This isn't about safety; it's an innovation tax that penalizes success," argued a prominent venture capitalist. Analysts project that this short-sighted policy will force thousands of AI researchers to relocate overseas, draining billions of dollars in investment capital from the domestic market. Ultimately, framing technological progress as an inherent danger will only succeed in legislating the industry into obsolescence, destroying millions of future private-sector jobs in the process."""
22
- URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
23
- URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
24
 
25
  # Initialize the AI model
26
- GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
27
- if GEMINI_API_KEY:
28
- genai.configure(api_key=GEMINI_API_KEY)
 
29
 
30
- ai_model = genai.GenerativeModel('gemini-2.5-pro')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
  def _truncate_to_words(text: str, limit: int) -> str:
33
  """Truncates text by word count."""
@@ -65,14 +82,14 @@ def analyze_article(text: str) -> dict:
65
  Text to analyze:
66
  "{safe_text}"
67
  """
68
- response = ai_model.generate_content(
69
- prompt,
70
- generation_config={
71
- "response_mime_type": "application/json",
72
- "temperature": 0.1,
73
- }
74
  )
75
- llm_data = json.loads(response.text)
76
 
77
  subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
78
  raw_reading_ease = textstat.flesch_reading_ease(safe_text)
@@ -97,86 +114,6 @@ def analyze_article(text: str) -> dict:
97
  "reading_ease": max(0.0, min(100.0, raw_reading_ease)),
98
  }
99
 
100
-
101
- def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
102
- """Generates a Plotly gauge chart for sentiment visualization."""
103
- fig = go.Figure(
104
- go.Indicator(
105
- mode="gauge+number",
106
- value=score,
107
- domain={"x": [0, 1], "y": [0, 1]},
108
- title={"text": title, "font": {"size": 16}},
109
- gauge={
110
- "axis": {"range": [-1, 1], "tickwidth": 1, "tickcolor": "darkgrey"},
111
- "bar": {"color": "#475569", "thickness": 0.2},
112
- "bgcolor": "white",
113
- "borderwidth": 0,
114
- "steps": [
115
- {"range": [-1, -0.1], "color": "#fee2e2"},
116
- {"range": [-0.1, 0.1], "color": "#f1f5f9"},
117
- {"range": [0.1, 1], "color": "#dcfce3"},
118
- ],
119
- },
120
- )
121
- )
122
- fig.update_layout(height=280, margin=dict(l=20, r=20, t=60, b=20))
123
- return fig
124
-
125
-
126
- def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figure:
127
- """Generates an overlapping radar chart to compare emotions."""
128
- categories = sorted(list(set(list(results_a["tone_scores"].keys()) + list(results_b["tone_scores"].keys()))))
129
-
130
- val_a = [results_a["tone_scores"].get(c, 0) for c in categories]
131
- val_b = [results_b["tone_scores"].get(c, 0) for c in categories]
132
-
133
- categories.append(categories[0])
134
- val_a.append(val_a[0])
135
- val_b.append(val_b[0])
136
-
137
- fig = go.Figure()
138
- fig.add_trace(go.Scatterpolar(
139
- r=val_a, theta=categories, fill='toself', name='Source A',
140
- line=dict(color='#4f46e5', shape='spline', width=2),
141
- fillcolor='rgba(79, 70, 229, 0.2)'
142
- ))
143
- fig.add_trace(go.Scatterpolar(
144
- r=val_b, theta=categories, fill='toself', name='Source B',
145
- line=dict(color='#10b981', shape='spline', width=2),
146
- fillcolor='rgba(16, 185, 129, 0.2)'
147
- ))
148
- fig.update_layout(
149
- polar=dict(
150
- radialaxis=dict(visible=True, showticklabels=False, showline=False, gridcolor='rgba(0,0,0,0.1)'),
151
- angularaxis=dict(gridcolor='rgba(0,0,0,0.1)', linecolor='rgba(0,0,0,0.1)')
152
- ),
153
- showlegend=True,
154
- legend=dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5),
155
- title={"text": "Relative Emotion Profile", "font": {"size": 18, "family": "sans-serif"}},
156
- height=400,
157
- margin=dict(l=40, r=40, t=60, b=40),
158
- paper_bgcolor='rgba(0,0,0,0)', # Transparent
159
- plot_bgcolor='rgba(0,0,0,0)'
160
- )
161
- return fig
162
-
163
-
164
- def _highlight_framing_words(text: str, target_words: list) -> str:
165
- """Highlights LLM-identified framing words in the synced text snippet."""
166
- display_text = _truncate_to_words(text, MAX_WORDS)
167
- if not display_text:
168
- return ""
169
-
170
- highlighted_text = display_text + ("..." if len(text.split()) > MAX_WORDS else "")
171
-
172
- for word in target_words:
173
- if len(word) > 2:
174
- pattern = r'\b(' + re.escape(word) + r')\b'
175
- replacement = r"<span style='background-color: #fef08a; color: #854d0e; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>\1</span>"
176
- highlighted_text = re.sub(pattern, replacement, highlighted_text, flags=re.IGNORECASE)
177
-
178
- return highlighted_text
179
-
180
  @st.cache_data(ttl=3600, show_spinner=False)
181
  def fetch_article_text(url: str) -> str:
182
  """Scrapes article text."""
@@ -208,179 +145,103 @@ def fetch_article_text(url: str) -> str:
208
 
209
  return "Error: Could not extract text. The site may be protected by hard paywalls."
210
 
211
-
212
- def check_contradiction(text_a: str, text_b: str) -> dict:
213
- """Uses the LLM to evaluate the stance between arguments."""
214
- safe_a = _truncate_to_words(text_a, MAX_WORDS)
215
- safe_b = _truncate_to_words(text_b, MAX_WORDS)
216
-
217
- prompt = f"""
218
- You are a fact-checking analyst. Compare these two news excerpts.
219
- Return ONLY a valid JSON object with the exact keys below. Do not include markdown formatting.
220
-
221
- Keys to return:
222
- "relationship": Choose ONE from: ["CONTRADICTION", "ENTAILMENT", "NEUTRAL"]. (Contradiction = disputing facts, Entailment = agreeing on premise).
223
- "confidence": A float between 0.0 and 1.0 representing how confident you are.
224
-
225
- Text 1: "{safe_a}"
226
- Text 2: "{safe_b}"
227
- """
228
- response = ai_model.generate_content(
229
- prompt,
230
- generation_config={
231
- "response_mime_type": "application/json",
232
- "temperature": 0.1,
233
- }
234
- )
235
- result = json.loads(response.text)
236
- return {"relationship": result.get("relationship", "NEUTRAL"), "confidence": result.get("confidence", 0.0)}
237
 
 
 
 
 
 
 
 
238
 
239
- # USER INTERFACE
240
- st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
- if not GEMINI_API_KEY:
243
- st.warning("Gemini API Token Missing.")
244
- st.stop()
 
 
 
 
 
 
 
245
 
246
- st.markdown("""
247
- <style>
248
- #MainMenu {visibility: hidden;}
249
- footer {visibility: hidden;}
250
- header {visibility: hidden;}
251
-
252
- .block-container {
253
- padding-top: 2rem;
254
- padding-bottom: 2rem;
255
- }
256
 
257
- [data-testid="stMetric"] {
258
- background-color: #f8fafc;
259
- border: 1px solid #e2e8f0;
260
- border-radius: 8px;
261
- padding: 15px;
262
- box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
263
- }
264
-
265
- [data-testid="stMetricValue"] > div {
266
- white-space: normal !important;
267
- word-wrap: break-word !important;
268
- line-height: 1.2 !important;
269
- font-size: 1.6rem !important;
270
- }
271
- </style>
272
- """, unsafe_allow_html=True)
273
 
274
  # STATE MANAGEMENT
275
- if "results_a" not in st.session_state:
276
- st.session_state.results_a = None
277
- if "results_b" not in st.session_state:
278
- st.session_state.results_b = None
279
- if "nli_result" not in st.session_state:
280
- st.session_state.nli_result = None
281
-
282
- st.title("FrameVis")
283
- st.markdown("##### Media bias and framing effects across global news sources.")
284
- st.divider()
285
 
286
- input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=0)
287
 
288
- col1, col2 = st.columns(2)
289
-
290
- with col1:
291
- if input_method == "Paste Text":
292
- user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
293
  else:
294
- url_a = st.text_input("Source A URL", value=URL_A)
295
- user_article_a = fetch_article_text(url_a) if url_a else ""
296
-
297
- with col2:
298
- if input_method == "Paste Text":
299
- user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
300
- else:
301
- url_b = st.text_input("Source B URL", value=URL_B)
302
- user_article_b = fetch_article_text(url_b) if url_b else ""
303
-
304
- st.write("")
305
-
306
- # Execution button
307
- if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
308
-
309
- text_a_clean = user_article_a.strip() if user_article_a else ""
310
- text_b_clean = user_article_b.strip() if user_article_b else ""
311
-
312
- if not text_a_clean or not text_b_clean:
313
- st.warning("Please provide text or a valid URL for both Source A and Source B before analyzing.")
314
-
315
- elif text_a_clean.startswith("Error:") or text_b_clean.startswith("Error:"):
316
- st.error("One of the URLs could not be scraped. Please copy and paste the text directly.")
317
-
318
- else:
319
- with st.spinner("Analyzing framing semantics for both sources."):
320
- try:
321
- with concurrent.futures.ThreadPoolExecutor() as executor:
322
- future_a = executor.submit(analyze_article, text_a_clean)
323
- future_b = executor.submit(analyze_article, text_b_clean)
324
- future_nli = executor.submit(check_contradiction, text_a_clean, text_b_clean)
325
 
326
- st.session_state.results_a = future_a.result()
327
- st.session_state.results_b = future_b.result()
328
- st.session_state.nli_result = future_nli.result()
329
- except Exception as e:
330
- st.error(f"API or Processing Error: {str(e)}")
331
- st.session_state.results_a = None
332
- st.session_state.results_b = None
333
- st.session_state.nli_result = None
334
-
335
- # Analysis Display
336
- if st.session_state.results_a and st.session_state.results_b:
 
 
 
 
 
337
  st.divider()
338
- st.markdown("### Framing Analytics & Comparison")
339
-
340
- # Display Contradictions
341
- nli_result = st.session_state.nli_result
342
- if nli_result:
343
- if nli_result["relationship"].upper() == "CONTRADICTION":
344
- st.error(f"**NARRATIVE CONTRADICTION** (Confidence: {nli_result['confidence']:.2f}) - These sources are disputing each other's facts.")
345
- elif nli_result["relationship"].upper() == "ENTAILMENT":
346
- st.success(f"**NARRATIVE ALIGNMENT** (Confidence: {nli_result['confidence']:.2f}) - These sources agree on the core premise.")
347
- else:
348
- st.info(f"**NEUTRAL RELATIONSHIP** - These sources are discussing the topic without direct contradiction or alignment.")
349
-
350
- st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
351
-
352
- res_col1, res_col2 = st.columns(2)
353
-
354
- # Render Column A
355
- with res_col1:
356
- r_a = st.session_state.results_a
357
- st.markdown("#### Source A Breakdown")
358
- m1, m2 = st.columns(2)
359
- m3, m4 = st.columns(2)
360
- m1.metric("Subjectivity", f"{r_a['subjectivity_score']:.2f}", help="0 is objective, 1 is highly opinionated.")
361
- m2.metric("Primary Emotion", str(r_a['primary_tone']).title())
362
- m3.metric("Framing Lens", str(r_a['primary_theme']).title())
363
- m4.metric("Reading Ease", f"{r_a['reading_ease']:.1f}", help="0-30 is college graduate level, 60-70 is 8th grade.")
364
-
365
- st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_a")
366
-
367
- st.markdown("**Key Framing Language:**")
368
- annotated_text = _highlight_framing_words(user_article_a, r_a['framing_words'])
369
- st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
370
-
371
- # Render Column B
372
- with res_col2:
373
- r_b = st.session_state.results_b
374
- st.markdown("#### Source B Breakdown")
375
- m1, m2 = st.columns(2)
376
- m3, m4 = st.columns(2)
377
- m1.metric("Subjectivity", f"{r_b['subjectivity_score']:.2f}", help="0 is objective, 1 is highly opinionated.")
378
- m2.metric("Primary Emotion", str(r_b['primary_tone']).title())
379
- m3.metric("Framing Lens", str(r_b['primary_theme']).title())
380
- m4.metric("Reading Ease", f"{r_b['reading_ease']:.1f}", help="0-30 is college graduate level, 60-70 is 8th grade.")
381
-
382
- st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_b")
383
-
384
- st.markdown("**Key Framing Language:**")
385
- annotated_text = _highlight_framing_words(user_article_b, r_b['framing_words'])
386
- st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
 
1
  # imports
2
+ import os
3
+ import json
4
+ import urllib.parse
5
+ import concurrent.futures
6
  import plotly.graph_objects as go
7
  import streamlit as st
8
+ from groq import Groq
9
  from textblob import TextBlob
 
 
 
 
10
  import textstat
11
  import trafilatura
12
  import requests
 
16
  # constants
17
  MAX_WORDS = 400
18
 
19
+ st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
 
 
 
20
 
21
  # Initialize the AI model
22
+ GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
23
+ if GROQ_API_KEY:
24
+ client = Groq(api_key=GROQ_API_KEY)
25
+
26
 
27
@st.cache_data(ttl=3600, show_spinner=False)
def fetch_topic_news(query: str, limit: int = 8) -> list:
    """Fetch news articles for a topic from the Google News RSS search feed.

    Args:
        query: Free-text topic; it is URL-encoded before being placed in the
            feed URL.
        limit: Maximum number of feed items to read.

    Returns:
        A list of dicts with "publisher", "title" and "url" keys, one per
        usable feed item. An empty list is returned when the request or the
        feed parsing fails, so callers can treat "no articles" and "fetch
        failed" the same way.
    """
    import logging

    encoded_query = urllib.parse.quote(query)
    rss_url = f"https://news.google.com/rss/search?q={encoded_query}&hl=en-US&gl=US&ceid=US:en"

    try:
        response = requests.get(rss_url, timeout=10)
        # Treat HTTP errors as failures instead of parsing an error page as a feed.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, features="xml")
        items = soup.find_all("item")[:limit]
    except Exception:
        # Best-effort contract: callers only check for an empty list, so the
        # failure is logged here rather than propagated.
        # NOTE(review): because of the cache decorator above, this empty result
        # is presumably cached for the TTL as well - confirm that is acceptable.
        logging.getLogger(__name__).warning(
            "Google News RSS fetch failed for %r", query, exc_info=True
        )
        return []

    articles = []
    for item in items:
        # A single malformed entry should not discard the whole feed.
        if item.title is None or item.link is None:
            continue
        articles.append({
            "publisher": item.source.text if item.source else "Unknown Outlet",
            "title": item.title.text,
            "url": item.link.text,
        })
    return articles
48
 
49
  def _truncate_to_words(text: str, limit: int) -> str:
50
  """Truncates text by word count."""
 
82
  Text to analyze:
83
  "{safe_text}"
84
  """
85
+ response = client.chat.completions.create(
86
+ model="llama-3.3-70b-versatile",
87
+ messages=[{"role": "user", "content": prompt}],
88
+ max_tokens=300,
89
+ temperature=0.1,
90
+ response_format={"type": "json_object"}
91
  )
92
+ llm_data = json.loads(response.choices[0].message.content)
93
 
94
  subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
95
  raw_reading_ease = textstat.flesch_reading_ease(safe_text)
 
114
  "reading_ease": max(0.0, min(100.0, raw_reading_ease)),
115
  }
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  @st.cache_data(ttl=3600, show_spinner=False)
118
  def fetch_article_text(url: str) -> str:
119
  """Scrapes article text."""
 
145
 
146
  return "Error: Could not extract text. The site may be protected by hard paywalls."
147
 
148
def _create_macro_scatter_plot(results: list) -> go.Figure:
    """Build the scatter plot that places each analyzed outlet on shared axes.

    Each entry of ``results`` is read as a dict with a "publisher" key and a
    "data" dict providing "primary_theme", "framing_words", "sentiment_score"
    and "subjectivity_score". One labelled marker is added per entry, with
    sentiment on the x axis and subjectivity on the y axis; the marker colour
    is looked up from the lower-cased primary theme.
    """
    fallback_color = "#64748b"
    theme_colors = {
        "economic consequences": "#3b82f6",
        "moral and ethical fairness": "#10b981",
        "legal and bureaucratic": "#f59e0b",
        "public safety and health": "#ef4444",
        "unclear": fallback_color,
    }
    # Both axes draw the same emphasised zero line.
    zero_line_style = dict(zeroline=True, zerolinewidth=2, zerolinecolor='rgba(0,0,0,0.2)')

    figure = go.Figure()

    for entry in results:
        analysis = entry['data']
        theme = str(analysis['primary_theme']).lower()
        marker_color = theme_colors.get(theme, fallback_color)
        keywords = ", ".join(analysis['framing_words'])
        outlet = entry['publisher']

        tooltip = f"<b>{outlet}</b><br>Theme: {theme.title()}<br>Keywords: {keywords}"

        # One single-point trace per outlet so each marker carries its own
        # label, colour and hover text.
        point = go.Scatter(
            x=[analysis['sentiment_score']],
            y=[analysis['subjectivity_score']],
            mode='markers+text',
            text=[outlet],
            textposition="top center",
            marker=dict(size=14, color=marker_color, line=dict(width=1, color='DarkSlateGrey')),
            name=theme.title(),
            hoverinfo="text",
            hovertext=[tooltip],
            showlegend=False,
        )
        figure.add_trace(point)

    figure.update_layout(
        title="Global Media Polarization Map",
        xaxis_title="Sentiment (Negative to Positive)",
        yaxis_title="Subjectivity (Objective to Opinionated)",
        xaxis=dict(range=[-1.1, 1.1], **zero_line_style),
        yaxis=dict(range=[-0.1, 1.1], **zero_line_style),
        height=600,
        plot_bgcolor='#f8fafc',
    )
    return figure
190
 
 
 
 
 
 
 
 
 
 
 
191
 
192
# Without an API key no Groq client is created above, so halt this script run.
if not GROQ_API_KEY:
    st.warning("Groq API Token Missing.")
    st.stop()

# STATE MANAGEMENT
# Holds the most recent batch of per-article analysis results.
if "batch_results" not in st.session_state:
    st.session_state.batch_results = []

search_topic = st.text_input("Enter a Global Event or Topic (e.g., 'Artificial Intelligence Act', 'Middle East Conflict')", placeholder="Search topic...")

if st.button("Generate Media Landscape", type="primary", use_container_width=True):
    if not search_topic:
        st.warning("Please enter a topic.")
    else:
        with st.spinner(f"Fetching global articles for '{search_topic}'..."):
            articles = fetch_topic_news(search_topic, limit=10)

        if not articles:
            st.error("Could not find recent articles for this topic.")
        else:
            st.info(f"Found {len(articles)} articles. Analyzing framing semantics.")
            processed_results = []
            skipped_count = 0

            with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
                # Keep (future, article) pairs in feed order so the rendered
                # results are in a stable order instead of completion order.
                submitted = []
                for art in articles:
                    text = fetch_article_text(art["url"])
                    # Skip scrape failures and empty extractions instead of
                    # sending them to the LLM.
                    if not text or text.startswith("Error"):
                        skipped_count += 1
                        continue
                    submitted.append((executor.submit(analyze_article, text), art))

                for future, art_meta in submitted:
                    try:
                        analysis_data = future.result()
                    except Exception:
                        # One failed analysis must not abort the batch, but it
                        # is counted so the user is told about it below.
                        skipped_count += 1
                        continue
                    processed_results.append({
                        "publisher": art_meta["publisher"],
                        "title": art_meta["title"],
                        "data": analysis_data,
                    })

            if skipped_count:
                st.warning(f"{skipped_count} of {len(articles)} articles could not be scraped or analyzed and were skipped.")

            st.session_state.batch_results = processed_results

# Macro Analysis
if st.session_state.batch_results:
    st.divider()
    st.plotly_chart(_create_macro_scatter_plot(st.session_state.batch_results), use_container_width=True)

    st.markdown("### Source Breakdown")
    for res in st.session_state.batch_results:
        # str() mirrors the scatter plot, which also coerces the theme before formatting it.
        with st.expander(f"{res['publisher']} - {str(res['data']['primary_theme']).title()}"):
            st.write(f"**Headline:** {res['title']}")
            st.write(f"**Framing Words:** {', '.join(res['data']['framing_words'])}")
            st.write(f"**Sentiment:** {res['data']['sentiment_score']:.2f} | **Subjectivity:** {res['data']['subjectivity_score']:.2f}")