NKessler committed on
Commit
84c0e52
·
verified ·
1 Parent(s): acce5d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -289
app.py CHANGED
@@ -1,18 +1,36 @@
1
  # imports
2
  import re
3
- import typing
 
4
  import plotly.graph_objects as go
5
  import streamlit as st
6
  from textblob import TextBlob
7
- from transformers import pipeline
8
  import textstat
9
  import trafilatura
10
  import requests
11
  from bs4 import BeautifulSoup
 
12
  import nltk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  # constants
15
- MAX_TEXT_LENGTH = 1500
16
 
17
  ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
18
  ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
@@ -20,193 +38,81 @@ ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves throu
20
  URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
21
  URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
22
 
23
- @st.cache_resource
24
- def _load_nlp_models() -> typing.Dict[str, typing.Any]:
25
- """
26
- Loads NLP models into memory and caches them.
27
- """
28
- try:
29
- nltk.data.find('tokenizers/punkt')
30
- except LookupError:
31
- nltk.download('punkt')
32
- nltk.download('punkt_tab')
33
-
34
- sentiment_analyzer = pipeline(
35
- "text-classification",
36
- model="ProsusAI/finbert",
37
- )
38
- emotion_classifier = pipeline(
39
- "text-classification",
40
- model="SamLowe/roberta-base-go_emotions",
41
- top_k=5
42
- )
43
- nli_classifier = pipeline(
44
- "text-classification",
45
- model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
46
- )
47
- theme_classifier = pipeline(
48
- "zero-shot-classification",
49
- model="facebook/bart-large-mnli"
50
- )
51
-
52
- return {
53
- "sentiment": sentiment_analyzer,
54
- "emotion": emotion_classifier,
55
- "nli": nli_classifier,
56
- "theme": theme_classifier,
57
- }
58
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  def analyze_article(text: str) -> dict:
61
- """Analyzes framing using semantic keyphrases, sentiment, emotion, readability and NER."""
62
- models = _load_nlp_models()
63
  safe_text = text[:MAX_TEXT_LENGTH]
64
-
65
- # Sentiment Analysis
66
- sentiment_result = models["sentiment"](safe_text, truncation=True)[0]
67
- label = sentiment_result["label"].lower()
68
- score = sentiment_result["score"]
69
- if label == "negative":
70
- sentiment_score = -score
71
- elif label == "positive":
72
- sentiment_score = score
73
- else:
74
- sentiment_score = 0.0 # Neutral
75
-
76
- # Emotion Classification
77
- emotion_results = models["emotion"](safe_text, truncation=True)
78
 
79
- if isinstance(emotion_results, list) and isinstance(emotion_results[0], list):
80
- emotion_results = emotion_results[0]
81
-
82
- if isinstance(emotion_results, list) and len(emotion_results) > 0:
83
- tone_scores = {res["label"]: res["score"] for res in emotion_results if res["label"] != "neutral"}
84
-
85
- if tone_scores:
86
- primary_tone = max(tone_scores, key=tone_scores.get)
87
- else:
88
- primary_tone = "neutral"
89
- tone_scores = {"neutral": 1.0}
90
- else:
91
- tone_scores = {"neutral": 1.0}
92
- primary_tone = "neutral"
93
-
94
- # Thematic Framing
95
- framing_dimensions = [
96
- "economic consequences",
97
- "moral and ethical fairness",
98
- "legal and bureaucratic",
99
- "public safety and health"
100
- ]
101
- theme_result = models["theme"](safe_text, framing_dimensions)
102
- primary_theme = theme_result["labels"][0]
103
 
104
- # Subjectivity and Readability Analysis
 
 
 
 
 
 
 
 
 
 
105
  subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
106
  raw_reading_ease = textstat.flesch_reading_ease(safe_text)
107
 
108
- reading_ease = max(0.0, min(100.0, raw_reading_ease))
109
-
110
  return {
111
- "sentiment_score": sentiment_score,
 
 
 
112
  "subjectivity_score": subjectivity_score,
113
- "reading_ease": reading_ease,
114
- "primary_tone": primary_tone,
115
- "primary_theme": primary_theme,
116
- "tone_scores": tone_scores,
117
  }
118
 
119
-
120
- def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
121
- """Generates a Plotly gauge chart for sentiment visualization."""
122
- fig = go.Figure(
123
- go.Indicator(
124
- mode="gauge+number",
125
- value=score,
126
- domain={"x": [0, 1], "y": [0, 1]},
127
- title={"text": title, "font": {"size": 16}},
128
- gauge={
129
- "axis": {"range": [-1, 1], "tickwidth": 1, "tickcolor": "darkgrey"},
130
- "bar": {"color": "#475569", "thickness": 0.2},
131
- "bgcolor": "white",
132
- "borderwidth": 0,
133
- "steps": [
134
- {"range": [-1, -0.1], "color": "#fee2e2"},
135
- {"range": [-0.1, 0.1], "color": "#f1f5f9"},
136
- {"range": [0.1, 1], "color": "#dcfce3"},
137
- ],
138
- },
139
- )
140
- )
141
- fig.update_layout(height=280, margin=dict(l=20, r=20, t=60, b=20))
142
- return fig
143
-
144
-
145
- def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figure:
146
- """Generates an overlapping radar chart to compare emotions."""
147
- categories = sorted(list(set(list(results_a["tone_scores"].keys()) + list(results_b["tone_scores"].keys()))))
148
-
149
- val_a = [results_a["tone_scores"].get(c, 0) for c in categories]
150
- val_b = [results_b["tone_scores"].get(c, 0) for c in categories]
151
 
152
- categories.append(categories[0])
153
- val_a.append(val_a[0])
154
- val_b.append(val_b[0])
155
-
156
- fig = go.Figure()
157
- fig.add_trace(go.Scatterpolar(
158
- r=val_a, theta=categories, fill='toself', name='Source A',
159
- line=dict(color='#4f46e5', shape='spline', width=2),
160
- fillcolor='rgba(79, 70, 229, 0.2)'
161
- ))
162
- fig.add_trace(go.Scatterpolar(
163
- r=val_b, theta=categories, fill='toself', name='Source B',
164
- line=dict(color='#10b981', shape='spline', width=2),
165
- fillcolor='rgba(16, 185, 129, 0.2)'
166
- ))
167
- fig.update_layout(
168
- polar=dict(
169
- radialaxis=dict(visible=True, showticklabels=False, showline=False, gridcolor='rgba(0,0,0,0.1)'),
170
- angularaxis=dict(gridcolor='rgba(0,0,0,0.1)', linecolor='rgba(0,0,0,0.1)')
171
- ),
172
- showlegend=True,
173
- legend=dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5),
174
- title={"text": "Relative Emotion Profile", "font": {"size": 18, "family": "sans-serif"}},
175
- height=400,
176
- margin=dict(l=40, r=40, t=60, b=40),
177
- paper_bgcolor='rgba(0,0,0,0)', # Transparent
178
- plot_bgcolor='rgba(0,0,0,0)'
179
- )
180
- return fig
181
-
182
-
183
- def _highlight_framing_words(text: str) -> str:
184
- """Highlights subjective or emotional words in the text snippet."""
185
- raw_sentences = re.split(r'(?<=[.!?]) +', text)
186
- snippet = " ".join(raw_sentences[:3])
187
- if not snippet:
188
- return ""
189
-
190
- blob = TextBlob(snippet)
191
- target_words = set()
192
 
193
- for word in blob.words:
194
- w_sentiment = TextBlob(word).sentiment
195
- if w_sentiment.subjectivity > 0.5 or abs(w_sentiment.polarity) > 0.3:
196
- if len(word) > 2:
197
- target_words.add(str(word))
198
-
199
- highlighted_snippet = snippet
200
- for word in target_words:
201
- pattern = r'\b(' + re.escape(word) + r')\b'
202
- replacement = r"<span style='background-color: #fef08a; color: #854d0e; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>\1</span>"
203
- highlighted_snippet = re.sub(pattern, replacement, highlighted_snippet, flags=re.IGNORECASE)
204
-
205
- return highlighted_snippet + ("..." if len(raw_sentences) > 3 else "")
206
 
207
-
208
  def fetch_article_text(url: str) -> str:
209
- """Scrapes article text."""
210
  downloaded = trafilatura.fetch_url(url)
211
  if downloaded:
212
  text = trafilatura.extract(downloaded)
@@ -216,7 +122,6 @@ def fetch_article_text(url: str) -> str:
216
  try:
217
  headers = {
218
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
219
- 'Accept-Language': 'en-US,en;q=0.9',
220
  }
221
  response = requests.get(url, headers=headers, timeout=10)
222
  soup = BeautifulSoup(response.content, 'html.parser')
@@ -229,171 +134,149 @@ def fetch_article_text(url: str) -> str:
229
 
230
  if text and len(text) > 200:
231
  return text.strip()
232
-
233
- except Exception as e:
234
- return f"Error: Could not fetch URL. Connection failed."
235
 
236
- return "Error: Could not extract text. The site may be protected by hard paywalls."
237
 
 
 
 
 
 
 
238
 
239
- def check_contradiction(text_a: str, text_b: str) -> dict:
240
- """Uses Zero-Shot classification to evaluate the stance between arguments."""
241
- models = _load_nlp_models()
242
-
243
- combined_text = f"Text 1 says: {text_a[:300]} | Text 2 says: {text_b[:300]}"
244
-
245
- labels = [
246
- "the authors strongly disagree with each other",
247
- "the authors agree and share the same opinion",
248
- "the authors are talking about completely unrelated topics"
249
- ]
250
-
251
- result = models["theme"](combined_text, labels, truncation=True)
252
- top_label = result["labels"][0]
253
- score = result["scores"][0]
254
-
255
- rel_map = {
256
- "the authors strongly disagree with each other": "CONTRADICTION",
257
- "the authors agree and share the same opinion": "ENTAILMENT",
258
- "the authors are talking about completely unrelated topics": "NEUTRAL"
259
- }
260
-
261
- return {"relationship": rel_map[top_label], "confidence": score}
 
 
 
 
 
 
262
 
 
 
 
 
263
 
264
- # USER INTERFACE
265
- st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
 
 
 
 
 
 
 
266
 
 
 
267
  st.markdown("""
268
  <style>
269
- #MainMenu {visibility: hidden;}
270
- footer {visibility: hidden;}
271
- header {visibility: hidden;}
272
-
273
- .block-container {
274
- padding-top: 2rem;
275
- padding-bottom: 2rem;
276
- }
277
-
278
- [data-testid="stMetric"] {
279
- background-color: #f8fafc;
280
- border: 1px solid #e2e8f0;
281
- border-radius: 8px;
282
- padding: 15px;
283
- box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
284
- }
285
-
286
- [data-testid="stMetricValue"] > div {
287
- white-space: normal !important;
288
- word-wrap: break-word !important;
289
- line-height: 1.2 !important;
290
- font-size: 1.6rem !important;
291
- }
292
  </style>
293
  """, unsafe_allow_html=True)
294
 
295
- # STATE MANAGEMENT
296
- if "results_a" not in st.session_state:
297
- st.session_state.results_a = None
298
- if "results_b" not in st.session_state:
299
- st.session_state.results_b = None
300
- if "nli_result" not in st.session_state:
301
- st.session_state.nli_result = None
302
 
303
  st.title("FrameVis")
304
  st.markdown("##### Media bias and framing effects across global news sources.")
305
  st.divider()
306
 
307
- with st.spinner("Starting NLP models."):
308
- _load_nlp_models()
309
-
310
  input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=0)
311
 
312
  col1, col2 = st.columns(2)
313
-
314
  with col1:
315
- if input_method == "Paste Text":
316
- user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
317
- else:
318
  url_a = st.text_input("Source A URL", value=URL_A)
319
- user_article_a = fetch_article_text(url_a) if url_a else ""
320
-
321
  with col2:
322
- if input_method == "Paste Text":
323
- user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
324
- else:
325
  url_b = st.text_input("Source B URL", value=URL_B)
326
- user_article_b = fetch_article_text(url_b) if url_b else ""
327
-
328
- st.write("")
329
 
330
- # Execution button
331
  if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
332
-
333
  text_a_clean = user_article_a.strip() if user_article_a else ""
334
  text_b_clean = user_article_b.strip() if user_article_b else ""
335
 
336
- if not text_a_clean or not text_b_clean:
337
- st.warning("Please provide text or a valid URL for both Source A and Source B before analyzing.")
338
-
339
- elif text_a_clean.startswith("Error:") or text_b_clean.startswith("Error:"):
340
- st.error("One of the URLs could not be scraped. Please copy and paste the text directly.")
341
-
342
  else:
343
- with st.spinner("Analyzing framing semantics for both sources."):
344
- st.session_state.results_a = analyze_article(text_a_clean)
345
- st.session_state.results_b = analyze_article(text_b_clean)
346
- st.session_state.nli_result = check_contradiction(text_a_clean, text_b_clean)
 
 
 
 
 
347
 
348
- # Analysis Display
349
- if st.session_state.results_a and st.session_state.results_b:
350
  st.divider()
351
  st.markdown("### Framing Analytics & Comparison")
352
 
353
- # Display Contradictions
354
  nli_result = st.session_state.nli_result
355
  if nli_result:
356
- if nli_result["relationship"].upper() == "CONTRADICTION":
357
- st.error(f"**NARRATIVE CONTRADICTION** (Confidence: {nli_result['confidence']:.2f}) - These sources are disputing each other's facts.")
358
- elif nli_result["relationship"].upper() == "ENTAILMENT":
359
- st.success(f"**NARRATIVE ALIGNMENT** (Confidence: {nli_result['confidence']:.2f}) - These sources agree on the core premise.")
360
- else:
361
- st.info(f"**NEUTRAL RELATIONSHIP** - These sources are discussing the topic without direct contradiction or alignment.")
362
 
363
  st.plotly_chart(_create_comparison_radar_chart(st.session_state.results_a, st.session_state.results_b), use_container_width=True)
364
 
365
  res_col1, res_col2 = st.columns(2)
366
 
367
- # Render Column A
368
  with res_col1:
369
  r_a = st.session_state.results_a
370
  st.markdown("#### Source A Breakdown")
371
- m1, m2 = st.columns(2)
372
- m3, m4 = st.columns(2)
373
- m1.metric("Subjectivity", f"{r_a['subjectivity_score']:.2f}")
374
- m2.metric("Primary Emotion", r_a['primary_tone'].title())
375
- m3.metric("Framing Lens", r_a['primary_theme'].title())
376
- m4.metric("Reading Ease", f"{r_a['reading_ease']:.1f}")
377
 
378
  st.plotly_chart(_create_sentiment_gauge(r_a["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_a")
379
-
380
  st.markdown("**Key Framing Language:**")
381
- annotated_text = _highlight_framing_words(user_article_a)
382
- st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
383
 
384
- # Render Column B
385
  with res_col2:
386
  r_b = st.session_state.results_b
387
  st.markdown("#### Source B Breakdown")
388
- m1, m2 = st.columns(2)
389
- m3, m4 = st.columns(2)
390
- m1.metric("Subjectivity", f"{r_b['subjectivity_score']:.2f}")
391
- m2.metric("Primary Emotion", r_b['primary_tone'].title())
392
- m3.metric("Framing Lens", r_b['primary_theme'].title())
393
- m4.metric("Reading Ease", f"{r_b['reading_ease']:.1f}")
394
 
395
  st.plotly_chart(_create_sentiment_gauge(r_b["sentiment_score"], "Sentiment Bias"), use_container_width=True, key="gauge_b")
396
-
397
  st.markdown("**Key Framing Language:**")
398
- annotated_text = _highlight_framing_words(user_article_b)
399
- st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{annotated_text}</div>", unsafe_allow_html=True)
 
1
  # imports
2
  import re
3
+ import json
4
+ import concurrent.futures
5
  import plotly.graph_objects as go
6
  import streamlit as st
7
  from textblob import TextBlob
 
8
  import textstat
9
  import trafilatura
10
  import requests
11
  from bs4 import BeautifulSoup
12
+ from huggingface_hub import InferenceClient
13
  import nltk
14
+ import os
15
+
16
+ # --- INITIALIZATION ---
17
@st.cache_resource
def _initialize_app():
    """Make sure the NLTK tokenizer data needed for highlighting is available.

    The function is wrapped in ``st.cache_resource`` so the lookup/download
    runs once and is then served from Streamlit's resource cache.

    Each resource is probed individually.  Probing only ``punkt`` (as the
    previous version did) skipped the ``punkt_tab`` download whenever
    ``punkt`` was already installed.
    """
    for resource in ("punkt", "punkt_tab"):
        try:
            nltk.data.find(f"tokenizers/{resource}")
        except LookupError:
            # Resource is not present locally; fetch just this one.
            nltk.download(resource)
25
+
26
+ _initialize_app()
27
+
28
+ # Initialize Hugging Face Client
29
+ HF_TOKEN = os.environ.get("HF_TOKEN")
30
+ client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3", token=HF_TOKEN)
31
 
32
  # constants
33
+ MAX_TEXT_LENGTH = 2000 # ~400 words (safe token limit for fast API inference)
34
 
35
  ARTICLE_A = """In a long-overdue victory for working-class families, lawmakers unveiled a bold new wealth tax targeting the nation's ultra-rich. For decades, billionaires have exploited gaping loopholes to hoard unprecedented wealth while paying a fraction of what ordinary citizens pay in taxes. This progressive legislation finally forces the top 0.1% to pay their fair share. Advocates argue the trillions generated will revitalize crumbling public schools, expand healthcare access, and begin to heal the gaping wounds of systemic economic inequality that have ravaged our communities."""
36
  ARTICLE_B = """A radical new wealth tax proposed today has sent shockwaves through the financial sector, with economists warning the punitive measure will severely cripple investment and drive capital overseas. The heavy-handed legislation directly penalizes success and job creators, fundamentally undermining the free-market principles that drive innovation. Analysts caution that this bureaucratic overreach will inevitably backfire, stifling economic growth, destroying millions of private-sector jobs, and ultimately passing the financial burden down to the everyday consumer."""
 
38
  URL_A = "https://www.foxnews.com/live-news/trump-iran-israel-war-updates-march-30"
39
  URL_B = "https://edition.cnn.com/2026/03/30/world/live-news/iran-war-us-israel-trump"
40
 
41
+ # --- CORE LOGIC ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ def _extract_json_from_llm(response_text: str) -> dict:
44
+ """Robustly extracts JSON from an LLM response, ignoring markdown formatting."""
45
+ try:
46
+ match = re.search(r'\{.*\}', response_text, re.DOTALL)
47
+ if match:
48
+ return json.loads(match.group(0))
49
+ return json.loads(response_text)
50
+ except json.JSONDecodeError:
51
+ return {
52
+ "sentiment_score": 0.0, "primary_tone": "neutral",
53
+ "primary_theme": "unclear", "tone_scores": {"neutral": 1.0}
54
+ }
55
 
56
  def analyze_article(text: str) -> dict:
57
+ """Analyzes framing using an LLM API and calculates local readability."""
 
58
  safe_text = text[:MAX_TEXT_LENGTH]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
+ prompt = f"""
61
+ You are an expert media analyst. Analyze the following news snippet for framing, bias, and emotion.
62
+ Return ONLY a valid JSON object with the exact keys below. Do not include markdown formatting or explanations.
63
+
64
+ Keys to return:
65
+ "sentiment_score": A float between -1.0 (highly negative) and 1.0 (highly positive).
66
+ "primary_tone": The single dominant emotion (e.g., anger, fear, joy, sadness, surprise, neutral).
67
+ "primary_theme": Choose ONE from: ["economic consequences", "moral and ethical fairness", "legal and bureaucratic", "public safety and health"].
68
+ "tone_scores": A dictionary scoring the top 3 emotions present from 0.0 to 1.0 (e.g., {{"fear": 0.8, "anger": 0.5}}).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ Text to analyze:
71
+ "{safe_text}"
72
+ """
73
+
74
+ try:
75
+ response = client.text_generation(prompt, max_new_tokens=250, temperature=0.1)
76
+ llm_data = _extract_json_from_llm(response)
77
+ except Exception as e:
78
+ st.error(f"API Error: {e}")
79
+ llm_data = _extract_json_from_llm("") # fallback
80
+
81
  subjectivity_score = TextBlob(safe_text).sentiment.subjectivity
82
  raw_reading_ease = textstat.flesch_reading_ease(safe_text)
83
 
 
 
84
  return {
85
+ "sentiment_score": llm_data.get("sentiment_score", 0.0),
86
+ "primary_tone": llm_data.get("primary_tone", "neutral"),
87
+ "primary_theme": llm_data.get("primary_theme", "unclear"),
88
+ "tone_scores": llm_data.get("tone_scores", {"neutral": 1.0}),
89
  "subjectivity_score": subjectivity_score,
90
+ "reading_ease": max(0.0, min(100.0, raw_reading_ease)),
 
 
 
91
  }
92
 
93
def check_contradiction(text_a: str, text_b: str) -> dict:
    """Use the LLM to evaluate the stance between two excerpts.

    Only the first 1000 characters of each text are placed in the prompt.
    The model's answer is normalized so the caller can rely on the types:
    ``relationship`` is always one of ``"CONTRADICTION"``, ``"ENTAILMENT"``
    or ``"NEUTRAL"`` and ``confidence`` is always a float in [0, 1].

    Args:
        text_a: First article text.
        text_b: Second article text.

    Returns:
        ``{"relationship": str, "confidence": float}``.  Any failure while
        calling the API or parsing its reply yields the neutral result
        ``{"relationship": "NEUTRAL", "confidence": 0.0}``.
    """
    prompt = f"""
    You are a fact-checking analyst. Compare these two news excerpts.
    Return ONLY a valid JSON object with the exact keys below. Do not include markdown formatting.

    Keys to return:
    "relationship": Choose ONE from: ["CONTRADICTION", "ENTAILMENT", "NEUTRAL"]. (Contradiction = disputing facts, Entailment = agreeing on premise).
    "confidence": A float between 0.0 and 1.0 representing how confident you are.

    Text 1: "{text_a[:1000]}"
    Text 2: "{text_b[:1000]}"
    """
    neutral = {"relationship": "NEUTRAL", "confidence": 0.0}

    try:
        response = client.text_generation(prompt, max_new_tokens=100, temperature=0.1)
        result = _extract_json_from_llm(response)
    except Exception:
        # Deliberate best-effort: a failed comparison degrades to "neutral".
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        return neutral

    if not isinstance(result, dict):
        return neutral

    relationship = str(result.get("relationship", "NEUTRAL")).strip().upper()
    if relationship not in ("CONTRADICTION", "ENTAILMENT", "NEUTRAL"):
        relationship = "NEUTRAL"

    try:
        confidence = float(result.get("confidence", 0.0))
    except (TypeError, ValueError):
        confidence = 0.0
    if confidence != confidence:  # NaN
        confidence = 0.0
    confidence = max(0.0, min(1.0, confidence))

    return {"relationship": relationship, "confidence": confidence}
 
 
 
 
112
 
113
+ @st.cache_data(ttl=3600, show_spinner=False)
114
  def fetch_article_text(url: str) -> str:
115
+ """Scrapes article text with caching to prevent re-scraping."""
116
  downloaded = trafilatura.fetch_url(url)
117
  if downloaded:
118
  text = trafilatura.extract(downloaded)
 
122
  try:
123
  headers = {
124
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
 
125
  }
126
  response = requests.get(url, headers=headers, timeout=10)
127
  soup = BeautifulSoup(response.content, 'html.parser')
 
134
 
135
  if text and len(text) > 200:
136
  return text.strip()
137
+ except Exception:
138
+ return f"Error: Could not fetch URL."
 
139
 
140
+ return "Error: Could not extract text. The site may be protected by paywalls."
141
 
142
+ # --- UI COMPONENTS ---
143
def _highlight_framing_words(text: str) -> str:
    """Return an HTML snippet of the first three sentences with loaded words highlighted.

    A word is highlighted when it is longer than two characters and TextBlob
    rates it with subjectivity > 0.5 or absolute polarity > 0.3.

    The result is rendered by the caller with ``unsafe_allow_html=True`` and
    *text* may come from scraped web pages, so every piece of article text is
    HTML-escaped.  Highlighting is done in a single pass over the raw snippet;
    substituting word by word could re-match words inside the ``<span>``
    markup inserted by an earlier substitution.

    Args:
        text: Article text.

    Returns:
        HTML string (escaped text plus highlight ``<span>`` elements),
        followed by ``"..."`` when the text has more than three sentences.
        Returns ``""`` for empty input.
    """
    import html  # local import keeps this block self-contained

    raw_sentences = re.split(r'(?<=[.!?]) +', text)
    snippet = " ".join(raw_sentences[:3])
    if not snippet:
        return ""

    target_words = set()
    for word in TextBlob(snippet).words:
        if len(word) <= 2:
            continue
        sentiment = TextBlob(word).sentiment  # evaluated once per word
        if sentiment.subjectivity > 0.5 or abs(sentiment.polarity) > 0.3:
            target_words.add(str(word))

    suffix = "..." if len(raw_sentences) > 3 else ""
    if not target_words:
        return html.escape(snippet) + suffix

    # Longest-first so a longer word is preferred over a shorter alternative.
    ordered = sorted(target_words, key=lambda w: (-len(w), w))
    pattern = re.compile(
        r'\b(' + "|".join(re.escape(w) for w in ordered) + r')\b',
        flags=re.IGNORECASE,
    )
    span_open = "<span style='background-color: #fef08a; color: #854d0e; font-weight: 600; padding: 0.1rem 0.2rem; border-radius: 4px;'>"
    span_close = "</span>"

    pieces = []
    cursor = 0
    for match in pattern.finditer(snippet):
        pieces.append(html.escape(snippet[cursor:match.start()]))
        pieces.append(span_open + html.escape(match.group(0)) + span_close)
        cursor = match.end()
    pieces.append(html.escape(snippet[cursor:]))

    return "".join(pieces) + suffix
159
+
160
def _create_sentiment_gauge(score: float, title: str) -> go.Figure:
    """Build a Plotly gauge showing a sentiment score on a -1..1 axis.

    Args:
        score: Sentiment value.  It originates from LLM-produced JSON, so it
            is coerced to ``float`` and clamped to the gauge's axis range;
            values that cannot be converted are shown as 0.0.
        title: Text displayed above the gauge.

    Returns:
        The configured ``go.Figure``.
    """
    try:
        value = float(score)
    except (TypeError, ValueError):
        value = 0.0
    if value != value:  # NaN
        value = 0.0
    value = max(-1.0, min(1.0, value))

    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=value,
        domain={"x": [0, 1], "y": [0, 1]},
        title={"text": title, "font": {"size": 16}},
        gauge={
            "axis": {"range": [-1, 1], "tickwidth": 1, "tickcolor": "darkgrey"},
            "bar": {"color": "#475569", "thickness": 0.2},
            "bgcolor": "white",
            "borderwidth": 0,
            # Background bands: negative / near-neutral / positive.
            "steps": [
                {"range": [-1, -0.1], "color": "#fee2e2"},
                {"range": [-0.1, 0.1], "color": "#f1f5f9"},
                {"range": [0.1, 1], "color": "#dcfce3"},
            ],
        },
    ))
    fig.update_layout(height=280, margin=dict(l=20, r=20, t=60, b=20))
    return fig
173
+
174
def _create_comparison_radar_chart(results_a: dict, results_b: dict) -> go.Figure:
    """Build an overlaid radar chart comparing the emotion scores of two sources.

    Args:
        results_a: Analysis result for source A; its ``"tone_scores"`` entry
            is read as a mapping of emotion label -> score.
        results_b: Same for source B.

    Returns:
        A ``go.Figure`` with one ``Scatterpolar`` trace per source.  The axes
        are the sorted union of both sources' emotion labels; a label missing
        from one source is plotted as 0 for that source.
    """

    def _numeric_scores(results) -> dict:
        # tone_scores comes from LLM JSON, so tolerate a missing/non-dict
        # value and skip entries whose score is not numeric.
        raw = results.get("tone_scores") if isinstance(results, dict) else None
        if not isinstance(raw, dict):
            return {}
        cleaned = {}
        for label, value in raw.items():
            try:
                cleaned[str(label)] = float(value)
            except (TypeError, ValueError):
                continue
        return cleaned

    scores_a = _numeric_scores(results_a)
    scores_b = _numeric_scores(results_b)

    categories = sorted(set(scores_a) | set(scores_b))
    val_a = [scores_a.get(c, 0) for c in categories]
    val_b = [scores_b.get(c, 0) for c in categories]

    if categories:
        # Repeat the first point so each polygon is closed.
        categories.append(categories[0])
        val_a.append(val_a[0])
        val_b.append(val_b[0])

    fig = go.Figure()
    fig.add_trace(go.Scatterpolar(
        r=val_a, theta=categories, fill='toself', name='Source A',
        line=dict(color='#4f46e5', width=2), fillcolor='rgba(79, 70, 229, 0.2)',
    ))
    fig.add_trace(go.Scatterpolar(
        r=val_b, theta=categories, fill='toself', name='Source B',
        line=dict(color='#10b981', width=2), fillcolor='rgba(16, 185, 129, 0.2)',
    ))
    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, showticklabels=False, showline=False),
            angularaxis=dict(gridcolor='rgba(0,0,0,0.1)'),
        ),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5),
        title={"text": "Relative Emotion Profile", "font": {"size": 18}},
        height=400,
        margin=dict(l=40, r=40, t=60, b=40),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )
    return fig
193
 
194
+ # --- STREAMLIT UI ---
195
st.set_page_config(page_title="FrameVis | Media Framing", layout="wide")
st.markdown("""
<style>
    .block-container { padding-top: 2rem; padding-bottom: 2rem; }
    [data-testid="stMetric"] { background-color: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; padding: 15px; box-shadow: 0 1px 2px 0 rgba(0,0,0,0.05); }
    [data-testid="stMetricValue"] > div { white-space: normal !important; word-wrap: break-word !important; line-height: 1.2 !important; font-size: 1.6rem !important; }
</style>
""", unsafe_allow_html=True)

# The LLM calls cannot work without a token, so stop before drawing the form.
if not HF_TOKEN:
    st.error("Hugging Face Token Missing.")
    st.stop()

st.title("FrameVis")
st.markdown("##### Media bias and framing effects across global news sources.")
st.divider()

input_method = st.radio("Input Method", ["Paste Text", "Paste URL"], horizontal=True, index=0)
is_url_mode = input_method != "Paste Text"

col1, col2 = st.columns(2)
with col1:
    if input_method == "Paste Text":
        user_article_a = st.text_area("Data Source A", value=ARTICLE_A.strip(), height=220)
    else:
        url_a = st.text_input("Source A URL", value=URL_A)
        with st.spinner("Scraping Source A..."):
            user_article_a = fetch_article_text(url_a) if url_a else ""
with col2:
    if input_method == "Paste Text":
        user_article_b = st.text_area("Data Source B", value=ARTICLE_B.strip(), height=220)
    else:
        url_b = st.text_input("Source B URL", value=URL_B)
        with st.spinner("Scraping Source B..."):
            user_article_b = fetch_article_text(url_b) if url_b else ""

if st.button("Analyze and Compare Sources", use_container_width=True, type="primary"):
    text_a_clean = user_article_a.strip() if user_article_a else ""
    text_b_clean = user_article_b.strip() if user_article_b else ""

    # Only scraped input can carry a scraper error message; pasted text that
    # merely begins with the word "Error" must still be analyzed.
    scrape_failed = is_url_mode and (
        text_a_clean.startswith("Error:") or text_b_clean.startswith("Error:")
    )

    if not text_a_clean or not text_b_clean:
        st.warning("Please provide text/URLs for both sources.")
    elif scrape_failed:
        st.error("Scraping failed. Try pasting text directly.")
    else:
        with st.spinner("Running deep semantic analysis."):
            # The three LLM requests are independent, so run them concurrently.
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future_a = executor.submit(analyze_article, text_a_clean)
                future_b = executor.submit(analyze_article, text_b_clean)
                future_nli = executor.submit(check_contradiction, text_a_clean, text_b_clean)

            st.session_state.results_a = future_a.result()
            st.session_state.results_b = future_b.result()
            st.session_state.nli_result = future_nli.result()
            # Remember exactly what was analyzed so the highlighted excerpt
            # shown with the results stays in sync on later reruns.
            st.session_state.analyzed_text_a = text_a_clean
            st.session_state.analyzed_text_b = text_b_clean


def _safe_float(value, default=0.0):
    """Return *value* as a float, or *default* when it is not numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _render_source_breakdown(label, results, article_text, gauge_key):
    """Render the metrics, sentiment gauge and highlighted excerpt for one source."""
    st.markdown(f"#### Source {label} Breakdown")
    m1, m2 = st.columns(2)
    m3, m4 = st.columns(2)
    m1.metric("Subjectivity", f"{results['subjectivity_score']:.2f}", help="0 is objective, 1 is highly opinionated.")
    m2.metric("Primary Emotion", str(results['primary_tone']).title())
    m3.metric("Framing Lens", str(results['primary_theme']).title())
    m4.metric("Reading Ease", f"{results['reading_ease']:.1f}", help="0-30 is college graduate level, 60-70 is 8th grade.")

    st.plotly_chart(_create_sentiment_gauge(results["sentiment_score"], "Sentiment Bias"), use_container_width=True, key=gauge_key)
    st.markdown("**Key Framing Language:**")
    st.markdown(f"<div style='background-color: #f8fafc; padding: 1rem; border-radius: 8px; border: 1px solid #e2e8f0;'>{_highlight_framing_words(article_text)}</div>", unsafe_allow_html=True)


results_a = st.session_state.get("results_a")
results_b = st.session_state.get("results_b")

if results_a and results_b:
    st.divider()
    st.markdown("### Framing Analytics & Comparison")

    nli_result = st.session_state.get("nli_result")
    if nli_result:
        # Values come from LLM JSON; coerce before calling str/format methods.
        rel = str(nli_result.get("relationship", "NEUTRAL")).upper()
        confidence = _safe_float(nli_result.get("confidence"))
        if rel == "CONTRADICTION":
            st.error(f"**NARRATIVE CONTRADICTION** (Confidence: {confidence:.2f}) - Disputing facts.")
        elif rel == "ENTAILMENT":
            st.success(f"**NARRATIVE ALIGNMENT** (Confidence: {confidence:.2f}) - Agreeing on premises.")
        else:
            st.info("**NEUTRAL RELATIONSHIP** - Discussing without direct contradiction.")

    st.plotly_chart(_create_comparison_radar_chart(results_a, results_b), use_container_width=True)

    res_col1, res_col2 = st.columns(2)
    with res_col1:
        _render_source_breakdown(
            "A", results_a,
            st.session_state.get("analyzed_text_a", user_article_a),
            "gauge_a",
        )
    with res_col2:
        _render_source_breakdown(
            "B", results_b,
            st.session_state.get("analyzed_text_b", user_article_b),
            "gauge_b",
        )